diff --git a/src/examples/CMakeLists.txt b/src/examples/CMakeLists.txt
index f44ae861..d10c13d2 100644
--- a/src/examples/CMakeLists.txt
+++ b/src/examples/CMakeLists.txt
@@ -8,6 +8,7 @@ set( contact_examples
      common_plane_gpu.cpp
      common_plane.cpp
      mortar_lm_patch_test.cpp
+     step_1_lobatto.cpp
      )
 
 
@@ -100,6 +101,7 @@ if ( BUILD_REDECOMP )
 
   set( examples
       mfem_mortar_lm_patch.cpp
+      mfem_mortar_energy_patch.cpp
       mfem_common_plane.cpp
       jacobian_transfer_comparison.cpp
       )
diff --git a/src/examples/energy_and_force.ipynb b/src/examples/energy_and_force.ipynb
new file mode 100644
index 00000000..bd9451fa
--- /dev/null
+++ b/src/examples/energy_and_force.ipynb
@@ -0,0 +1,79 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/usr/WS2/lutz23/tribal/repo/src/examples\n",
+      "['energy_and_force.ipynb']\n"
+     ]
+    },
+    {
+     "ename": "FileNotFoundError",
+     "evalue": "[Errno 2] No such file or directory: '/usr/WS2/lutz23/tribal/repo/src/examples/energy1.csv'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-8-db823b6c9dfb>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      8\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      9\u001b[0m \u001b[0msteps\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0menergies\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 10\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcsv_file\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"r\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     11\u001b[0m     \u001b[0mreader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcsv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     12\u001b[0m     \u001b[0;32mfor\u001b[0m \u001b[0mrow\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mreader\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '/usr/WS2/lutz23/tribal/repo/src/examples/energy1.csv'"
+     ]
+    }
+   ],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "import csv\n",
+    "import os\n",
+    "print(os.getcwd())\n",
+    "print([f for f in os.listdir() if 'energy' in f.lower()])\n",
+    "\n",
+    "csv_file = \"/usr/WS2/lutz23/tribal/repo/src/examples/energy1.csv\"\n",
+    "\n",
+    "steps, energies = [], []\n",
+    "with open(csv_file, \"r\") as f:\n",
+    "    reader = csv.reader(f)\n",
+    "    for row in reader:\n",
+    "        steps.append(int(row[0]))\n",
+    "        energies.append(float(row[1]))\n",
+    "\n",
+    "fig, ax = plt.subplots(figsize=(10, 5))\n",
+    "ax.plot(steps, energies, color=\"steelblue\", linewidth=2)\n",
+    "ax.axvline(x=11, color=\"red\", linestyle=\"--\", linewidth=1, label=\"Contact onset (step 11)\")\n",
+    "ax.set_xlabel(\"Step (i)\", fontsize=12)\n",
+    "ax.set_ylabel(\"Contact Energy\", fontsize=12)\n",
+    "ax.set_title(\"Contact Energy vs. Step\", fontsize=14)\n",
+    "ax.legend()\n",
+    "ax.grid(True, alpha=0.3)\n",
+    "plt.tight_layout()\n",
+    "plt.savefig(\"contact_energy.png\", dpi=150)\n",
+    "plt.show()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/src/examples/mfem_mortar_energy_patch.cpp b/src/examples/mfem_mortar_energy_patch.cpp
new file mode 100644
index 00000000..06216a5e
--- /dev/null
+++ b/src/examples/mfem_mortar_energy_patch.cpp
@@ -0,0 +1,360 @@
+// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
+// other Tribol Project Developers. See the top-level LICENSE file for details.
+//
+// SPDX-License-Identifier: (MIT)
+
+/**
+ * @file mfem_mortar_energy_patch.cpp
+ *
+ * @brief Demonstrates contact patch test using the energy mortar method
+ *
+ * Demonstrates a two dimensional contact patch test using the energy mortar method in Tribol. Contact is enforced
+ * between two square blocks which initially overlap by 0.01. The blocks occupy [0, 1]^2 and [0, 1]x[0.99, 1.99]. To
+ * enforce symmetry and prevent rigid body modes, Dirichlet boundary conditions are applied in the x-direction along the
+ * x = 0 edges and in the y-direction along the y = 0 and y = 1.99 edges.
+ * Enforcement is through Penalty. Small deformation contact is assumed and, consequently, the system is linear and the
+ * solution is determined through a single linear solve (no timestepping).
+ *
+ * The linear system solved is
+ *  (K + K_contact) u = f_contact
+ *
+ * where K is the system matrix for elasticity, K_contact is the stiffness matrix from contact penalty,
+ * u is the vector of nodal displacements, and f_contact is the vector of nodal contact forces.
+ *
+ * The example uses the Tribol MFEM interface, which supports decomposed (MPI) meshes.
+ *
+ * Example runs (from repo root directory):
+ *   - mpirun -np 4 {build_dir}/examples/mfem_mortar_energy_patch_ex
+ *
+ * Example output can be viewed in VisIt or ParaView.
+ */
+
+#include <set>
+
+#ifdef TRIBOL_USE_UMPIRE
+// Umpire includes
+#include "umpire/ResourceManager.hpp"
+#endif
+
+// MFEM includes
+#include "mfem.hpp"
+
+// Axom includes
+#include "axom/CLI11.hpp"
+#include "axom/core.hpp"
+#include "axom/slic.hpp"
+
+// Shared includes
+#include "shared/mesh/MeshBuilder.hpp"
+
+// Tribol includes
+#include "tribol/config.hpp"
+#include "tribol/common/Parameters.hpp"
+#include "tribol/interface/tribol.hpp"
+#include "tribol/interface/mfem_tribol.hpp"
+
+/**
+ * @brief Entry point of the energy mortar patch example.
+ *
+ * Builds two stacked square meshes, assembles a linear elasticity stiffness matrix, registers a Tribol energy mortar
+ * coupling scheme, adds the contact Jacobian to the elasticity matrix, solves the resulting linear system with MINRES
+ * and writes the initial and deformed configurations to ParaView and VisIt data collections.
+ *
+ * @param argc number of command line arguments
+ * @param argv command line arguments (see the options registered with the CLI app below)
+ * @return 0 when the example runs to completion
+ */
+int main( int argc, char** argv )
+{
+  // initialize MPI
+  MPI_Init( &argc, &argv );
+  int rank;
+  MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+
+#ifdef TRIBOL_USE_UMPIRE
+  umpire::ResourceManager::getInstance();  // initialize umpire's ResourceManager
+#endif
+
+  // initialize logger
+  axom::slic::SimpleLogger logger;
+  axom::slic::setIsRoot( rank == 0 );
+
+  // define command line options
+  // refinement level: each square is meshed with 2^ref_levels elements per direction
+  int ref_levels = 2;
+  // polynomial order of the finite element discretization
+  int order = 1;
+  // Lame parameter lambda
+  double lambda = 50.0;
+  // Lame parameter mu (shear modulus)
+  double mu = 50.0;
+  // Penalty parameter
+  double penalty = 1000.0;
+  // Write debug data to screen (coordinates, contact forces and stiffness matrices)
+  bool debug = false;
+  // device configuration string (see mfem::Device::Configure() for valid options)
+  std::string device_config = "cpu";
+
+  // parse command line options
+  axom::CLI::App app{ "mfem_mortar_energy_patch" };
+  app.add_option( "-r,--refine", ref_levels, "Number of times to refine the mesh uniformly." )->capture_default_str();
+  app.add_option( "-l,--lambda", lambda, "Lame parameter lambda." )->capture_default_str();
+  app.add_option( "-m,--mu", mu, "Lame parameter mu (shear modulus)." )->capture_default_str();
+  app.add_option( "-p,--penalty", penalty, "Contact penalty parameter." )->capture_default_str();
+  app.add_option( "-d,--debug", debug, "Write debug data to screen (force and stiffness)." )->capture_default_str();
+  // The device option is disabled: its short name "-d" would collide with --debug above. As a result device_config
+  // always keeps its default value.
+  // app.add_option( "-d,--device", device_config, "Device configuration string." )->capture_default_str();
+
+  // NOTE(review): CLI11_PARSE presumably returns from main on a parse error or --help, which would skip
+  // MPI_Finalize() -- confirm against the CLI11 macro definition.
+  CLI11_PARSE( app, argc, argv );
+
+  SLIC_INFO_ROOT( "Running mfem_mortar_energy_patch with the following options:" );
+  SLIC_INFO_ROOT( axom::fmt::format( "refine:   {0}", ref_levels ) );
+  SLIC_INFO_ROOT( axom::fmt::format( "lambda:   {0}", lambda ) );
+  SLIC_INFO_ROOT( axom::fmt::format( "mu:       {0}", mu ) );
+  SLIC_INFO_ROOT( axom::fmt::format( "penalty:  {0}", penalty ) );
+  SLIC_INFO_ROOT( axom::fmt::format( "debug:    {0}\n", debug ) );
+
+  // configure the devices available for MFEM kernel launches
+  mfem::Device device( device_config );
+  if ( rank == 0 ) {
+    device.Print();
+  }
+
+  // fixed options (attribute numbers are assigned by the mesh builder calls below)
+  // boundary element attribute of the mortar surface: the bottom edge of the top square
+  std::set<int> mortar_attrs( { 5 } );
+  // boundary element attribute of the nonmortar surface: the top edge of the bottom square
+  std::set<int> nonmortar_attrs( { 3 } );
+  // boundary element attributes of x-fixed surfaces (left side of both squares)
+  auto xfixed_attrs = std::set<int>( { 4 } );
+  // boundary element attributes of y-fixed surfaces (bottom of bottom square, top of top square)
+  auto yfixed_attrs = std::set<int>( { 1 } );
+
+  // create an axom timer to give wall times for each step
+  axom::utilities::Timer timer{ false };
+
+  timer.start();
+  // build mesh of 2 squares, each with 2^ref_levels elements per direction
+  int nel_per_dir = std::pow( 2, ref_levels );
+
+  // clang-format off
+  mfem::ParMesh mesh = shared::ParMeshBuilder(MPI_COMM_WORLD, shared::MeshBuilder::Unify({
+    shared::MeshBuilder::SquareMesh(nel_per_dir, nel_per_dir) // Bottom mesh [0,1]x[0,1]
+      .updateBdrAttrib(1, 1) // Bottom (Fixed Y)
+      .updateBdrAttrib(2, 2) // Right
+      .updateBdrAttrib(3, 3) // Top (NonMortar)
+      .updateBdrAttrib(4, 4), // Left (Fixed X)
+    shared::MeshBuilder::SquareMesh(nel_per_dir, nel_per_dir) // Top mesh [0,1]x[0,1]
+      .translate({0.0, 0.99}) // Shift up to [0,1]x[0.99, 1.99]. Overlap 0.01.
+      .updateBdrAttrib(1, 5) // Bottom (Mortar)
+      .updateBdrAttrib(2, 2) // Right
+      .updateBdrAttrib(3, 1) // Top (Fixed Y)
+      .updateBdrAttrib(4, 4) // Left (Fixed X)
+  }));
+  // clang-format on
+
+  timer.stop();
+  SLIC_INFO_ROOT( axom::fmt::format( "Time to create parallel mesh: {0:f}ms", timer.elapsedTimeInMilliSec() ) );
+
+  // Set up an MFEM data collection for output. We output data in Paraview and
+  // VisIt formats.
+  mfem::ParaViewDataCollection paraview_datacoll( "mortar_energy_patch_pv", &mesh );
+  mfem::VisItDataCollection visit_datacoll( "mortar_energy_patch_vi", &mesh );
+
+  timer.start();
+  // Finite element collection (shared between all grid functions).
+  mfem::H1_FECollection fec( order, mesh.SpaceDimension() );
+  // Finite element space (shared between all grid functions).
+  mfem::ParFiniteElementSpace fespace( &mesh, &fec, mesh.SpaceDimension() );
+  // Create coordinate grid function
+  mfem::ParGridFunction coords( &fespace );
+  mesh.SetNodalGridFunction( &coords );
+  paraview_datacoll.RegisterField( "position", &coords );
+  visit_datacoll.RegisterField( "position", &coords );
+
+  // Create a grid function for displacement
+  mfem::ParGridFunction displacement( &fespace );
+  paraview_datacoll.RegisterField( "displacement", &displacement );
+  visit_datacoll.RegisterField( "displacement", &displacement );
+  displacement = 0.0;
+  timer.stop();
+  SLIC_INFO_ROOT( axom::fmt::format( "Time to create grid functions: {0:f}ms", timer.elapsedTimeInMilliSec() ) );
+
+  // save initial configuration
+  paraview_datacoll.Save();
+  visit_datacoll.Save();
+
+  // Build the list of essential (Dirichlet) true dofs: x-component on the x-fixed attributes and y-component on the
+  // y-fixed attributes.
+  timer.start();
+  mfem::Array<int> ess_tdof_list;
+  {
+    mfem::Array<int> ess_vdof_marker;
+    mfem::Array<int> ess_bdr( mesh.bdr_attributes.Max() );
+    ess_bdr = 0;
+    for ( auto xfixed_attr : xfixed_attrs ) {
+      // attributes are 1-based; skip any attribute not present in the mesh
+      if ( xfixed_attr <= ess_bdr.Size() ) ess_bdr[xfixed_attr - 1] = 1;
+    }
+    // component 0: x-direction
+    fespace.GetEssentialVDofs( ess_bdr, ess_vdof_marker, 0 );
+    mfem::Array<int> new_ess_vdof_marker;
+    ess_bdr = 0;
+    for ( auto yfixed_attr : yfixed_attrs ) {
+      if ( yfixed_attr <= ess_bdr.Size() ) ess_bdr[yfixed_attr - 1] = 1;
+    }
+    // component 1: y-direction
+    fespace.GetEssentialVDofs( ess_bdr, new_ess_vdof_marker, 1 );
+    // merge the x and y markers
+    for ( int i{ 0 }; i < ess_vdof_marker.Size(); ++i ) {
+      ess_vdof_marker[i] = ess_vdof_marker[i] || new_ess_vdof_marker[i];
+    }
+    // convert the vdof marker to a true dof marker, then to a list of true dof indices
+    mfem::Array<int> ess_tdof_marker;
+    fespace.GetRestrictionMatrix()->BooleanMult( ess_vdof_marker, ess_tdof_marker );
+    mfem::FiniteElementSpace::MarkerToList( ess_tdof_marker, ess_tdof_list );
+  }
+  timer.stop();
+  SLIC_INFO_ROOT( axom::fmt::format( "Time to set up boundary conditions: {0:f}ms", timer.elapsedTimeInMilliSec() ) );
+
+  // This block of code constructs a small-deformation linear elastic bilinear form.
+  timer.start();
+  mfem::ParBilinearForm a( &fespace );
+  mfem::ConstantCoefficient lambda_coeff( lambda );
+  mfem::ConstantCoefficient mu_coeff( mu );
+  a.AddDomainIntegrator( new mfem::ElasticityIntegrator( lambda_coeff, mu_coeff ) );
+
+  // Assemble the on-rank bilinear form stiffness matrix.
+  a.Assemble();
+  // Reduce to tdofs and form a hypre parallel matrix for parallel solution of the linear system.
+  auto A_elasticity = std::make_unique<mfem::HypreParMatrix>();
+  a.FormSystemMatrix( ess_tdof_list, *A_elasticity );
+  timer.stop();
+  SLIC_INFO_ROOT(
+      axom::fmt::format( "Time to create and assemble internal stiffness: {0:f}ms", timer.elapsedTimeInMilliSec() ) );
+
+  // This block of code does initial setup of Tribol.
+  timer.start();
+
+  int coupling_scheme_id = 0;
+  int mesh1_id = 0;
+  int mesh2_id = 1;
+  // NOTE(review): the scheme is registered with LAGRANGE_MULTIPLIER enforcement while a kinematic penalty is set
+  // below and the file header describes penalty enforcement -- confirm this is the intended configuration for
+  // ENERGY_MORTAR.
+  tribol::registerMfemCouplingScheme( coupling_scheme_id, mesh1_id, mesh2_id, mesh, coords, mortar_attrs,
+                                      nonmortar_attrs, tribol::SURFACE_TO_SURFACE, tribol::NO_SLIDING,
+                                      tribol::ENERGY_MORTAR, tribol::FRICTIONLESS, tribol::LAGRANGE_MULTIPLIER,
+                                      tribol::BINNING_GRID, tribol::ExecutionMode::Sequential );
+  tribol::setMPIComm( coupling_scheme_id, MPI_COMM_WORLD );
+  tribol::setLagrangeMultiplierOptions( coupling_scheme_id, tribol::ImplicitEvalMode::MORTAR_RESIDUAL_JACOBIAN );
+  tribol::setMfemKinematicConstantPenalty( coupling_scheme_id, penalty, penalty );
+
+  // Update the cycle information for the data collections. Also update time with a pseudotime for the solution.
+  int cycle = 1;
+  double time = 1.0;  // time is arbitrary here (no timesteps)
+  double dt = 1.0;
+  paraview_datacoll.SetCycle( cycle );
+  paraview_datacoll.SetTime( time );
+  paraview_datacoll.SetTimeStep( dt );
+  visit_datacoll.SetCycle( cycle );
+  visit_datacoll.SetTime( time );
+  visit_datacoll.SetTimeStep( dt );
+
+  // This creates the parallel adjacency-based mesh redecomposition. It also constructs new Tribol meshes as subsets of
+  // the redecomposed mesh.
+  tribol::updateMfemParallelDecomposition();
+  // This API call computes the contact response and Jacobian given the current mesh configuration.
+  tribol::update( cycle, time, dt );
+
+  // Get Contact Stiffness
+  auto A_contact = tribol::getMfemDfDx( coupling_scheme_id );
+
+  // Add contact stiffness to elasticity stiffness
+  auto A_total = std::unique_ptr<mfem::HypreParMatrix>( mfem::Add( 1.0, *A_elasticity, 1.0, *A_contact ) );
+  // Eliminate the essential dofs from the summed matrix. Per the MFEM API, EliminateRowsCols modifies A_total in place
+  // and returns the eliminated entries; A_elim only takes ownership of that matrix and is not used again below.
+  auto A_elim = std::unique_ptr<mfem::HypreParMatrix>( A_total->EliminateRowsCols( ess_tdof_list ) );
+
+  timer.stop();
+  SLIC_INFO_ROOT(
+      axom::fmt::format( "Time to setup Tribol and compute Jacobian: {0:f}ms", timer.elapsedTimeInMilliSec() ) );
+
+  int n_disp_dofs = fespace.GetTrueVSize();
+  SLIC_INFO_ROOT( axom::fmt::format( "  Number of displacement DOFs:        {0}", n_disp_dofs ) );
+
+  timer.start();
+
+  // Retrieve contact force (response), negate it to form the right hand side, and zero the entries at essential dofs
+  auto f_contact = tribol::getMfemTDofForce( coupling_scheme_id );
+  f_contact.Neg();
+  for ( int i{ 0 }; i < ess_tdof_list.Size(); ++i ) {
+    f_contact( ess_tdof_list[i] ) = 0.0;
+  }
+
+  // Optional debug dump: each rank prints, in rank order, its nodal coordinates, nodal contact forces and dense copies
+  // of the elasticity, contact and total stiffness matrices.
+  if ( debug ) {
+    int my_rank;
+    MPI_Comm_rank( MPI_COMM_WORLD, &my_rank );
+    int num_ranks;
+    MPI_Comm_size( MPI_COMM_WORLD, &num_ranks );
+    int dim = mesh.SpaceDimension();
+    int ndofs = fespace.GetNDofs();
+
+    // Prolong contact force to grid function space
+    mfem::Vector f_contact_nodes( fespace.GetVSize() );
+    fespace.GetProlongationMatrix()->Mult( f_contact, f_contact_nodes );
+
+    // Ranks take turns; the barrier at the end of each iteration lets only one rank print at a time
+    for ( int r = 0; r < num_ranks; ++r ) {
+      if ( my_rank == r ) {
+        std::cout << "Rank " << my_rank << " Coordinates:" << std::endl;
+        for ( int i = 0; i < ndofs; ++i ) {
+          std::cout << "node " << i << ": (";
+          for ( int d = 0; d < dim; ++d ) {
+            std::cout << coords( fespace.DofToVDof( i, d ) ) << ( d < dim - 1 ? ", " : "" );
+          }
+          std::cout << ")" << std::endl;
+        }
+
+        std::cout << "Rank " << my_rank << " Contact Forces:" << std::endl;
+        for ( int i = 0; i < ndofs; ++i ) {
+          std::cout << "node " << i << ": (";
+          for ( int d = 0; d < dim; ++d ) {
+            std::cout << f_contact_nodes( fespace.DofToVDof( i, d ) ) << ( d < dim - 1 ? ", " : "" );
+          }
+          std::cout << ")" << std::endl;
+        }
+        // Each matrix is merged into a single on-rank sparse matrix and converted to dense form for printing
+        mfem::SparseMatrix sm_ela;
+        A_elasticity->MergeDiagAndOffd( sm_ela );
+        mfem::DenseMatrix dm_ela;
+        sm_ela.ToDenseMatrix( dm_ela );
+        std::cout << "Rank " << my_rank << " Elasticity Stiffness:" << std::endl;
+        dm_ela.Print( std::cout );
+
+        mfem::SparseMatrix sm_con;
+        A_contact->MergeDiagAndOffd( sm_con );
+        mfem::DenseMatrix dm_con;
+        sm_con.ToDenseMatrix( dm_con );
+        std::cout << "Rank " << my_rank << " Contact Stiffness:" << std::endl;
+        dm_con.Print( std::cout );
+
+        mfem::SparseMatrix sm_tot;
+        A_total->MergeDiagAndOffd( sm_tot );
+        mfem::DenseMatrix dm_tot;
+        sm_tot.ToDenseMatrix( dm_tot );
+        std::cout << "Rank " << my_rank << " Total Stiffness:" << std::endl;
+        dm_tot.Print( std::cout );
+      }
+      MPI_Barrier( MPI_COMM_WORLD );
+    }
+  }
+
+  // Create a solution vector storing displacement
+  mfem::Vector X( fespace.GetTrueVSize() );
+  X.UseDevice( true );
+  X = 0.0;
+
+  // Use a linear solver (MINRES, no preconditioner) to find the displacement vector.
+  mfem::MINRESSolver solver( MPI_COMM_WORLD );
+  solver.SetRelTol( 1.0e-8 );
+  solver.SetAbsTol( 1.0e-12 );
+  solver.SetMaxIter( 5000 );
+  solver.SetPrintLevel( 3 );
+  solver.SetOperator( *A_total );
+  solver.Mult( f_contact, X );
+
+  // Prolong the true dof solution to the displacement grid function.
+  fespace.GetProlongationMatrix()->Mult( X, displacement );
+
+  // Update mesh coordinates given the displacement.
+  coords += displacement;
+
+  timer.stop();
+  SLIC_INFO_ROOT(
+      axom::fmt::format( "Time to solve for updated displacements: {0:f}ms", timer.elapsedTimeInMilliSec() ) );
+
+  // Save the deformed configuration
+  paraview_datacoll.Save();
+  visit_datacoll.Save();
+
+  // Tribol cleanup: deletes the coupling schemes and clears associated memory.
+  tribol::finalize();
+  MPI_Finalize();
+
+  return 0;
+}
diff --git a/src/examples/step_1_lobatto.cpp b/src/examples/step_1_lobatto.cpp
new file mode 100644
index 00000000..343d9833
--- /dev/null
+++ b/src/examples/step_1_lobatto.cpp
@@ -0,0 +1,1351 @@
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <vector>
+#include <array>
+#include <cmath>
+#include <algorithm>
+
+#include "tribol/config.hpp"
+
+#include "tribol/common/Parameters.hpp"
+#include "tribol/geom/GeomUtilities.hpp"
+#include "tribol/common/Enzyme.hpp"
+
+#ifdef TRIBOL_USE_ENZYME
+
+// Declaration of the Enzyme automatic differentiation entry point. No definition appears in the visible part of this
+// file; presumably the call is resolved by the Enzyme compiler plugin -- confirm against tribol/common/Enzyme.hpp.
+template <typename return_type, typename... Args>
+return_type __enzyme_autodiff( Args... );
+
+/**
+ * @brief Computes a unit normal of the 2D segment running from coord1 to coord2.
+ *
+ * The unit tangent (tx, ty) of the segment is rotated clockwise by 90 degrees, which gives (ty, -tx).
+ *
+ * @param [in] coord1 (x, y) coordinates of the first segment vertex
+ * @param [in] coord2 (x, y) coordinates of the second segment vertex
+ * @param [out] normal two-entry array receiving the unit normal
+ *
+ * @note No check is made for a zero-length segment; the division by the length is then a division by zero.
+ */
+void find_normal( const double* coord1, const double* coord2, double* normal )
+{
+  const double tx = coord2[0] - coord1[0];
+  const double ty = coord2[1] - coord1[1];
+  const double length = std::sqrt( ty * ty + tx * tx );
+  normal[0] = ty / length;
+  normal[1] = -( tx / length );
+}
+
+/**
+ * @brief Fills N_i with the nodes of a Gauss-Lobatto rule on [-1, 1], in ascending order.
+ *
+ * @param [in] N number of nodes requested
+ * @param [out] N_i array receiving the nodes
+ *
+ * @note Tabulated cases are N = 1 (the single node 0), 2, 3 and 4. Every other value of N falls through to the
+ * five-node rule and writes five entries, so N_i must hold at least five doubles in that case.
+ */
+void determine_lobatto_nodes( int N, double* N_i )
+{
+  switch ( N ) {
+    case 1:
+      N_i[0] = 0.0;
+      break;
+    case 2:
+      N_i[0] = -1.0;
+      N_i[1] = 1.0;
+      break;
+    case 3:
+      N_i[0] = -1.0;
+      N_i[1] = 0.0;
+      N_i[2] = 1.0;
+      break;
+    case 4: {
+      // interior nodes are +/- 1/sqrt(5)
+      const double interior = 1.0 / std::sqrt( 5.0 );
+      N_i[0] = -1.0;
+      N_i[1] = -interior;
+      N_i[2] = interior;
+      N_i[3] = 1.0;
+      break;
+    }
+    default: {
+      // five-node rule: interior nodes are 0 and +/- sqrt(3/7)
+      const double interior = std::sqrt( 3.0 / 7.0 );
+      N_i[0] = -1.0;
+      N_i[1] = -interior;
+      N_i[2] = 0.0;
+      N_i[3] = interior;
+      N_i[4] = 1.0;
+      break;
+    }
+  }
+}
+
+/**
+ * @brief Fills weights with the weights of a Gauss-Lobatto rule on [-1, 1].
+ *
+ * The weights are listed in the same order as the nodes written by determine_lobatto_nodes() and sum to 2.
+ *
+ * @param [in] N number of quadrature points requested
+ * @param [out] weights array receiving the weights
+ *
+ * @note Tabulated cases are N = 1, 2, 3 and 4. Every other value of N falls through to the five-point rule and writes
+ * five entries, so weights must hold at least five doubles in that case.
+ */
+void determine_lobatto_weights( int N, double* weights )
+{
+  switch ( N ) {
+    case 1:
+      weights[0] = 2.0;
+      break;
+    case 2:
+      weights[0] = 1.0;
+      weights[1] = 1.0;
+      break;
+    case 3:
+      weights[0] = 1.0 / 3.0;
+      weights[1] = 4.0 / 3.0;
+      weights[2] = 1.0 / 3.0;
+      break;
+    case 4:
+      weights[0] = 1.0 / 6.0;
+      weights[1] = 5.0 / 6.0;
+      weights[2] = 5.0 / 6.0;
+      weights[3] = 1.0 / 6.0;
+      break;
+    default:
+      // five-point rule
+      weights[0] = 1.0 / 10.0;
+      weights[1] = 49.0 / 90.0;
+      weights[2] = 32.0 / 45.0;
+      weights[3] = 49.0 / 90.0;
+      weights[4] = 1.0 / 10.0;
+      break;
+  }
+}
+/**
+ * @brief Fills N_i with the nodes of a Gauss-Legendre rule on [-1, 1], in ascending order.
+ *
+ * @param [in] N number of nodes requested
+ * @param [out] N_i array receiving the nodes
+ *
+ * @note Tabulated cases are N = 1, 2 and 3. Every other value of N falls through to the four-node rule and writes
+ * four entries, so N_i must hold at least four doubles in that case.
+ */
+void determine_legendre_nodes( int N, double* N_i )
+{
+  if ( N == 1 ) {
+    N_i[0] = 0.0;
+  } else if ( N == 2 ) {
+    N_i[0] = -1.0 / std::sqrt( 3.0 );
+    N_i[1] = 1.0 / std::sqrt( 3.0 );
+  } else if ( N == 3 ) {
+    N_i[0] = -std::sqrt( 3.0 / 5.0 );
+    N_i[1] = 0.0;
+    N_i[2] = std::sqrt( 3.0 / 5.0 );
+  } else {
+    // Four-node rule: the nodes are symmetric about zero, +/- sqrt((15 -/+ 2 sqrt(30)) / 35). The two positive nodes
+    // go to entries 2 and 3 so that exactly four entries are written, matching determine_legendre_weights().
+    const double root30 = std::sqrt( 30.0 );
+    const double outer = std::sqrt( ( 15.0 + 2.0 * root30 ) / 35.0 );
+    const double inner = std::sqrt( ( 15.0 - 2.0 * root30 ) / 35.0 );
+    N_i[0] = -outer;
+    N_i[1] = -inner;
+    N_i[2] = inner;
+    N_i[3] = outer;
+  }
+}
+
+/**
+ * @brief Fills W with the weights of a Gauss-Legendre rule on [-1, 1].
+ *
+ * The weights are ordered to pair with nodes listed in ascending order and sum to 2.
+ *
+ * @param [in] N number of quadrature points requested
+ * @param [out] W array receiving the weights
+ *
+ * @note Tabulated cases are N = 1, 2 and 3. Every other value of N falls through to the four-point rule and writes
+ * four entries, so W must hold at least four doubles in that case.
+ */
+void determine_legendre_weights( int N, double* W )
+{
+  switch ( N ) {
+    case 1:
+      W[0] = 2.0;
+      break;
+    case 2:
+      W[0] = 1.0;
+      W[1] = 1.0;
+      break;
+    case 3:
+      W[0] = 5.0 / 9.0;
+      W[1] = 8.0 / 9.0;
+      W[2] = 5.0 / 9.0;
+      break;
+    default: {
+      // four-point rule: the outer points carry the smaller weight
+      const double root30 = std::sqrt( 30 );
+      const double outer_weight = ( 18 - root30 ) / 36.0;
+      const double inner_weight = ( 18 + root30 ) / 36.0;
+      W[0] = outer_weight;
+      W[1] = inner_weight;
+      W[2] = inner_weight;
+      W[3] = outer_weight;
+      break;
+    }
+  }
+}
+
+/**
+ * @brief Maps a parametric coordinate to a point on the 2D segment from coord1 to coord2 by linear interpolation.
+ *
+ * The shape functions are 1 - xi and xi, so xi = 0 returns coord1 and xi = 1 returns coord2. See iso_map2() for the
+ * variant centred on xi = 0.
+ *
+ * @param [in] coord1 (x, y) coordinates of the first segment vertex
+ * @param [in] coord2 (x, y) coordinates of the second segment vertex
+ * @param [in] xi parametric coordinate
+ * @param [out] mapped_coord two-entry array receiving the mapped point
+ */
+void iso_map( const double* coord1, const double* coord2, double xi, double* mapped_coord )
+{
+  const double shape1 = 1.0 - xi;
+  const double shape2 = xi;
+  for ( int k = 0; k < 2; ++k ) {
+    mapped_coord[k] = shape1 * coord1[k] + shape2 * coord2[k];
+  }
+}
+
+/**
+ * @brief Maps a parametric coordinate to a point on the 2D segment from coord1 to coord2 by linear interpolation.
+ *
+ * The shape functions are 0.5 - xi and 0.5 + xi, so xi = -0.5 returns coord1, xi = 0 the midpoint and xi = 0.5
+ * returns coord2.
+ *
+ * @param [in] coord1 (x, y) coordinates of the first segment vertex
+ * @param [in] coord2 (x, y) coordinates of the second segment vertex
+ * @param [in] xi parametric coordinate
+ * @param [out] mapped_coord two-entry array receiving the mapped point
+ */
+void iso_map2( const double* coord1, const double* coord2, double xi, double* mapped_coord )
+{
+  const double shape1 = 0.5 - xi;
+  const double shape2 = 0.5 + xi;
+  for ( int k = 0; k < 2; ++k ) {
+    mapped_coord[k] = shape1 * coord1[k] + shape2 * coord2[k];
+  }
+}
+
+/**
+ * @brief Writes half of the vector from coord1 to coord2 into deriv.
+ *
+ * @param [in] coord1 (x, y) coordinates of the first segment vertex
+ * @param [in] coord2 (x, y) coordinates of the second segment vertex
+ * @param [out] deriv two-entry array receiving 0.5 * (coord2 - coord1)
+ *
+ * @note NOTE(review): the factor 0.5 is the derivative of a linear map over a reference interval of length 2. The
+ * maps iso_map() and iso_map2() in this file span a reference interval of length 1, for which the derivative with
+ * respect to xi is coord2 - coord1. Confirm which parametrization the callers assume.
+ */
+void iso_map_deriv( const double* coord1, const double* coord2, double* deriv )
+{
+  for ( int k = 0; k < 2; ++k ) {
+    deriv[k] = 0.5 * ( coord2[k] - coord1[k] );
+  }
+}
+
+/**
+ * @brief Tests whether the 2D segments A0-A1 and B0-B1 intersect and reports one intersection point.
+ *
+ * For non-parallel segments the intersection of the two supporting lines is computed and accepted when its parameter
+ * lies in [0, 1] on both segments (end points included). For collinear segments the first end point found to lie on
+ * the other segment is reported, checking B0, B1, A0 and A1 in that order.
+ *
+ * @param [in] A0 first vertex of segment A
+ * @param [in] A1 second vertex of segment A
+ * @param [in] B0 first vertex of segment B
+ * @param [in] B1 second vertex of segment B
+ * @param [out] intersection receives the intersection point; left untouched when the function returns false
+ * @return true if the segments share at least one point, false otherwise
+ *
+ * @note Parallelism and collinearity are decided with an absolute tolerance of 1e-12 on cross products, which is not
+ * scaled by the segment lengths.
+ */
+bool segmentsIntersect( const double A0[2], const double A1[2], const double B0[2], const double B1[2],
+                        double intersection[2] )
+{
+  constexpr double tol = 1e-12;
+
+  auto cross = []( double x0, double y0, double x1, double y1 ) { return x0 * y1 - y0 * x1; };
+
+  const double dxA = A1[0] - A0[0], dyA = A1[1] - A0[1];
+  const double dxB = B1[0] - B0[0], dyB = B1[1] - B0[1];
+  const double dxAB = B0[0] - A0[0], dyAB = B0[1] - A0[1];
+
+  const double denom = cross( dxA, dyA, dxB, dyB );
+  const double numeA = cross( dxAB, dyAB, dxB, dyB );
+  const double numeB = cross( dxAB, dyAB, dxA, dyA );
+
+  // Collinear or parallel
+  if ( std::abs( denom ) < tol ) {
+    if ( std::abs( numeA ) > tol || std::abs( numeB ) > tol ) return false;  // Parallel, not collinear
+
+    // Collinear: a point lies on a segment exactly when it is inside the segment's bounding box
+    auto within = []( const double* s0, const double* s1, const double* q ) {
+      return std::min( s0[0], s1[0] ) <= q[0] && q[0] <= std::max( s0[0], s1[0] ) &&
+             std::min( s0[1], s1[1] ) <= q[1] && q[1] <= std::max( s0[1], s1[1] );
+    };
+    auto report = [intersection]( const double* q ) {
+      intersection[0] = q[0];
+      intersection[1] = q[1];
+      return true;
+    };
+
+    // One pass over the four end points is sufficient; the checks do not depend on any loop state
+    if ( within( A0, A1, B0 ) ) return report( B0 );
+    if ( within( A0, A1, B1 ) ) return report( B1 );
+    if ( within( B0, B1, A0 ) ) return report( A0 );
+    if ( within( B0, B1, A1 ) ) return report( A1 );
+
+    // No end point of either segment lies on the other one: collinear but disjoint
+    return false;
+  }
+
+  const double ua = numeA / denom;
+  const double ub = numeB / denom;
+
+  if ( ua >= 0.0 && ua <= 1.0 && ub >= 0.0 && ub <= 1.0 ) {
+    intersection[0] = A0[0] + ua * dxA;
+    intersection[1] = A0[1] + ua * dyA;
+    return true;
+  }
+  return false;
+}
+
+// void lagrange_shape_functions(int N, double xi, const double* nodes, double* N_i) {
+//     for(int i = 0; i < N; ++i) {
+//         N_i[i] = 1.0;
+//         for(int j = 0; j < N; j++){
+//             if(i != j) {
+//                 N_i[i] *= (xi - nodes[j]) / (nodes[i] - nodes[j]);
+//             }
+//         }
+//     }
+// }
+
+// void iso_map(const double* coords, int N, double* mapped_coords, double xi) {
+//     double nodes[N];
+//     double shape_functions[N];
+//     determine_lobatto_nodes(N, nodes);
+//     lagrange_shape_functions(N, xi, nodes, shape_functions);
+//     mapped_coords[0] = 0.0;
+//     mapped_coords[1] = 0.0;
+//     for(int i = 0; i < N; ++i) {
+//         mapped_coords[0] += shape_functions[i] * coords[2 * i];
+//         mapped_coords[1] += shape_functions[i] * coords[2 * i + 1];
+//     }
+// }
+
+// void iso_map_deriv(double xi, const double* coords, int N, double* dxi_dx) {
+//     double mapped_coords[2] = {0.0, 0.0};
+//     double d_mapped_coords[2] = {0.0, 0.0};
+//     double dxi = 1.0;
+//     __enzyme_autodiff<void>( iso_map, enzyme_const, coords, enzyme_const, N, enzyme_dup, mapped_coords,
+//     d_mapped_coords, enzyme_dup, xi, dxi);
+
+//     dxi_dx[0] = d_mapped_coords[0];
+//     dxi_dx[1] = d_mapped_coords[1];
+// }
+
+// double compute_jacobian(const double* coords, const double* derivs, int N) {
+//     double dx_dxi = 0.0;
+//     double dy_dxi = 0.0;
+
+//     for (int i = 0; i < N; ++i) {
+//         dx_dxi += derivs[i] * coords[2 * i];
+//         dy_dxi += derivs[i] * coords[2 * i + 1];
+//     }
+
+//     double J = 0.5 * std::sqrt(dx_dxi * dx_dxi + dy_dxi * dy_dxi);
+//     return J;
+// }
+
+// double newtons_method(const double* p, const double* coord1, const double* coord2, double tol = 1e-20, int iter = 20)
+// {
+//     double xi = 0.0; //initial guess
+
+//     for(int i = 0; i < iter; ++i) {
+//         double mapped_coords[2] = {0.0, 0.0};
+//         iso_map(coord1, coord2, xi, mapped_coords);
+
+//         //compute residuals
+//         double rx = mapped_coords[0] - p[0];
+//         double ry = mapped_coords[1] - p[1];
+
+//         double dx_dxi[2] = {0.0, 0.0};
+//         iso_map_deriv(coord1, coord2, dx_dxi);
+
+//         double grad = 2.0 * (rx * dx_dxi[0] + ry * dx_dxi[1]);
+//         double hess = 2.0 * (dx_dxi[0] * dx_dxi[0] + dx_dxi[1] * dx_dxi[1]);
+//         //newton step
+//         double step = grad / hess;
+//         xi -= step;
+
+//         //clamp xi to [-1, 1] for segment
+//         xi = std::max(-1.0, std::min(1.0, xi));
+
+//         if (std::abs(step) < tol) {
+//             break;
+//         }
+
+//     }
+//     return xi;
+// }
+
+/**
+ * @brief Intersects the line through A0 and A1 with the line through p that has direction nB.
+ *
+ * Solves A0 + alpha * (A1 - A0) = p + beta * nB for alpha and returns the point at that alpha.
+ *
+ * @param [in] A0 first vertex of segment A
+ * @param [in] A1 second vertex of segment A
+ * @param [in] p point the second line passes through
+ * @param [in] nB direction of the second line
+ * @param [out] intersection two-entry array receiving the intersection point
+ *
+ * @note alpha is not clamped to [0, 1], so the returned point may lie on the extension of segment A beyond A0 or A1.
+ * @note When the two directions are parallel (|determinant| < 1e-12) p itself is returned.
+ */
+void find_intersection( const double* A0, const double* A1, const double* p, const double* nB, double* intersection )
+{
+  const double tangent[2] = { A1[0] - A0[0], A1[1] - A0[1] };
+  const double offset[2] = { p[0] - A0[0], p[1] - A0[1] };
+
+  // 2D cross product of the segment tangent with the line direction
+  const double det = tangent[0] * nB[1] - tangent[1] * nB[0];
+
+  if ( std::abs( det ) < 1e-12 ) {
+    // parallel lines: fall back to the input point
+    for ( int k = 0; k < 2; ++k ) {
+      intersection[k] = p[k];
+    }
+    return;
+  }
+
+  const double inv_det = 1.0 / det;
+  const double alpha = ( offset[0] * nB[1] - offset[1] * nB[0] ) * inv_det;
+
+  for ( int k = 0; k < 2; ++k ) {
+    intersection[k] = A0[k] + alpha * tangent[k];
+  }
+}
+
+// void get_projections(const double* A0, const double* A1, const double* B0, const double* B1, double* projections) {
+//     double nA[2] = {0.0};
+//     double nB[2] = {0.0};
+//     find_normal(A0, A1, nA);
+//     find_normal(B0, B1, nB);
+//     // double eta_values[N];
+//     // determine_lobatto_nodes(N, eta_values)
+//     double end_points[2] = {-0.5, 0.5}; // change for [-0.5, 0.5] mapping
+//     for (int i = 0; i < 2; ++i) {
+//         double p[2] = {0.0};
+
+//         double intersection[2] = {0.0};
+//         double seg_intersection[2] = {0.0};
+//         iso_map2(B0, B1, end_points[i], p);
+
+//         // std::cout << "gx: " << p[0] << "gy: " << p[1] << std::endl;
+//         // // double xiA = newtons_method(p, A0, A1);
+//         // // tribol::ProjectPointToSegment(p[0], p[1],  nB[0], nB[1], A0[0], A0[1], px, py);
+//         // std::cout << "px: " << p[0] << ", " << "py: " << p[1] <<std::endl;
+
+//         find_intersection(A0, A1, p, nB, intersection);
+
+//         // std::cout << "intersection: " << intersection[0] << ',' << intersection[1] << std::endl;
+
+//         double dx = A1[0] - A0[0];
+//         double dy = A1[1] - A0[1];
+//         double len2 = dx*dx + dy*dy;
+//         double xiA = ((intersection[0] - A0[0]) * dx + (intersection[1] - A0[1]) * dy) / len2;
+
+//         // bool current_inside = (xiA >= 0.0 && xiA <= 1.0);
+
+//         double nB_unit[2] = { nB[0], nB[1] };
+//         double norm = std::sqrt(nB_unit[0]*nB_unit[0] + nB_unit[1]*nB_unit[1]);
+//         nB_unit[0] /= norm;
+//         nB_unit[1] /= norm;
+
+//         double dx_gap = intersection[0] - p[0];
+//         double dy_gap = intersection[1] - p[1];
+//         double gap = dx_gap * nB_unit[0] + dy_gap * nB_unit[1];
+
+//         // if (gap > 0) {
+//         //     xiA_was_inside[i] = true;  // mark this slot as valid
+//         // }
+
+//         double del = 0.1;
+
+//         if(segmentsIntersect(A0, A1, B0, B1, seg_intersection) &&  gap > 0.0) {
+//             // std::cout << "Segments intersect" << std::endl;
+//             // if(xiA < 0.0 || xiA > 1.0) {
+//                 // std::cout << "entered loop" << std::endl;
+//                 // std::cout << "Seg intersection: " << seg_intersection[0] << ", " << seg_intersection[1] <<
+//                 std::endl;
+//                 // std::cout << "xia before: " << xiA << std::endl;
+//                 xiA = ((seg_intersection[0] - A0[0]) * dx + (seg_intersection[1] - A0[1]) * dy) / len2;
+//                 // std::cout << "xia after: " << xiA << std::endl;
+//                 if (xiA < del) {
+//                     xiA = del;
+//                 }
+//                 // std::cout << "xia after: " << xiA << std::endl;
+//             // }
+//         }
+//         xiA = xiA - 0.5;
+//         // xiA = (xiA + 1) / 2;
+//         // std::cout << "Xia: " << xiA << std::endl;  //PICK UP HERE******
+//         projections[i] = xiA;
+//     }
+// }
+
+// void get_endpoint_projections(const double* A0, const double* A1, const double* B0, const double* B1, double* proj0,
+// double* proj1) {
+//     double nA[2];
+//     find_normal(A0, A1, nA);
+//     find_intersection(B0, B1, A0, nA, proj0);
+//     find_intersection(B0, B1, A1, nA, proj1);
+
+// }
+
+// void get_projections(const double* A0, const double* A1, const double* B0, const double* B1, double* projections,
+// double del) {
+//     double nA[2] = {0.0};
+//     find_normal(A0, A1, nA);
+
+//     double end_points[2] = {-0.5, 0.5};
+//     for (int i = 0; i < 2; ++i) {
+//         double p[2] = {0.0};
+//         iso_map2(B0, B1, end_points[i], p);
+//         std::cout << "EndPoints: " << end_points[0] << ", " << end_points[1] << std::endl;
+
+//         double intersection[2] = {0.0};
+//         find_intersection(B0, B1, p, nA, intersection);
+//         std::cout << "intersection: " << intersection[0] << ", " << intersection[1] << std::endl;
+
+//         // Convert intersection to parametric coordinate on A
+//         // double dx = A1[0] - A0[0];
+//         // double dy = A1[1] - A0[1];
+//         // double len2 = dx*dx + dy*dy;
+//         // std::cout << "len2: " << len2 << std::endl;
+//         // double xiA = ((intersection[0] - A0[0]) * dx + (intersection[1] - A0[1]) * dy) / len2;
+//         // std::cout << "Xia: " << xiA << std::endl;
+
+//         // Apply constraints and convert to reference interval
+//         // xiA = std::max(del, std::min(1.0 - del, xiA)) - 0.5;
+
+//         // xiA = 0.5 - xiA;
+//         projections[i] = intersection[i];
+//     }
+// }
+/**
+ * @brief Projects the two end points of segment B onto the line through segment A and returns the hits as
+ * parametric coordinates on A.
+ *
+ * For each end point of B, find_intersection() is called with A's end points, the B end point and the normal
+ * obtained from find_normal( B0, B1, ... ) (presumably: intersect the line through A with the ray leaving the B
+ * end point along B's normal - confirm against find_intersection()). The resulting point is converted to
+ * alpha = ( hit - A0 ) . ( A1 - A0 ) / |A1 - A0|^2, which is 0 at A0 and 1 at A1, and stored as alpha - 0.5.
+ * The values are not clamped, so hits beyond the ends of A fall outside [-0.5, 0.5].
+ *
+ * @param A0 First end point of segment A (x, y)
+ * @param A1 Second end point of segment A (x, y); must differ from A0, otherwise the division by |A1 - A0|^2
+ *           is a division by zero
+ * @param B0 First end point of segment B (x, y)
+ * @param B1 Second end point of segment B (x, y)
+ * @param projections Output array of size 2: entry 0 belongs to B0, entry 1 to B1
+ * @param del Unused by the current projection scheme; kept so existing callers keep compiling
+ */
+void get_projections( const double* A0, const double* A1, const double* B0, const double* B1, double* projections,
+                      double del )
+{
+  (void)del;
+
+  double nB[2] = { 0.0 };
+  find_normal( B0, B1, nB );
+
+  // direction and squared length of A are the same for both end points of B
+  const double dx = A1[0] - A0[0];
+  const double dy = A1[1] - A0[1];
+  const double len2 = dx * dx + dy * dy;
+
+  const double* B_endpoints[2] = { B0, B1 };
+  for ( int i = 0; i < 2; ++i ) {
+    // project this end point of B onto A
+    double intersection[2] = { 0.0 };
+    find_intersection( A0, A1, B_endpoints[i], nB, intersection );
+
+    // normalized position of the hit along A, then shifted to the [-0.5, 0.5] reference interval
+    const double alpha = ( ( intersection[0] - A0[0] ) * dx + ( intersection[1] - A0[1] ) * dy ) / len2;
+    projections[i] = alpha - 0.5;
+  }
+}
+
+// void compute_integration_bounds(const double* projections, double* integration_bounds, int N) {
+//     double xi_min = projections[0];
+//     double xi_max = projections[0];
+//     for (int i = 0; i < 2; ++i) {
+//         if (xi_min > projections[i]) {
+//             xi_min = projections[i];
+//         }
+//         if(xi_max < projections[i]) {
+//             xi_max = projections[i];
+//         }
+
+//     }
+
+//     if (xi_max < -0.5) {
+//         xi_max = -0.5;
+//     }
+//     if(xi_min > 0.5) {
+//         xi_min  = 0.5;
+//     }
+//     if (xi_min < -0.5) {
+//         xi_min = -0.5;
+//     }
+//     if (xi_max > 0.5) {
+//         xi_max = 0.5;
+//     }
+
+//     double del = 0.1;
+
+//     integration_bounds[0] = xi_min;
+//     integration_bounds[1] = xi_max;
+//     // std::cout << "x_min: " << xi_min << "  xi_max: " << xi_max << std::endl;
+
+// }
+
+/**
+ * @brief Sorts the two projected coordinates and limits them to the padded reference interval.
+ *
+ * The smaller and the larger of projections[0] and projections[1] are each restricted to
+ * [-0.5 - del, 0.5 + del].
+ *
+ * @param projections Input array of size 2
+ * @param integration_bounds Output array of size 2: { lower bound, upper bound }
+ * @param del Padding added on both sides of the [-0.5, 0.5] reference interval
+ */
+void compute_integration_bounds( const double* projections, double* integration_bounds, double del )
+{
+  const double lower_limit = -0.5 - del;
+  const double upper_limit = 0.5 + del;
+
+  // order the pair (on a tie both bounds take projections[0])
+  double lower = projections[0];
+  double upper = projections[0];
+  if ( projections[1] < lower ) {
+    lower = projections[1];
+  }
+  if ( projections[1] > upper ) {
+    upper = projections[1];
+  }
+
+  // restrict the upper bound to the padded interval
+  upper = ( upper < lower_limit ) ? lower_limit : upper;
+  upper = ( upper > upper_limit ) ? upper_limit : upper;
+
+  // restrict the lower bound to the padded interval
+  lower = ( lower > upper_limit ) ? upper_limit : lower;
+  lower = ( lower < lower_limit ) ? lower_limit : lower;
+
+  integration_bounds[0] = lower;
+  integration_bounds[1] = upper;
+}
+
+/**
+ * @brief Applies a smooth end-cap mapping to a pair of integration bounds.
+ *
+ * Each bound is shifted from the [-0.5, 0.5] reference interval to xi = bound + 0.5, mapped to xi_hat, and
+ * shifted back by -0.5:
+ *  - -del <= xi <= del:          xi_hat = xi^2 / (4 del) + xi / 2 + del / 4   (0 at -del, del at +del)
+ *  - 1 - del <= xi <= 1 + del:   xi_hat = b xi^2 + c xi + d                   (1 - del at 1 - del, 1 at 1 + del)
+ *  - del <= xi <= 1 - del:       xi_hat = xi
+ *  - xi > 1 + del:               xi_hat = 1  (saturates; this can be reached through round-off when the bound
+ *                                was clamped to 0.5 + del)
+ *  - anything else:              xi_hat = 0
+ * The branches are tested in the order listed, which matters only when they overlap (del > 0.5).
+ * del == 0 is not handled specially: the first two formulas divide by del.
+ *
+ * @param integration_bounds Input array of size 2 (not modified)
+ * @param del Half-width of the blending zone around each end of the interval
+ * @param modified_bounds Output array of size 2
+ */
+void modify_bounds( double* integration_bounds, double del, double* modified_bounds )
+{
+  // read both inputs up front so the result does not depend on the output overlapping the input
+  const double shifted[2] = { integration_bounds[0] + 0.5, integration_bounds[1] + 0.5 };
+
+  for ( int i = 0; i < 2; ++i ) {
+    const double xi = shifted[i];
+    double xi_hat = 0.0;
+
+    if ( 0.0 - del <= xi && xi <= del ) {
+      // lower blend: quadratic with zero slope at -del and unit slope at +del
+      xi_hat = ( 1.0 / ( 4 * del ) ) * ( xi * xi ) + 0.5 * xi + del / 4.0;
+    } else if ( ( 1.0 - del ) <= xi && xi <= 1.0 + del ) {
+      // upper blend: quadratic with unit slope at 1 - del and zero slope at 1 + del
+      const double one_minus_del = 1.0 - del;
+      const double b = -1.0 / ( 4.0 * del );
+      const double c = 0.5 + 1.0 / ( 2.0 * del );
+      const double d = one_minus_del + ( 1.0 / ( 4.0 * del ) ) * ( one_minus_del * one_minus_del ) -
+                       0.5 * one_minus_del - one_minus_del / ( 2.0 * del );
+
+      xi_hat = b * xi * xi + c * xi + d;
+    } else if ( del <= xi && xi <= ( 1.0 - del ) ) {
+      // interior: identity
+      xi_hat = xi;
+    } else {
+      // outside every zone: an overshoot past the upper blend keeps the upper end value instead of silently
+      // collapsing onto the lower end; everything else keeps the previous result of 0
+      xi_hat = ( xi > 1.0 + del ) ? 1.0 : 0.0;
+    }
+
+    modified_bounds[i] = xi_hat - 0.5;
+  }
+}
+
+// void modify_bounds(double* integration_bounds, double del, double* modified_bounds) {
+//     double xi = 0.0;
+//     // integration_bounds[0] -= del;
+//     // integration_bounds[1] += del;
+
+//     for (int i = 0; i < 2; ++i) {
+//         double xi_hat = 0.0;
+//         // xi = 0.5 * (integration_bounds[i] + 1.0);
+//         xi = integration_bounds[i] + 0.5;
+//         // std::cout << "xi: " << xi << std::endl;
+//         if (0.0 <= xi && xi <= del) {
+//             xi_hat = (1.0/del) * (xi*xi) + 0.5 * xi + del/4.0;
+//             // std::cout << "zone1" << std::endl;
+//         }
+//         else if((1.0 - del) <= xi && xi <= 1.0) {
+//             xi_hat =  1.0 -(((1.0 - xi) * (1.0 - xi)) / (2 * del * (1.0 - del)));
+//             // std::cout << "zone2" << std::endl;
+//         }
+//         else if(del <= xi && xi <= (1.0)) {
+//             xi_hat = xi;
+//             // std::cout << "zone3" << std::endl;
+//         }
+//         else{
+//             std::cerr << "Xi did not fall in an expected range for modifying bounds for 1" << std::endl;
+//         }
+//         // modified_bounds[i] = 2.0 * xi_hat - 1;
+//         modified_bounds[i] = xi_hat - 0.5;
+//     }
+//     // std::cout << "modified bounds: " << modified_bounds[0] << ',' << modified_bounds[1] << std::endl;
+// }
+
+/**
+ * @brief Maps a pair of integration bounds through a piecewise weight-smoothing function.
+ *
+ * Each bound is shifted to xi = bound + 0.5, mapped to xi_hat, and stored as xi_hat - 0.5:
+ *  - 0 <= xi <= del:         xi_hat = xi^2 / ( 2 del ( 1 - del ) )
+ *  - 1 - del <= xi <= 1:     xi_hat = 1 - ( 1 - xi )^2 / ( 2 del ( 1 - del ) )
+ *  - del <= xi <= 1 - del:   xi_hat = ( 2 xi - del ) / ( 2 ( 1 - del ) )
+ * Any xi below 1e-10, which includes every negative value, is first set to exactly 0. A value that matches none
+ * of the ranges (e.g. xi > 1) is reported on std::cerr and yields xi_hat = 0.
+ *
+ * @param integration_bounds Input array of size 2 (not modified)
+ * @param del Width of the blending zone at each end of [0, 1]
+ * @param modified_bounds Output array of size 2
+ */
+void modify_bounds_for_weight( double* integration_bounds, double del, double* modified_bounds )
+{
+  for ( int i = 0; i < 2; ++i ) {
+    double xi_hat = 0.0;
+    double xi = integration_bounds[i] + 0.5;
+
+    // one-sided snap: the threshold is a plain constant, so negative values are pulled up to 0 as well
+    if ( xi < 1e-10 ) {
+      xi = 0.0;
+    }
+
+    if ( 0 <= xi && xi <= del ) {
+      xi_hat = ( ( xi ) * ( xi ) ) / ( 2.0 * del * ( 1.0 - del ) );
+    } else if ( ( 1.0 - del ) <= xi && xi <= 1.0 ) {
+      xi_hat = 1.0 - ( ( ( 1.0 - xi ) * ( 1.0 - xi ) ) / ( 2 * del * ( 1.0 - del ) ) );
+    } else if ( del <= xi && xi <= ( 1.0 - del ) ) {
+      xi_hat = ( ( 2.0 * xi ) - del ) / ( 2.0 * ( 1.0 - del ) );
+    } else {
+      std::cerr << "Xi did not fall in an expected range for modifying bounds for weight fpr 2" << std::endl;
+    }
+
+    modified_bounds[i] = xi_hat - 0.5;
+  }
+}
+
+/**
+ * @brief Places N quadrature points on segment A between two parametric bounds.
+ *
+ * The reference nodes returned by determine_legendre_nodes() are mapped affinely onto
+ * [integration_bounds[0], integration_bounds[1]] (this assumes the nodes live on [-1, 1] - presumably
+ * Gauss-Legendre nodes, confirm against determine_legendre_nodes()). Each mapped coordinate is converted to a
+ * physical point with iso_map2( A0, A1, ... ).
+ *
+ * @param integration_bounds Array of size 2: { lower, upper } parametric bound
+ * @param A0 First end point of segment A (x, y)
+ * @param A1 Second end point of segment A (x, y)
+ * @param N Number of quadrature points; also sizes a variable-length array (compiler extension)
+ * @param quad_points Output array of size 2 * N, stored as x0, y0, x1, y1, ...
+ */
+void compute_quadrature_point( double* integration_bounds, const double* A0, const double* A1, int N,
+                               double* quad_points )
+{
+  double reference_nodes[N];
+  determine_legendre_nodes( N, reference_nodes );
+
+  const double lower = integration_bounds[0];
+  const double upper = integration_bounds[1];
+  const double half_width = 0.5 * ( upper - lower );
+  const double midpoint = 0.5 * ( upper + lower );
+
+  for ( int q = 0; q < N; ++q ) {
+    // affine map of the reference node onto [lower, upper]
+    const double xi_q = half_width * reference_nodes[q] + midpoint;
+
+    double physical_point[2] = { 0.0, 0.0 };
+    iso_map2( A0, A1, xi_q, physical_point );
+
+    double* destination = quad_points + 2 * q;
+    destination[0] = physical_point[0];
+    destination[1] = physical_point[1];
+  }
+}
+
+/**
+ * @brief Scales the reference quadrature weights to the given parametric interval.
+ *
+ * Each weight returned by determine_legendre_weights() is multiplied by half the interval width,
+ * 0.5 * ( integration_bounds[1] - integration_bounds[0] ).
+ *
+ * @param integration_bounds Array of size 2: { lower, upper } parametric bound
+ * @param N Number of quadrature points; also sizes a variable-length array (compiler extension)
+ * @param weights Output array of size N
+ */
+void assign_weights( const double* integration_bounds, int N, double* weights )
+{
+  double reference_weights[N];
+  determine_legendre_weights( N, reference_weights );
+
+  // Jacobian of the affine map from the reference interval onto [lower, upper]
+  const double half_width = 0.5 * ( integration_bounds[1] - integration_bounds[0] );
+
+  for ( int q = 0; q < N; ++q ) {
+    weights[q] = reference_weights[q] * half_width;
+  }
+}
+
+// double compute_gap(const double* p, const double* B0, const double* B1, double* A0, double* A1, double* nB) {
+//     double nB_orig[2] = {nB[0], nB[1]};
+//     double len = std::sqrt(nB[0] * nB[0] + nB[1] * nB[1]);
+//     // std::cout << len << std:: endl;
+//     nB_orig[0] /= len;
+//     nB_orig[1] /= len;
+//     // std::cout << "nbx: " << nB_orig[0] << " nby: " << nB_orig[1] << std::endl;
+
+//     double intersection[2] = {0.0};
+//     find_intersection(B0, B1, p, nB_orig, intersection);
+
+//     // std::cout << "intersection at B: " << intersection[0] << ", " << intersection[1] << std::endl;
+
+//     //  std::cout << "intersection for gap: " << intersection[0] << ',' << intersection[1] << std::endl;
+
+//     // double eta = newtons_method(p, B0, B1); //closest projection of p onto elem B
+//     // double px, py;
+//     // tribol::ProjectPointToSegment(p[0], p[1],nB_orig[0], nB_orig[1], B0[0], B0[1], px, py);
+
+//     // double q[2] = {0.0, 0.0};
+//     // iso_map(B0, B1, eta, q); //map eta back to physical space to get closest point q on A
+
+//     double dx = intersection[0] - p[0];
+//     double dy = intersection[1] - p[1];
+//     // std::cout << "px: " << p[0] << "py: " << p[1] << std::endl;
+//     // std::cout << "dx: " << dx << "dy: " << dy << std::endl;
+
+//     double gap = dx * nB_orig[0] + dy * nB_orig[1];
+
+//     // if(dx == 0 && dy == 0){
+//     //     gap = (A0[1] - p[1]) * nB_orig[1];
+//     //     // std::cout << "gap in loop: " << gap << std::endl;
+//     //     return gap;
+
+//     // }
+//     // std::cout << "gap in compute_gap: " << gap << std::endl;
+//     return gap;
+// }
+
+/**
+ * @brief Signed gap at point p, measured along the normalized direction nA towards segment B.
+ *
+ * nA is normalized, find_intersection() is called with B's end points, p and the unit direction, and the
+ * returned value is -( hit - p ) . n, i.e. the negated component of the vector from p to the hit point along
+ * the unit direction.
+ *
+ * @param p Evaluation point (x, y)
+ * @param B0 First end point of segment B (x, y)
+ * @param B1 Second end point of segment B (x, y)
+ * @param nA Direction used for the projection; need not be unit length but must be non-zero
+ * @param A0 Unused
+ * @param A1 Unused
+ * @return The signed gap
+ */
+double compute_gap( const double* p, const double* B0, const double* B1, const double* nA, const double* A0,
+                    const double* A1 )
+{
+  const double norm = std::sqrt( nA[0] * nA[0] + nA[1] * nA[1] );
+  double unit_normal[2] = { nA[0] / norm, nA[1] / norm };
+
+  double hit[2] = { 0.0 };
+  find_intersection( B0, B1, p, unit_normal, hit );
+
+  const double to_hit_x = hit[0] - p[0];
+  const double to_hit_y = hit[1] - p[1];
+
+  // sign convention: the projected distance is negated
+  return -( to_hit_x * unit_normal[0] + to_hit_y * unit_normal[1] );
+}
+
+/**
+ * @brief Scales a gap by how strongly the two normals oppose each other.
+ *
+ * The scale factor is -( nA . nB ) when the dot product is negative and 0 otherwise.
+ *
+ * @param gap Gap value to scale
+ * @param nA Normal of segment A (x, y)
+ * @param nB Normal of segment B (x, y)
+ * @return gap multiplied by the scale factor
+ */
+double compute_modified_gap( double gap, double* nA, double* nB )
+{
+  const double alignment = nA[0] * nB[0] + nA[1] * nB[1];
+
+  double scale = 0.0;
+  if ( alignment < 0 ) {
+    // normals point against each other
+    scale = -alignment;
+  }
+
+  return gap * scale;
+}
+
+/**
+ * @brief Contact potential k1 * gap^2 - k2 * gap^3, active only for gaps of at least 1e-12.
+ *
+ * @param gap Gap value
+ * @param k1 Coefficient of the quadratic term
+ * @param k2 Coefficient of the cubic term
+ * @return 0 when gap < 1e-12, otherwise k1 * gap^2 - k2 * gap^3
+ */
+double compute_contact_potential( double gap, double k1, double k2 )
+{
+  const bool below_threshold = gap < 1e-12;
+  if ( below_threshold ) {
+    return 0;
+  }
+
+  const double gap_squared = gap * gap;
+  const double gap_cubed = gap_squared * gap;
+  return k1 * gap_squared - k2 * gap_cubed;
+}
+
+/**
+ * @brief Integrates the contact potential over the overlapping part of segment A.
+ *
+ * Steps:
+ *  1. A is rebuilt about its midpoint with its direction kept and its length set to lenA; B is rebuilt the same
+ *     way with its own current length (presumably so that lenA can be held fixed when this function is
+ *     differentiated - confirm).
+ *  2. If the normals of A and B are perpendicular ( |nA . nB| < 1e-10 ) the energy is 0.
+ *  3. Otherwise the projections are turned into integration bounds (compute_integration_bounds()), smoothed
+ *     (modify_bounds()), and used to place N quadrature points and weights on A.
+ *  4. At every point the gap to B is evaluated (compute_gap()), scaled (compute_modified_gap()) and passed to
+ *     compute_contact_potential(); the weighted sum is multiplied by 0.5 * lenA.
+ *
+ * @param coords Eight values: A0x, A0y, A1x, A1y, B0x, B0y, B1x, B1y. Both segments must have non-zero length
+ * @param del Width of the smoothing zone passed to the bound helpers
+ * @param k1 Quadratic coefficient of the contact potential
+ * @param k2 Cubic coefficient of the contact potential
+ * @param N Number of quadrature points; also sizes variable-length arrays (compiler extension)
+ * @param lenA Length imposed on segment A
+ * @param projections Array of size 2 with the parametric coordinates handed to compute_integration_bounds()
+ * @param energy Output: the integrated energy
+ */
+void compute_contact_energy( const double* coords, double del, double k1, double k2, int N, double lenA,
+                             double* projections, double* energy )
+{
+  double A0[2] = { coords[0], coords[1] };
+  double A1[2] = { coords[2], coords[3] };
+  double B0[2] = { coords[4], coords[5] };
+  double B1[2] = { coords[6], coords[7] };
+
+  double lenB = std::sqrt( ( B1[0] - B0[0] ) * ( B1[0] - B0[0] ) + ( B1[1] - B0[1] ) * ( B1[1] - B0[1] ) );
+
+  // midpoint (C) and half-vector from the midpoint to the first end point (R) of each segment
+  double AC[2] = { 0.5 * ( A0[0] + A1[0] ), 0.5 * ( A0[1] + A1[1] ) };
+  double AR[2] = { 0.5 * ( A0[0] - A1[0] ), 0.5 * ( A0[1] - A1[1] ) };
+  double normAR = std::sqrt( AR[0] * AR[0] + AR[1] * AR[1] );
+
+  double BC[2] = { 0.5 * ( B0[0] + B1[0] ), 0.5 * ( B0[1] + B1[1] ) };
+  double BR[2] = { 0.5 * ( B0[0] - B1[0] ), 0.5 * ( B0[1] - B1[1] ) };
+  double normBR = std::sqrt( BR[0] * BR[0] + BR[1] * BR[1] );
+
+  // rebuild the end points: same midpoint and direction, prescribed length
+  A0[0] = AC[0] + AR[0] * lenA * 0.5 / normAR;
+  A0[1] = AC[1] + AR[1] * lenA * 0.5 / normAR;
+
+  A1[0] = AC[0] - AR[0] * lenA * 0.5 / normAR;
+  A1[1] = AC[1] - AR[1] * lenA * 0.5 / normAR;
+
+  B0[0] = BC[0] + BR[0] * lenB * 0.5 / normBR;
+  B0[1] = BC[1] + BR[1] * lenB * 0.5 / normBR;
+
+  B1[0] = BC[0] - BR[0] * lenB * 0.5 / normBR;
+  B1[1] = BC[1] - BR[1] * lenB * 0.5 / normBR;
+
+  double nA[2] = { 0.0 };
+  double nB[2] = { 0.0 };
+  find_normal( A0, A1, nA );
+  find_normal( B0, B1, nB );
+
+  double dot_product = nA[0] * nB[0] + nA[1] * nB[1];
+
+  // perpendicular normals: no contribution
+  if ( std::abs( dot_product ) < 1e-10 ) {
+    *energy = 0;
+    return;
+  }
+
+  double integration_bounds[2];
+  compute_integration_bounds( projections, integration_bounds, del );
+
+  double modified_bounds[2];
+  modify_bounds( integration_bounds, del, modified_bounds );
+
+  // points and weights are both built from the smoothed bounds
+  double quad_points[2 * N];
+  compute_quadrature_point( modified_bounds, A0, A1, N, quad_points );
+
+  double weights[N];
+  assign_weights( modified_bounds, N, weights );
+
+  *energy = 0.0;
+  for ( int i = 0; i < N; ++i ) {
+    double mapped_coords[2] = { quad_points[2 * i], quad_points[2 * i + 1] };
+
+    double gap = compute_gap( mapped_coords, B0, B1, nA, A0, A1 );
+    double smooth_gap = compute_modified_gap( gap, nA, nB );
+    double potential = compute_contact_potential( smooth_gap, k1, k2 );
+
+    *energy += weights[i] * potential;
+  }
+  *energy *= lenA * 0.5;
+}
+
+// void compute_sym_energy(const double* coords, double del, double k1, double k2, int N, double len, double* energy) {
+//     double energy1 = 0.0;
+//     compute_contact_energy(coords, del, k1, k2, N, len, &energy1);
+
+//     double A0[2] = {coords[0], coords[1]};
+//     double A1[2] = {coords[2], coords[3]};
+//     double B0[2] = {coords[4], coords[5]};
+//     double B1[2] = {coords[6], coords[7]};
+
+//     double nA[2] = {0.0};
+//     double nB[2] = {0.0};
+
+//     // std::cout << "length: " << len << std::endl;
+//     double energy2 = 0.0;
+
+//     find_normal(A0, A1, nA);
+//     find_normal(B0, B1, nB);
+
+//     double projections[2];
+//     get_projections(A0, A1, B0, B1, projections, N);
+
+//     double integration_bounds[2];
+//     compute_integration_bounds(projections, integration_bounds, N);
+
+//     // double switch_bounds[2] = {integration_bounds[1], integration_bounds[0]};
+
+//     double modified_bounds[2];
+//     modify_bounds(integration_bounds, del, modified_bounds);
+//     // std::cout << modified_bounds[0] << ',' << modified_bounds[1] << std::endl;
+
+//      double switch_bounds[2] = {modified_bounds[1], modified_bounds[0]};
+
+//     double quad_points[2 * N];
+//     compute_quadrature_point(switch_bounds, A0, A1, N, quad_points);
+
+//     // double modified_bounds[2];
+//     // modify_bounds(switch_bounds, del, modified_bounds);
+
+//     double weights[N];
+//     assign_weights(switch_bounds, N, weights);
+
+//     *energy = 0.0;
+//     for(int i = 0; i < N; ++i) {
+//         double p[2] = {quad_points[2 * i], quad_points[2 * i + 1]};
+
+//         double gap = compute_gap(p, B0, B1, A0, A1, nB);
+//         double smooth_gap = compute_modified_gap(gap, nA, nB);
+//         // std::cout << smooth_gap << std::endl;
+
+//         double potential = compute_contact_potential(smooth_gap, k1, k2);
+
+//         energy2 +=  weights[i] * potential;
+
+//     }
+//     energy2 *= len * 0.5;
+
+//     *energy = 0.5 * (energy1 - energy2);
+
+// }
+
+/**
+ * @brief Prompts on std::cout for the two end points of an element and appends them to coords.
+ *
+ * Four values are read from std::cin in the order x1, y1, x2, y2 and pushed back in that order. The result of
+ * the stream extraction is not checked.
+ *
+ * @param N Unused
+ * @param coords Vector that receives the four values
+ */
+void read_element_coords( int N, std::vector<double>& coords )
+{
+  int point_number = 1;
+  while ( point_number <= 2 ) {
+    double x_value;
+    double y_value;
+
+    std::cout << "Enter x" << point_number << ": ";
+    std::cin >> x_value;
+
+    std::cout << "Enter y" << point_number << ": ";
+    std::cin >> y_value;
+
+    coords.push_back( x_value );
+    coords.push_back( y_value );
+    ++point_number;
+  }
+}
+
+/**
+ * @brief Copies every entry of a vector into a raw array.
+ *
+ * @param C Destination array; must provide room for at least elem.size() values
+ * @param elem Source values
+ */
+void populate_C_arrays( double* C, const std::vector<double>& elem )
+{
+  double* destination = C;
+  for ( const double value : elem ) {
+    *destination = value;
+    ++destination;
+  }
+}
+
+// void calc_force(double* coords, double del, double k1, double k2, int N, double len, double* dE_dX) {
+// double E = 0.0;
+// for (int i = 0; i < 8; ++i) {
+//     double dcoords[8] = {0.0};
+//     dcoords[i] = 1.0;
+//     double dk1 = 0.0;
+//     double dk2 = 0.0;
+//     double ddel = 0.0;
+//     double dE = 1.0;
+//     double dlen = 0.0;
+//     __enzyme_fwddiff<void>( compute_contact_energy, coords, dcoords, del, ddel, k1, dk1, k2, dk2, enzyme_const, N,
+//     dlen, len, &E, &dE); dE_dX[i] = -dE;
+
+// }
+// }
+
+/**
+ * @brief Differentiates compute_contact_energy() with respect to the eight coordinates using Enzyme.
+ *
+ * __enzyme_autodiff (Enzyme's reverse-mode entry point per the Enzyme documentation) is called with
+ * coords / dcoords and &E / &dE as duplicated (enzyme_dup) primal / shadow pairs; every other argument is
+ * marked enzyme_const. The coordinate shadow is copied to dE_dX unchanged, i.e. without a sign flip.
+ *
+ * @param coords Eight coordinate values forwarded to compute_contact_energy()
+ * @param del Forwarded to compute_contact_energy(); treated as constant
+ * @param k1 Forwarded to compute_contact_energy(); treated as constant
+ * @param k2 Forwarded to compute_contact_energy(); treated as constant
+ * @param N Forwarded to compute_contact_energy(); treated as constant
+ * @param len Forwarded as the lenA argument of compute_contact_energy(); treated as constant
+ * @param projections Forwarded to compute_contact_energy(); treated as constant
+ * @param dE_dX Output array of size 8 receiving the coordinate shadow
+ */
+void calc_force_reverse( const double* coords, double del, double k1, double k2, int N, double len, double* projections,
+                         double* dE_dX )
+{
+  // shadow of coords; starts at zero
+  double dcoords[8] = { 0.0 };
+  double E = 0.0;
+  // shadow of the energy output, seeded with 1
+  double dE = 1.0;
+  __enzyme_autodiff<void>( compute_contact_energy, enzyme_dup, coords, dcoords, enzyme_const, del, enzyme_const, k1,
+                           enzyme_const, k2, enzyme_const, N, enzyme_const, len, enzyme_const, projections, enzyme_dup,
+                           &E, &dE );
+
+  // hand the accumulated coordinate shadow back to the caller
+  for ( int i = 0; i < 8; ++i ) {
+    dE_dX[i] = dcoords[i];
+  }
+}
+
+// void calc_force_FD(double* coords, double del, double k1, double k2, int N, double* dE_dX, double h = 1e-10) {
+//     double X_plus[8] = {0.0};
+//     double X_minus[8] = {0.0};
+//     double  E_plus = 0.0;
+//     double E_minus;
+//     for(int i = 0; i < 8; ++i) {
+//         for (int j = 0; j < 8; ++j) {
+//             X_plus[j] = coords[j];
+//             X_minus[j] = coords[j];
+//         }
+//         X_plus[i] = coords[i] + h;
+//         X_minus[i] = coords[i] - h;
+//         compute_contact_energy(X_plus, del, k1, k2, N, len, &E_plus);
+//         compute_contact_energy(X_minus, del, k1, k2, N, len, &E_minus);
+//         dE_dX[i] = (E_plus - E_minus) / (2 * h);
+//      }
+
+// }
+
+/**
+ * @brief Computes the first and second derivatives of the contact energy with respect to the coordinates.
+ *
+ * The first derivative comes from calc_force_reverse(). The second derivative is obtained by applying
+ * __enzyme_fwddiff to calc_force_reverse() once per coordinate, with a unit seed in that coordinate and zero
+ * seeds for del, k1, k2, lenA and projections.
+ *
+ * @param coords Eight coordinate values (see compute_contact_energy())
+ * @param del Forwarded to calc_force_reverse()
+ * @param k1 Forwarded to calc_force_reverse()
+ * @param k2 Forwarded to calc_force_reverse()
+ * @param N Forwarded to calc_force_reverse()
+ * @param lenA Forwarded to calc_force_reverse()
+ * @param projections Array of size 2 forwarded to calc_force_reverse()
+ * @param force Output array of size 8: the result of calc_force_reverse()
+ * @param d2E_d2X Output array of size 64; entry 8 * i + j holds the derivative of component j of the
+ *                calc_force_reverse() output with respect to coordinate i
+ */
+void calc_stiffness_rev_fwd( double* coords, double del, double k1, double k2, int N, double lenA, double* projections,
+                             double* force, double* d2E_d2X )
+{
+  double dE[8] = { 0.0 };
+  double d2E[8] = { 0.0 };
+  double dEF[8] = { 0.0 };
+  calc_force_reverse( coords, del, k1, k2, N, lenA, projections, dEF );
+  for ( int i = 0; i < 8; ++i ) {
+    force[i] = dEF[i];
+  }
+  for ( int i = 0; i < 8; ++i ) {
+    // unit tangent in coordinate i, zero tangent everywhere else
+    double d2coords[8] = { 0.0 };
+    d2coords[i] = 1.0;
+    double d2k1 = 0.0;
+    double d2del = 0.0;
+    double d2k2 = 0.0;
+    double d2lenA = 0.0;
+    // the shadow must cover both entries of projections; a one-element array would be read out of bounds
+    double d2projections[2] = { 0.0, 0.0 };
+    __enzyme_fwddiff<void>( (void*)calc_force_reverse, coords, d2coords, del, d2del, k1, d2k1, k2, d2k2, N, lenA,
+                            d2lenA, projections, d2projections, dE, d2E );
+    for ( int j = 0; j < 8; ++j ) {
+      d2E_d2X[8 * i + j] = d2E[j];
+    }
+  }
+}
+
+// void calc_stiffness_rev_rev(double* coords, double del, double k1, double k2, int N, double lenA, double lenB,
+// double* d2E_d2X) {
+//     for (int i = 0; i < 8; ++i) {
+//         double d2X[8] = {0.0};
+//         double dE[8] = {0.0};
+//         double d2E[8] = {0.0};
+//         d2E[i] = 1.0;
+//         __enzyme_autodiff<void>( (void*)calc_force_reverse, enzyme_dup, coords, d2X, enzyme_const, del, enzyme_const,
+//         k1, enzyme_const, k2, enzyme_const, N, enzyme_const, lenA, enzyme_const, lenB, enzyme_dup, dE, d2E); for(int
+//         j = 0; j < 8; ++j) {
+//             d2E_d2X[8 * i + j] = d2X[j];
+//         }
+//     }
+// }
+
+// void calc_stiffness_FD(double* coords, double del, double k1, double k2, double lenA , double lenB, int N, double
+// *d2E_d2X, double h = 1e-7) {
+//     double dX_plus[8] = {0.0};
+//     double dX_minus[8] = {0.0};
+//     double dW_plus[8] = {0.0};
+//     double dW_minus[8] = {0.0};
+//     for (int i = 0; i < 8; ++i) {
+//         for (int j = 0; j < 8; ++j) {
+//             dX_plus[j] = coords[j];
+//             dX_minus[j] = coords[j];
+//         }
+//         dX_plus[i] = coords[i] + h;
+//         dX_minus[i] = coords[i] - h;
+
+//         calc_force_reverse(dX_plus, del, k1, k2, N, lenA, lenB, dW_plus);
+//         calc_force_reverse(dX_minus, del, k1, k2, N, lenA, lenB, dW_minus);
+//         for(int j = 0; j < 8; ++j){
+//         d2E_d2X[8 * i + j] = (dW_plus[j] - dW_minus[j]) / (2  * h);
+
+//     }
+
+// }
+// }
+
+// void calc_ab(const double* coord1, const double* coord2, const double* normal, double* a){
+//     double y_diff = coord1[1] - coord2[1];
+//     double x_diff = coord1[0] - coord2[0];
+//     *a = (x_diff * normal[0]) + (y_diff * normal[1]);
+
+// }
+
+// void analytical_integral(const double* coords, double del, double k1, double k2, int N, double len, double* energy) {
+//     double A0[2] = {coords[0], coords[1]};
+//     double A1[2] = {coords[2], coords[3]};
+//     double B0[2] = {coords[4], coords[5]};
+//     double B1[2] = {coords[6], coords[7]};
+
+//     double nB[2] = {0.0};
+//     find_normal(B0, B1, nB);
+//     double a = 0.0;
+//     calc_ab(A1, A0, nB, &a);
+//     double b = 0.0;
+//     calc_ab(A0, B0, nB, &b);
+
+//     double projections[2] = {0.0};
+//     get_projections(A0, A1, B0, B1, projections);
+
+//     double integration_bounds[2] = {0.0};
+//     compute_integration_bounds(projections, integration_bounds, N);
+//     double xi[2] = {0.0};
+//     modify_bounds(integration_bounds,del, xi);
+
+//     double term_one = (k1 * ((a * a) * (xi[1] * xi[1] * xi[1] / 3) + a * b * xi[1] + (b * b * xi[1])) + k2 * ((a * a
+//     * a) * (xi[1] * xi[1] * xi[1] * xi[1]) / 4) + (a * a) * (xi[1] * xi[1]) * b + ((3 * a * (xi[1] * xi[1] * xi[1]) *
+//     b) / 2) + (b * b * b) * (xi[1])); double term_two = (k1 * ((a * a) * (xi[0] * xi[0] * xi[0] / 3) + a * b * xi[0]
+//     + (b * b * xi[0])) + k2 * ((a * a * a) * (xi[0] * xi[0] * xi[0] * xi[0]) / 4) + (a * a) * (xi[0] * xi[0]) * b +
+//     ((3 * a * (xi[0] * xi[0] * xi[0]) * b) / 2) + (b * b * b) * (xi[0]));
+
+//     *energy = term_one - term_two;
+//     *energy *= len;
+// }
+
+// void calc_force_reverse_exact(double* coords, double del, double k1, double k2, int N, double len, double* dE_dX) {
+//     double dcoords[8] = {0.0};
+//     double E = 0.0;
+//     double dE = 1.0;
+//     __enzyme_autodiff<void>( analytical_integral, enzyme_dup, coords, dcoords, enzyme_const, del, enzyme_const, k1,
+//     enzyme_const, k2, enzyme_const, N, enzyme_const, len, enzyme_dup, &E, &dE);
+
+//     for(int i = 0; i < 8; ++i) {
+//         dE_dX[i] = -dcoords[i];
+//     }
+// }
+
+// void calc_force_reverse_sym(double* coords, double del, double k1, double k2, int N, double len, double* dE_dX) {
+//     double dcoords[8] = {0.0};
+//     double E = 0.0;
+//     double dE = 1.0;
+//     __enzyme_autodiff<void>( compute_sym_energy, enzyme_dup, coords, dcoords, enzyme_const, del, enzyme_const, k1,
+//     enzyme_const, k2, enzyme_const, N, enzyme_const, len, enzyme_dup, &E, &dE);
+
+//     for(int i = 0; i < 8; ++i) {
+//         dE_dX[i] = -dcoords[i];
+//     }
+// }
+
+#endif  // TRIBOL_USE_ENZYME
+
+/**
+ * @brief Driver: slides segment A horizontally past segment B and prints the energy derivative at each step.
+ *
+ * With TRIBOL_USE_ENZYME defined, segment A (initially from (-0.3, -0.05) to (0.0, -0.05)) is shifted in x by
+ * 0.01 per step for 140 steps while segment B stays fixed. For every step the eight entries returned by
+ * calc_force_reverse() are written to std::cout as one comma-separated line. Without TRIBOL_USE_ENZYME the
+ * program does nothing.
+ */
+int main()
+{
+#ifdef TRIBOL_USE_ENZYME
+  // quadrature and penalty settings
+  int N = 3;
+  double del = 0.05;
+  double k1 = 100;
+  double k2 = 0.0;
+
+  // sweep definition
+  const int num_steps = 140;
+  const double step_size = 0.01;
+
+  // initial position of A and fixed position of B
+  double A0_i[2] = { -0.3, -0.05 };
+  double A1_i[2] = { 0.0, -0.05 };
+  double B0[2] = { 1.0, 0.0 };
+  double B1[2] = { 0.1, 0.0 };
+
+  for ( int i = 0; i < num_steps; ++i ) {
+    double shift = step_size * i;
+
+    double A0[2] = { A0_i[0] + shift, A0_i[1] };
+    double A1[2] = { A1_i[0] + shift, A1_i[1] };
+
+    double coords[8] = { A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1] };
+    double lenA = std::sqrt( ( A1[0] - A0[0] ) * ( A1[0] - A0[0] ) + ( A1[1] - A0[1] ) * ( A1[1] - A0[1] ) );
+
+    double projections[2] = { 0.0 };
+    get_projections( A0, A1, B0, B1, projections, del );
+
+    // the energy itself is evaluated but currently not printed
+    double energy = 0.0;
+    compute_contact_energy( coords, del, k1, k2, N, lenA, projections, &energy );
+
+    double dE_dX[8] = { 0.0 };
+    calc_force_reverse( coords, del, k1, k2, N, lenA, projections, dE_dX );
+
+    // one CSV line per step
+    for ( int j = 0; j < 8; ++j ) {
+      if ( j > 0 ) {
+        std::cout << ",";
+      }
+      std::cout << dE_dX[j];
+    }
+    std::cout << std::endl;
+  }
+#endif  // TRIBOL_USE_ENZYME
+  return 0;
+}
\ No newline at end of file
diff --git a/src/shared/math/ParSparseMat.cpp b/src/shared/math/ParSparseMat.cpp
index b0747c15..800d81c1 100644
--- a/src/shared/math/ParSparseMat.cpp
+++ b/src/shared/math/ParSparseMat.cpp
@@ -272,6 +272,32 @@ ParSparseMat ParSparseMat::diagonalMatrix( MPI_Comm comm, HYPRE_BigInt global_si
   return diagonalMatrix( comm, global_size, row_starts_array, diag_val, ordered_rows, skip_rows );
 }
 
+ParSparseMat ParSparseMat::diagonalMatrix( MPI_Comm comm, HYPRE_BigInt global_size, HYPRE_BigInt* row_starts,
+                                           const mfem::Vector& diag_vals )
+{
+  int num_local_rows = diag_vals.Size();
+  // Build the local diagonal block in CSR form: row i holds exactly one entry, located in column i.
+  mfem::Array<int> rows( num_local_rows + 1 );
+  mfem::Array<int> cols( num_local_rows );
+  rows[0] = 0;
+
+  for ( int i = 0; i < num_local_rows; ++i ) {
+    rows[i + 1] = i + 1;
+    cols[i] = i;
+  }
+  // The arrays give up ownership of their host buffers so the raw pointers passed on below stay valid.
+  rows.GetMemory().SetHostPtrOwner( false );
+  cols.GetMemory().SetHostPtrOwner( false );
+  // The values are copied first, so the caller's diag_vals is never modified or aliased.
+  mfem::Vector vals = diag_vals;
+  vals.GetMemory().SetHostPtrOwner( false );
+  // Wrap the raw CSR buffers; the trailing flags are ( ownij = false, owna = false, issorted = true ).
+  mfem::SparseMatrix inactive_diag( rows.GetData(), cols.GetData(), vals.GetData(), num_local_rows, num_local_rows,
+                                    false, false, true );
+  // NOTE(review): nothing visible here frees the three buffers; presumably the constructor below adopts them.
+  return ParSparseMat( comm, global_size, row_starts, std::move( inactive_diag ) );
+}
+
 #endif  // #ifdef TRIBOL_USE_MPI
 
 }  // namespace shared
diff --git a/src/shared/math/ParSparseMat.hpp b/src/shared/math/ParSparseMat.hpp
index 0b8171be..54fda42f 100644
--- a/src/shared/math/ParSparseMat.hpp
+++ b/src/shared/math/ParSparseMat.hpp
@@ -334,6 +334,18 @@ class ParSparseMat : public ParSparseMatView {
                                       double diag_val, const mfem::Array<int>& ordered_rows = mfem::Array<int>(),
                                       bool skip_rows = true );
 
+  /**
+   * @brief Returns a diagonal matrix whose local diagonal entries are copied from the given vector
+   *
+   * @param comm MPI communicator
+   * @param global_size Global size of the matrix (rows and columns)
+   * @param row_starts Row partitioning (global offsets)
+   * @param diag_vals Local diagonal values; entry i goes to local row i, so Size() sets the local row count
+   * @return ParSparseMat The constructed diagonal matrix
+   */
+  static ParSparseMat diagonalMatrix( MPI_Comm comm, HYPRE_BigInt global_size, HYPRE_BigInt* row_starts,
+                                      const mfem::Vector& diag_vals );
+
  private:
   std::unique_ptr<mfem::HypreParMatrix> owned_mat_;
 };
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index ed3caf7d..1f047f1d 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -206,6 +206,10 @@ if( TRIBOL_USE_ENZYME )
       tribol_enzyme_nodal_normal.cpp
       tribol_enzyme_mortar_assembled.cpp
       tribol_enzyme_poly_intersect.cpp
+      tribol_mfem_mortar_energy.cpp
+      tribol_new_energy_patch.cpp
+      tribol_new_energy_patch_LM.cpp
+      tribol_finite_diff_energy_mortar.cpp
       )
 
   set(combined_test_depends tribol gtest)
diff --git a/src/tests/enzyme_smoke.cpp b/src/tests/enzyme_smoke.cpp
index 4a3b1d77..24662611 100644
--- a/src/tests/enzyme_smoke.cpp
+++ b/src/tests/enzyme_smoke.cpp
@@ -31,6 +31,31 @@ void LinearQuadBasisDeriv( const double* xi, double* phi, double* dphi_dxi, doub
   __enzyme_fwddiff<void>( (void*)LinearQuadBasis, xi, xi_dot, phi, dphi_deta );
 }
 
+void LinearQuadBasisDeriv_FD( const double* xi, double* phi, double* dphi_dxi, double* dphi_deta, double h = 1e-4 )
+{
+  // Central-difference counterpart of LinearQuadBasisDeriv(): writes the basis values at xi to phi and
+  // ( phi(xi + h * e_d) - phi(xi - h * e_d) ) / ( 2 * h ) to dphi_dxi (d = 0) and dphi_deta (d = 1).
+  // phi used to be accepted but never written; it is now filled so callers do not read stale values.
+  double xi_0[2] = { xi[0], xi[1] };
+  LinearQuadBasis( xi_0, phi );
+
+  double* const derivs[2] = { dphi_dxi, dphi_deta };
+  for ( int d = 0; d < 2; ++d ) {
+    // perturb coordinate d only; the other coordinate stays at its value in xi
+    double xi_plush[2] = { xi[0], xi[1] };
+    double xi_minush[2] = { xi[0], xi[1] };
+    xi_plush[d] += h;
+    xi_minush[d] -= h;
+    double phi_p[4];
+    double phi_m[4];
+    LinearQuadBasis( xi_plush, phi_p );
+    LinearQuadBasis( xi_minush, phi_m );
+    for ( int i = 0; i < 4; ++i ) {
+      derivs[d][i] = ( phi_p[i] - phi_m[i] ) / ( 2 * h );
+    }
+  }
+}
+
 TEST( enzyme_smoke, basic_use )
 {
   double xi[2] = { 0.2, -0.4 };
@@ -38,14 +63,29 @@ TEST( enzyme_smoke, basic_use )
   double dphi_dxi[4] = { 0.0, 0.0, 0.0, 0.0 };
   double dphi_deta[4] = { 0.0, 0.0, 0.0, 0.0 };
 
-  LinearQuadBasisDeriv( xi, phi, dphi_dxi, dphi_deta );
+  double xi_fw[2] = { 0.2, -0.4 };
+  double phi_fw[4] = { 0.0, 0.0, 0.0, 0.0 };
+  double dphi_dxi_fw[4] = { 0.0, 0.0, 0.0, 0.0 };
+  double dphi_deta_fw[4] = { 0.0, 0.0, 0.0, 0.0 };
+
+  LinearQuadBasisDeriv_FD( xi, phi, dphi_dxi, dphi_deta );
+  LinearQuadBasisDeriv( xi_fw, phi_fw, dphi_dxi_fw, dphi_deta_fw );
+
+  EXPECT_NEAR( dphi_dxi[0], dphi_dxi_fw[0], 1e-6 );
+  EXPECT_NEAR( dphi_deta[0], dphi_deta_fw[0], 1e-6 );
+  EXPECT_NEAR( dphi_dxi[1], dphi_dxi_fw[1], 1e-6 );
+  EXPECT_NEAR( dphi_deta[1], dphi_deta_fw[1], 1e-6 );
+  EXPECT_NEAR( dphi_dxi[2], dphi_dxi_fw[2], 1e-6 );
+  EXPECT_NEAR( dphi_deta[2], dphi_deta_fw[2], 1e-6 );
+  EXPECT_NEAR( dphi_dxi[3], dphi_dxi_fw[3], 1e-6 );
+  EXPECT_NEAR( dphi_deta[3], dphi_deta_fw[3], 1e-6 );
 
-  EXPECT_EQ( dphi_dxi[0], -0.25 * ( 1.0 - xi[1] ) );
-  EXPECT_EQ( dphi_deta[0], -0.25 * ( 1.0 - xi[0] ) );
-  EXPECT_EQ( dphi_dxi[1], 0.25 * ( 1.0 - xi[1] ) );
-  EXPECT_EQ( dphi_deta[1], -0.25 * ( 1.0 + xi[0] ) );
-  EXPECT_EQ( dphi_dxi[2], 0.25 * ( 1.0 + xi[1] ) );
-  EXPECT_EQ( dphi_deta[2], 0.25 * ( 1.0 + xi[0] ) );
-  EXPECT_EQ( dphi_dxi[3], -0.25 * ( 1.0 + xi[1] ) );
-  EXPECT_EQ( dphi_deta[3], 0.25 * ( 1.0 - xi[0] ) );
+  EXPECT_EQ( dphi_dxi_fw[0], -0.25 * ( 1.0 - xi_fw[1] ) );  // Enzyme result vs. closed form, exact as before
+  EXPECT_EQ( dphi_deta_fw[0], -0.25 * ( 1.0 - xi_fw[0] ) );
+  EXPECT_EQ( dphi_dxi_fw[1], 0.25 * ( 1.0 - xi_fw[1] ) );
+  EXPECT_EQ( dphi_deta_fw[1], -0.25 * ( 1.0 + xi_fw[0] ) );
+  EXPECT_EQ( dphi_dxi_fw[2], 0.25 * ( 1.0 + xi_fw[1] ) );
+  EXPECT_EQ( dphi_deta_fw[2], 0.25 * ( 1.0 + xi_fw[0] ) );
+  EXPECT_EQ( dphi_dxi_fw[3], -0.25 * ( 1.0 + xi_fw[1] ) );
+  EXPECT_EQ( dphi_deta_fw[3], 0.25 * ( 1.0 - xi_fw[0] ) );
 }
diff --git a/src/tests/shared_par_sparse_mat.cpp b/src/tests/shared_par_sparse_mat.cpp
index 65c7ae4b..daebafda 100644
--- a/src/tests/shared_par_sparse_mat.cpp
+++ b/src/tests/shared_par_sparse_mat.cpp
@@ -385,6 +385,37 @@ TEST_F( ParSparseMatTest, Accessors )
   EXPECT_EQ( A->Height(), local_size );
 }
 
+// A diagonal matrix built from a per-rank vector, applied to a vector of ones, must reproduce that vector
+TEST_F( ParSparseMatTest, DiagonalFromVector )
+{
+  int rank;
+  MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+  if ( rank == 0 ) std::cout << "Testing Construction from Vector..." << std::endl;
+
+  int num_procs;
+  MPI_Comm_size( MPI_COMM_WORLD, &num_procs );
+  constexpr int size = 10;
+  int local_size = size / num_procs + ( rank < ( size % num_procs ) ? 1 : 0 );  // even share, +1 on low ranks
+
+  auto row_starts = GetRowStarts( MPI_COMM_WORLD, size );
+  // rank-dependent values, so every local row has a distinct expected diagonal entry
+  mfem::Vector diag_vals( local_size );
+  for ( int i = 0; i < local_size; ++i ) {
+    diag_vals[i] = static_cast<double>( rank * 100 + i );
+  }
+
+  shared::ParSparseMat A =
+      shared::ParSparseMat::diagonalMatrix( MPI_COMM_WORLD, size, row_starts.GetData(), diag_vals );
+  // multiplying by a vector of ones yields the row sums, which for a diagonal matrix are the diagonal entries
+  shared::ParVector x( A.get(), 0 );
+  x.Fill( 1.0 );
+  auto y = A * x;
+
+  for ( int i = 0; i < local_size; ++i ) {
+    EXPECT_NEAR( y[i], static_cast<double>( rank * 100 + i ), 1e-12 );
+  }
+}
+
 //------------------------------------------------------------------------------
 #include "axom/slic/core/SimpleLogger.hpp"
 
diff --git a/src/tests/tribol_finite_diff_energy_mortar.cpp b/src/tests/tribol_finite_diff_energy_mortar.cpp
new file mode 100644
index 00000000..a6a9b744
--- /dev/null
+++ b/src/tests/tribol_finite_diff_energy_mortar.cpp
@@ -0,0 +1,508 @@
+// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
+// other Tribol Project Developers. See the top-level LICENSE file for details.
+//
+// SPDX-License-Identifier: (MIT)
+
+#include <cmath>
+#include <set>
+#include "tribol/physics/EnergyMortar.hpp"
+#include <gtest/gtest.h>
+
+#ifdef TRIBOL_USE_UMPIRE
+#include "umpire/ResourceManager.hpp"
+#endif
+
+#include "mfem.hpp"
+
+#include "axom/CLI11.hpp"
+#include "axom/slic.hpp"
+
+#include "shared/mesh/MeshBuilder.hpp"
+#include "redecomp/redecomp.hpp"
+
+#include "tribol/config.hpp"
+#include "tribol/common/Parameters.hpp"
+#include "tribol/interface/tribol.hpp"
+
+namespace tribol {
+
+static ContactSmoothing smoother( ContactParams{} );  // NOTE(review): default-parameter instance; this file only reads its get_del()
+
+inline void endpoints( const MeshData::Viewer& mesh, int elem_id, double P0[2], double P1[2] )
+{
+  // Splits the four values written by getFaceCoords() for element elem_id into two 2D points:
+  // entries 0-1 are copied to P0 and entries 2-3 are copied to P1.
+  double xy[4];
+  mesh.getFaceCoords( elem_id, xy );
+  for ( int d = 0; d < 2; ++d ) P0[d] = xy[d];
+  for ( int d = 0; d < 2; ++d ) P1[d] = xy[2 + d];
+}
+
+std::pair<double, double> ContactEvaluator::eval_gtilde( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                                         const MeshData::Viewer& mesh2 ) const
+{
+  // Computes the nodal contact data of the pair and returns only its g_tilde[0] and
+  // g_tilde[1] entries, in that order.
+  NodalContactData nodal = compute_nodal_contact_data( pair, mesh1, mesh2 );
+  const double gtilde_first = nodal.g_tilde[0];
+  const double gtilde_second = nodal.g_tilde[1];
+
+  return std::pair<double, double>( gtilde_first, gtilde_second );
+}
+
+FiniteDiffResult ContactEvaluator::validate_g_tilde( const InterfacePair& pair, MeshData& mesh1, MeshData& mesh2,
+                                                     double epsilon ) const
+{
+  FiniteDiffResult result;
+  // Fills result with grad_gtilde() output and with central differences of g_tilde over the pair's 8 coordinates.
+  auto viewer1 = mesh1.getView();
+  auto viewer2 = mesh2.getView();
+  // Quadrature rule of the unperturbed configuration; it is only computed when enzyme_quadrature is false.
+  auto projs0 = projections( pair, viewer1, viewer2 );
+  auto bounds0 = smoother_.bounds_from_projections( projs0, smoother.get_del() );
+  auto smooth_bounds0 = smoother_.smooth_bounds( bounds0, smoother.get_del() );
+  QuadPoints qp0;
+  if ( !p_.enzyme_quadrature ) {
+    qp0 = compute_quadrature( smooth_bounds0 );
+  }
+  // NOTE(review): del above is read from the file-static smoother rather than from smoother_ -- confirm intended.
+  auto [g1_base, g2_base] = eval_gtilde( pair, viewer1, viewer2 );
+
+  result.g_tilde1_baseline = g1_base;
+  result.g_tilde2_baseline = g2_base;
+
+  // node ids of the pair in DOF order: the two nodes of element A (mesh1), then the two nodes of element B (mesh2)
+  auto A_conn = viewer1.getConnectivity()( static_cast<std::size_t>( pair.m_element_id1 ) );
+  auto B_conn = viewer2.getConnectivity()( static_cast<std::size_t>( pair.m_element_id2 ) );
+
+
+  result.node_ids = { static_cast<int>( A_conn[0] ), static_cast<int>( A_conn[1] ), static_cast<int>( B_conn[0] ),
+                      static_cast<int>( B_conn[1] ) };
+
+  const int num_dofs = 8;
+  result.fd_gradient_g1.resize( num_dofs );
+  result.fd_gradient_g2.resize( num_dofs );
+  result.analytical_gradient_g1.resize( num_dofs );
+  result.analytical_gradient_g2.resize( num_dofs );
+
+  // ===== ANALYTICAL GRADIENTS (8 entries per g_tilde component, filled by grad_gtilde) =====
+  double dgt1_dx[8] = { 0.0 };
+  double dgt2_dx[8] = { 0.0 };
+  grad_gtilde( pair, viewer1, viewer2, dgt1_dx, dgt2_dx );
+  for ( size_t i = 0; i < 8; ++i ) {
+    result.analytical_gradient_g1[i] = dgt1_dx[i];
+    result.analytical_gradient_g2[i] = dgt2_dx[i];
+  }
+
+
+  // ===== ORIGINAL COORDS (copied so that every perturbation below can be undone) =====
+  const IndexT num_nodes1 = mesh1.numberOfNodes();
+  const std::size_t n1 = static_cast<std::size_t>( num_nodes1 );
+
+  std::vector<RealT> x1_orig( n1 ), y1_orig( n1 );
+  {
+    auto pos = mesh1.getView().getPosition();
+    for ( IndexT i = 0; i < num_nodes1; ++i ) {
+      const std::size_t iu = static_cast<std::size_t>( i );
+      x1_orig[iu] = pos[0][i];
+      y1_orig[iu] = pos[1][i];
+    }
+  }
+
+  IndexT num_nodes2 = mesh2.numberOfNodes();
+  const std::size_t n2 = static_cast<std::size_t>( num_nodes2 );
+
+  std::vector<RealT> x2_orig( n2 ), y2_orig( n2 );
+  {
+    auto pos = mesh2.getView().getPosition();
+    for ( int i = 0; i < num_nodes2; ++i ) {
+      const std::size_t iu = static_cast<std::size_t>( i );
+      x2_orig[iu] = pos[0][i];
+      y2_orig[iu] = pos[1][i];
+    }
+  }
+  // evaluation used for the differences: eval_gtilde if enzyme_quadrature is set, otherwise the fixed rule qp0
+  auto eval = [&]( const MeshData::Viewer& v1, const MeshData::Viewer& v2 ) -> std::pair<double, double> {
+    return p_.enzyme_quadrature ? eval_gtilde( pair, v1, v2 ) : eval_gtilde_fixed_qp( pair, v1, v2, qp0 );
+  };
+
+  // ===== FINITE DIFFERENCE GRADIENTS (central differences with step epsilon) =====
+  size_t dof_idx = 0;
+  // DOF order: the nodes of A, then the nodes of B (connectivity order); for each node x first, then y
+  // A nodes -> perturb mesh1
+  for ( int k = 0; k < 2; ++k ) {
+    const int local_node = A_conn[k];
+
+    // x perturbation
+    {
+      auto x_pert = x1_orig;
+      x_pert[static_cast<std::size_t>( local_node )] += epsilon;
+      mesh1.setPosition( x_pert.data(), y1_orig.data(), nullptr );
+      auto [g1p, g2p] = eval( mesh1.getView(), mesh2.getView() );
+
+      x_pert[static_cast<std::size_t>( local_node )] = x1_orig[static_cast<std::size_t>( local_node )] - epsilon;
+      mesh1.setPosition( x_pert.data(), y1_orig.data(), nullptr );
+      auto [g1m, g2m] = eval( mesh1.getView(), mesh2.getView() );
+
+      mesh1.setPosition( x1_orig.data(), y1_orig.data(), nullptr );
+      result.fd_gradient_g1[dof_idx] = ( g1p - g1m ) / ( 2.0 * epsilon );
+      result.fd_gradient_g2[dof_idx] = ( g2p - g2m ) / ( 2.0 * epsilon );
+      dof_idx++;
+    }
+
+    // y perturbation
+    {
+      auto y_pert = y1_orig;
+      y_pert[static_cast<std::size_t>( local_node )] += epsilon;
+      mesh1.setPosition( x1_orig.data(), y_pert.data(), nullptr );
+      auto [g1p, g2p] = eval( mesh1.getView(), mesh2.getView() );
+
+      y_pert[static_cast<std::size_t>( local_node )] = y1_orig[static_cast<std::size_t>( local_node )] - epsilon;
+      mesh1.setPosition( x1_orig.data(), y_pert.data(), nullptr );
+      auto [g1m, g2m] = eval( mesh1.getView(), mesh2.getView() );
+
+      mesh1.setPosition( x1_orig.data(), y1_orig.data(), nullptr );
+      result.fd_gradient_g1[dof_idx] = ( g1p - g1m ) / ( 2.0 * epsilon );
+      result.fd_gradient_g2[dof_idx] = ( g2p - g2m ) / ( 2.0 * epsilon );
+      dof_idx++;
+    }
+  }
+
+  // B nodes -> perturb mesh2
+  for ( int k = 0; k < 2; ++k ) {
+    const int local_node = B_conn[k];
+
+    // x perturbation
+    {
+      auto x_pert = x2_orig;
+      x_pert[static_cast<std::size_t>( local_node )] += epsilon;
+      mesh2.setPosition( x_pert.data(), y2_orig.data(), nullptr );
+      auto [g1p, g2p] = eval( mesh1.getView(), mesh2.getView() );
+
+      x_pert[static_cast<std::size_t>( local_node )] = x2_orig[static_cast<std::size_t>( local_node )] - epsilon;
+      mesh2.setPosition( x_pert.data(), y2_orig.data(), nullptr );
+      auto [g1m, g2m] = eval( mesh1.getView(), mesh2.getView() );
+
+      mesh2.setPosition( x2_orig.data(), y2_orig.data(), nullptr );
+      result.fd_gradient_g1[dof_idx] = ( g1p - g1m ) / ( 2.0 * epsilon );
+      result.fd_gradient_g2[dof_idx] = ( g2p - g2m ) / ( 2.0 * epsilon );
+      dof_idx++;
+    }
+
+    // y perturbation
+    {
+      auto y_pert = y2_orig;
+      y_pert[static_cast<std::size_t>( local_node )] += epsilon;
+      mesh2.setPosition( x2_orig.data(), y_pert.data(), nullptr );
+      auto [g1p, g2p] = eval( mesh1.getView(), mesh2.getView() );
+
+      y_pert[static_cast<std::size_t>( local_node )] = y2_orig[static_cast<std::size_t>( local_node )] - epsilon;
+      mesh2.setPosition( x2_orig.data(), y_pert.data(), nullptr );
+      auto [g1m, g2m] = eval( mesh1.getView(), mesh2.getView() );
+
+      mesh2.setPosition( x2_orig.data(), y2_orig.data(), nullptr );
+      result.fd_gradient_g1[dof_idx] = ( g1p - g1m ) / ( 2.0 * epsilon );
+      result.fd_gradient_g2[dof_idx] = ( g2p - g2m ) / ( 2.0 * epsilon );
+      dof_idx++;
+    }
+  }
+
+  return result;
+}
+
+std::pair<double, double> ContactEvaluator::eval_gtilde_fixed_qp( const InterfacePair& pair,
+                                                                  const MeshData::Viewer& mesh1,
+                                                                  const MeshData::Viewer& mesh2,
+                                                                  const QuadPoints& qp_fixed ) const
+{
+  // Accumulates the gap over the caller-supplied quadrature rule instead of recomputing the rule.
+  // Returns ( sum_i w_i * N1_i * J * gap_i, sum_i w_i * N2_i * J * gap_i ) with N1 = 0.5 - xi, N2 = 0.5 + xi.
+  double A0[2], A1[2];
+  endpoints( mesh1, pair.m_element_id1, A0, A1 );
+
+  // J: Euclidean distance between the two end points of the mesh1 element
+  const double dx = A1[0] - A0[0];
+  const double dy = A1[1] - A0[1];
+  const double J = std::sqrt( dx * dx + dy * dy );
+
+  double sum1 = 0.0;
+  double sum2 = 0.0;
+  const size_t num_points = qp_fixed.qp.size();
+  for ( size_t q = 0; q < num_points; ++q ) {
+    const double xi = qp_fixed.qp[q];
+    const double weight = qp_fixed.w[q];
+    const double gn = gap( pair, mesh1, mesh2, xi );
+
+    sum1 += weight * ( 0.5 - xi ) * J * gn;
+    sum2 += weight * ( 0.5 + xi ) * J * gn;
+  }
+  return { sum1, sum2 };
+}
+
+
+
+
+
+
+
+
+FiniteDiffResult ContactEvaluator::validate_hessian( const InterfacePair& pair,
+                                                     MeshData& mesh1,
+                                                     MeshData& mesh2,
+                                                     double epsilon ) const
+{
+  FiniteDiffResult result;
+  // Fills result with d2_g2tilde() output and with a central-difference Hessian of g_tilde over 8 coordinates.
+  auto viewer1 = mesh1.getView();
+  auto viewer2 = mesh2.getView();
+
+  double hess1[64] = { 0.0 };
+  double hess2[64] = { 0.0 };
+  // the *_gradient_* fields are reused here for 8 x 8 Hessians, flattened as idx = i * ndof + j
+  const int ndof = 8;
+  result.fd_gradient_g1.assign( ndof * ndof, 0.0 );
+  result.fd_gradient_g2.assign( ndof * ndof, 0.0 );
+
+  auto A_conn = viewer1.getConnectivity()( static_cast<std::size_t>( pair.m_element_id1 ) );
+  auto B_conn = viewer2.getConnectivity()( static_cast<std::size_t>( pair.m_element_id2 ) );
+
+  result.node_ids = {
+    static_cast<int>( A_conn[0] ),
+    static_cast<int>( A_conn[1] ),
+    static_cast<int>( B_conn[0] ),
+    static_cast<int>( B_conn[1] )
+  };
+
+  // analytical Hessian (64 entries per g_tilde component, filled by d2_g2tilde)
+  d2_g2tilde( pair, viewer1, viewer2, hess1, hess2 );
+  result.analytical_gradient_g1.assign( hess1, hess1 + 64 );
+  result.analytical_gradient_g2.assign( hess2, hess2 + 64 );
+
+  // ===== ORIGINAL COORDS (copied so that offsets are always applied to the same base state) =====
+  const IndexT num_nodes1 = mesh1.numberOfNodes();
+  const std::size_t n1 = static_cast<std::size_t>( num_nodes1 );
+  std::vector<RealT> x1_orig( n1 ), y1_orig( n1 );
+  {
+    auto pos = mesh1.getView().getPosition();
+    for ( IndexT i = 0; i < num_nodes1; ++i ) {
+      const std::size_t iu = static_cast<std::size_t>( i );
+      x1_orig[iu] = pos[0][i];
+      y1_orig[iu] = pos[1][i];
+    }
+  }
+
+  const IndexT num_nodes2 = mesh2.numberOfNodes();
+  const std::size_t n2 = static_cast<std::size_t>( num_nodes2 );
+  std::vector<RealT> x2_orig( n2 ), y2_orig( n2 );
+  {
+    auto pos = mesh2.getView().getPosition();
+    for ( IndexT i = 0; i < num_nodes2; ++i ) {
+      const std::size_t iu = static_cast<std::size_t>( i );
+      x2_orig[iu] = pos[0][i];
+      y2_orig[iu] = pos[1][i];
+    }
+  }
+
+  // ===== FIXED QUADRATURE FOR enzyme_quadrature = false =====
+  // NOTE(review): del below is read from the file-static smoother rather than from smoother_ -- confirm intended.
+  QuadPoints qp0;
+  if ( !p_.enzyme_quadrature ) {
+    auto projs0 = projections( pair, viewer1, viewer2 );
+    auto bounds0 = smoother_.bounds_from_projections( projs0, smoother.get_del() );
+    auto smooth_bounds0 = smoother_.smooth_bounds( bounds0, smoother.get_del() );
+    qp0 = compute_quadrature( smooth_bounds0 );
+  }
+  // moves both meshes to original + du (order: A node 0 x,y; A node 1 x,y; B node 0 x,y; B node 1 x,y), then evaluates
+  auto eval_from_offsets = [&]( const std::array<double, 8>& du ) -> std::pair<double, double> {
+    auto x1 = x1_orig;
+    auto y1 = y1_orig;
+    auto x2 = x2_orig;
+    auto y2 = y2_orig;
+
+    x1[static_cast<std::size_t>( A_conn[0] )] += du[0];
+    y1[static_cast<std::size_t>( A_conn[0] )] += du[1];
+    x1[static_cast<std::size_t>( A_conn[1] )] += du[2];
+    y1[static_cast<std::size_t>( A_conn[1] )] += du[3];
+
+    x2[static_cast<std::size_t>( B_conn[0] )] += du[4];
+    y2[static_cast<std::size_t>( B_conn[0] )] += du[5];
+    x2[static_cast<std::size_t>( B_conn[1] )] += du[6];
+    y2[static_cast<std::size_t>( B_conn[1] )] += du[7];
+
+    mesh1.setPosition( x1.data(), y1.data(), nullptr );
+    mesh2.setPosition( x2.data(), y2.data(), nullptr );
+
+    if ( p_.enzyme_quadrature ) {
+      return eval_gtilde( pair, mesh1.getView(), mesh2.getView() );
+    } else {
+      return eval_gtilde_fixed_qp( pair, mesh1.getView(), mesh2.getView(), qp0 );
+    }
+  };
+
+  const std::array<double, 8> zero = { 0., 0., 0., 0., 0., 0., 0., 0. };
+  const auto [g10, g20] = eval_from_offsets( zero );
+  // diagonal entries: three-point second difference; off-diagonal entries: four-point mixed central difference
+  // ===== FD HESSIAN =====
+  for ( size_t i = 0; i < ndof; ++i ) {
+    for ( size_t j = 0; j < ndof; ++j ) {
+      const std::size_t idx = static_cast<std::size_t>( i * ndof + j );
+
+      if ( i == j ) {
+        std::array<double, 8> up = zero;
+        std::array<double, 8> um = zero;
+        up[i] += epsilon;
+        um[i] -= epsilon;
+
+        const auto [g1p, g2p] = eval_from_offsets( up );
+        const auto [g1m, g2m] = eval_from_offsets( um );
+
+        result.fd_gradient_g1[idx] = ( g1p - 2.0 * g10 + g1m ) / ( epsilon * epsilon );
+        result.fd_gradient_g2[idx] = ( g2p - 2.0 * g20 + g2m ) / ( epsilon * epsilon );
+      } else {
+        std::array<double, 8> upp = zero;
+        std::array<double, 8> upm = zero;
+        std::array<double, 8> ump = zero;
+        std::array<double, 8> umm = zero;
+
+        upp[i] += epsilon; upp[j] += epsilon;
+        upm[i] += epsilon; upm[j] -= epsilon;
+        ump[i] -= epsilon; ump[j] += epsilon;
+        umm[i] -= epsilon; umm[j] -= epsilon;
+
+        const auto [g1pp, g2pp] = eval_from_offsets( upp );
+        const auto [g1pm, g2pm] = eval_from_offsets( upm );
+        const auto [g1mp, g2mp] = eval_from_offsets( ump );
+        const auto [g1mm, g2mm] = eval_from_offsets( umm );
+
+        result.fd_gradient_g1[idx] = ( g1pp - g1pm - g1mp + g1mm ) / ( 4.0 * epsilon * epsilon );
+        result.fd_gradient_g2[idx] = ( g2pp - g2pm - g2mp + g2mm ) / ( 4.0 * epsilon * epsilon );
+      }
+    }
+  }
+  // the last evaluation left the meshes perturbed; put both back at their original coordinates
+  mesh1.setPosition( x1_orig.data(), y1_orig.data(), nullptr );
+  mesh2.setPosition( x2_orig.data(), y2_orig.data(), nullptr );
+
+  return result;
+}
+
+
+
+
+TEST( GradientCheck, GtildeFDvsAD )
+{
+  // -- Geometry: two facing LINEAR_EDGE segments ------------------------------
+  // Segment A: nodes (0,0) and (1,0), traversed (1,0) -> (0,0) because conn1 is reversed
+  // Segment B: (0.2, 0.5) -> (0.8, 0.5)
+
+  RealT x1[2] = { 0.0, 1.0 };
+  RealT y1[2] = { 0.0, 0.0 };
+  IndexT conn1[2] = { 1, 0 };  // reversed so nA points toward B
+  MeshData mesh1( 0, 1, 2, conn1, LINEAR_EDGE, x1, y1, nullptr, MemorySpace::Host );
+
+  RealT x2[2] = { 0.2, 0.8 };
+  RealT y2[2] = { 0.5, 0.5 };
+  IndexT conn2[2] = { 0, 1 };
+  MeshData mesh2( 1, 1, 2, conn2, LINEAR_EDGE, x2, y2, nullptr, MemorySpace::Host );
+
+  InterfacePair pair( 0, 0 );
+
+  // -- Evaluator setup --------------------------------------------------------
+  ContactParams params;
+  params.del = 0.1;                 // smoothing parameter
+  params.k = 1.0;                   // penalty stiffness
+  params.N = 3;                     // quadrature points
+  params.enzyme_quadrature = true;  // true: validate_g_tilde differences eval_gtilde, not the fixed-quadrature path
+
+  ContactEvaluator evaluator( params );
+
+  // -- Run validation ---------------------------------------------------------
+  const double epsilon = 1e-7;
+  // manual central difference in y of mesh1 node 0; it is only printed for reference, not asserted
+  RealT y1_plus[2] = { epsilon, 0.0 };
+  RealT y1_minus[2] = { -epsilon, 0.0 };
+  mesh1.setPosition( x1, y1_plus, nullptr );
+  auto [gp1, gp2] = evaluator.eval_gtilde( pair, mesh1.getView(), mesh2.getView() );
+  mesh1.setPosition( x1, y1_minus, nullptr );
+  auto [gm1, gm2] = evaluator.eval_gtilde( pair, mesh1.getView(), mesh2.getView() );
+  mesh1.setPosition( x1, y1, nullptr );
+  std::cout << "Manual FD DOF1: " << ( gp1 - gm1 ) / ( 2 * epsilon ) << ", " << ( gp2 - gm2 ) / ( 2 * epsilon ) << "\n";
+  auto result = evaluator.validate_g_tilde( pair, mesh1, mesh2, epsilon );
+
+  // -- Compare: every finite-difference entry must match the analytical one within tol --
+  const double tol = 1e-6;
+  const std::size_t num_dofs = result.node_ids.size() * 2;
+
+  ASSERT_EQ( result.fd_gradient_g1.size(), result.analytical_gradient_g1.size() );
+  ASSERT_EQ( result.fd_gradient_g2.size(), result.analytical_gradient_g2.size() );
+
+  for ( size_t i = 0; i < num_dofs; ++i ) {
+    EXPECT_NEAR( result.fd_gradient_g1[i], result.analytical_gradient_g1[i], tol )
+        << "gtilde1 mismatch at DOF [" << i << "]"
+        << "  node=" << result.node_ids[i / 2] << "  dir=" << ( i % 2 == 0 ? "x" : "y" )
+        << "  FD=" << result.fd_gradient_g1[i] << "  AD=" << result.analytical_gradient_g1[i];
+
+    EXPECT_NEAR( result.fd_gradient_g2[i], result.analytical_gradient_g2[i], tol )
+        << "gtilde2 mismatch at DOF [" << i << "]"
+        << "  node=" << result.node_ids[i / 2] << "  dir=" << ( i % 2 == 0 ? "x" : "y" )
+        << "  FD=" << result.fd_gradient_g2[i] << "  AD=" << result.analytical_gradient_g2[i];
+  }
+}
+
+TEST( HessianCheck, GtildeFDvsAD )
+{
+  // -- Geometry: two facing LINEAR_EDGE segments ------------------------------
+  // Segment A: nodes (0,0) and (1,0), traversed (1,0) -> (0,0) because conn1 is reversed
+  // Segment B: (0.2, 0.5) -> (0.8, 0.5)
+
+  RealT x1[2] = { 0.0, 1.0 };
+  RealT y1[2] = { 0.0, 0.0 };
+
+  RealT x2[2] = { 0.2, 0.8 };
+  RealT y2[2] = { 0.5, 0.5 };
+
+  IndexT conn1[2] = { 1, 0 };
+  IndexT conn2[2] = { 0, 1 };
+
+  MeshData mesh1( 0, 1, 2, conn1, LINEAR_EDGE, x1, y1, nullptr, MemorySpace::Host );
+  MeshData mesh2( 1, 1, 2, conn2, LINEAR_EDGE, x2, y2, nullptr, MemorySpace::Host );
+
+  InterfacePair pair( 0, 0 );
+
+  // -- Evaluator setup --------------------------------------------------------
+  ContactParams params;
+  params.del = 0.1;
+  params.k = 1.0;
+  params.N = 4;
+  params.enzyme_quadrature = true;
+
+  ContactEvaluator evaluator( params );
+
+  // -- Run validation ---------------------------------------------------------
+  const double epsilon = 1e-5;  // finite-difference step handed to validate_hessian
+  auto result = evaluator.validate_hessian( pair, mesh1, mesh2, epsilon );
+
+  // -- Compare: all 64 entries of both flattened 8 x 8 Hessians, FD vs. analytical --
+  const double tol = 1e-4;
+  const int ndof = 8;
+
+  ASSERT_EQ( result.fd_gradient_g1.size(), 64u );
+  ASSERT_EQ( result.fd_gradient_g2.size(), 64u );
+  ASSERT_EQ( result.analytical_gradient_g1.size(), 64u );
+  ASSERT_EQ( result.analytical_gradient_g2.size(), 64u );
+
+  for ( size_t row = 0; row < ndof; ++row ) {
+    for ( size_t col = 0; col < ndof; ++col ) {
+      size_t idx = row * ndof + col;
+
+      EXPECT_NEAR( result.fd_gradient_g1[idx], result.analytical_gradient_g1[idx], tol )
+          << "Hessian g1 mismatch at [row=" << row << ", col=" << col << "]"
+          << "  FD=" << result.fd_gradient_g1[idx] << "  AD=" << result.analytical_gradient_g1[idx];
+
+      EXPECT_NEAR( result.fd_gradient_g2[idx], result.analytical_gradient_g2[idx], tol )
+          << "Hessian g2 mismatch at [row=" << row << ", col=" << col << "]"
+          << "  FD=" << result.fd_gradient_g2[idx] << "  AD=" << result.analytical_gradient_g2[idx];
+    }
+  }
+}
+
+}  // namespace tribol
\ No newline at end of file
diff --git a/src/tests/tribol_mfem_mortar_energy.cpp b/src/tests/tribol_mfem_mortar_energy.cpp
new file mode 100644
index 00000000..d5d4817a
--- /dev/null
+++ b/src/tests/tribol_mfem_mortar_energy.cpp
@@ -0,0 +1,222 @@
+// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
+// other Tribol Project Developers. See the top-level LICENSE file for details.
+//
+// SPDX-License-Identifier: (MIT)
+
+#include <set>
+
+#include <gtest/gtest.h>
+
+#ifdef TRIBOL_USE_UMPIRE
+// Umpire includes
+#include "umpire/ResourceManager.hpp"
+#endif
+
+// MFEM includes
+#include "mfem.hpp"
+
+// Axom includes
+#include "axom/CLI11.hpp"
+#include "axom/slic.hpp"
+
+// Shared includes
+#include "shared/mesh/MeshBuilder.hpp"
+
+// Redecomp includes
+#include "redecomp/redecomp.hpp"
+
+// Tribol includes
+#include "tribol/config.hpp"
+#include "tribol/common/Parameters.hpp"
+#include "tribol/interface/tribol.hpp"
+#include "tribol/interface/mfem_tribol.hpp"
+
+/**
+ * @brief This tests the Tribol MFEM interface running a contact patch test using ENERGY_MORTAR.
+ *        SetUp() builds two stacked squares, solves one linear system and stores the largest displacement entry.
+ */
+class MfemMortarEnergyTest : public testing::TestWithParam<std::tuple<int>> {
+ protected:
+  tribol::RealT max_disp_;  // largest entry of the displacement grid function, reduced over all ranks
+  void SetUp() override
+  {
+    // number of times to uniformly refine the serial mesh before constructing the
+    // parallel mesh
+    int ref_levels = std::get<0>( GetParam() );
+    // polynomial order of the finite element discretization
+    int order = 1;
+
+    // fixed options
+    // boundary element attributes of mortar surface (bottom of top square)
+    auto mortar_attrs = std::set<int>( { 5 } );
+    // boundary element attributes of nonmortar surface (top of bottom square)
+    auto nonmortar_attrs = std::set<int>( { 3 } );
+    // boundary element attributes of x-fixed surfaces (left side)
+    auto xfixed_attrs = std::set<int>( { 4 } );
+    // boundary element attributes of y-fixed surfaces (bottom of bottom square, top of top square)
+    auto yfixed_attrs = std::set<int>( { 1 } );
+
+    // build mesh of 2 squares, each with 2^ref_levels elements per direction
+    int nel_per_dir = std::pow( 2, ref_levels );
+
+    // clang-format off
+    mfem::ParMesh mesh = shared::ParMeshBuilder(MPI_COMM_WORLD, shared::MeshBuilder::Unify({
+      shared::MeshBuilder::SquareMesh(nel_per_dir, nel_per_dir) // Bottom mesh [0,1]x[0,1]
+        .updateBdrAttrib(1, 1) // Bottom (Fixed Y)
+        .updateBdrAttrib(2, 2) // Right
+        .updateBdrAttrib(3, 3) // Top (NonMortar)
+        .updateBdrAttrib(4, 4), // Left (Fixed X)
+      shared::MeshBuilder::SquareMesh(nel_per_dir, nel_per_dir) // Top mesh [0,1]x[0,1]
+        .translate({0.0, 0.99}) // Shift up to [0,1]x[0.99, 1.99]. Overlap 0.01.
+        .updateBdrAttrib(1, 5) // Bottom (Mortar)
+        .updateBdrAttrib(2, 2) // Right
+        .updateBdrAttrib(3, 1) // Top (Fixed Y)
+        .updateBdrAttrib(4, 4) // Left (Fixed X)
+    }));
+    // clang-format on
+
+    // grid function for higher-order nodes
+    auto fe_coll = mfem::H1_FECollection( order, mesh.SpaceDimension() );
+    auto par_fe_space = mfem::ParFiniteElementSpace( &mesh, &fe_coll, mesh.SpaceDimension() );
+    auto coords = mfem::ParGridFunction( &par_fe_space );
+    if ( order > 1 ) {
+      mesh.SetNodalGridFunction( &coords, false );
+    } else {
+      mesh.GetNodes( coords );
+    }
+
+    // grid function for displacement
+    mfem::ParGridFunction displacement{ &par_fe_space };
+    displacement = 0.0;
+
+    // recover dirichlet bc tdof list: x component on the x-fixed attributes, y component on the y-fixed ones
+    mfem::Array<int> ess_tdof_list;
+    {
+      mfem::Array<int> ess_vdof_marker;
+      mfem::Array<int> ess_bdr( mesh.bdr_attributes.Max() );
+      ess_bdr = 0;
+      for ( auto xfixed_attr : xfixed_attrs ) {
+        if ( xfixed_attr <= ess_bdr.Size() ) ess_bdr[xfixed_attr - 1] = 1;
+      }
+      par_fe_space.GetEssentialVDofs( ess_bdr, ess_vdof_marker, 0 );
+      mfem::Array<int> new_ess_vdof_marker;
+      ess_bdr = 0;
+      for ( auto yfixed_attr : yfixed_attrs ) {
+        if ( yfixed_attr <= ess_bdr.Size() ) ess_bdr[yfixed_attr - 1] = 1;
+      }
+      par_fe_space.GetEssentialVDofs( ess_bdr, new_ess_vdof_marker, 1 );
+      for ( int i{ 0 }; i < ess_vdof_marker.Size(); ++i ) {
+        ess_vdof_marker[i] = ess_vdof_marker[i] || new_ess_vdof_marker[i];
+      }
+      mfem::Array<int> ess_tdof_marker;
+      par_fe_space.GetRestrictionMatrix()->BooleanMult( ess_vdof_marker, ess_tdof_marker );
+      mfem::FiniteElementSpace::MarkerToList( ess_tdof_marker, ess_tdof_list );
+    }
+
+    // set up mfem elasticity bilinear form
+    mfem::ParBilinearForm a( &par_fe_space );
+    mfem::ConstantCoefficient lambda( 50.0 );
+    mfem::ConstantCoefficient mu( 50.0 );
+    a.AddDomainIntegrator( new mfem::ElasticityIntegrator( lambda, mu ) );
+    a.Assemble();
+
+    // compute elasticity contribution to stiffness
+    auto A = std::make_unique<mfem::HypreParMatrix>();
+    a.FormSystemMatrix( ess_tdof_list, *A );
+
+    // set up tribol
+    coords.ReadWrite();
+    int coupling_scheme_id = 0;
+    int mesh1_id = 0;
+    int mesh2_id = 1;
+    tribol::registerMfemCouplingScheme( coupling_scheme_id, mesh1_id, mesh2_id, mesh, coords, mortar_attrs,
+                                        nonmortar_attrs, tribol::SURFACE_TO_SURFACE, tribol::NO_SLIDING,
+                                        tribol::ENERGY_MORTAR, tribol::FRICTIONLESS, tribol::LAGRANGE_MULTIPLIER,
+                                        tribol::BINNING_GRID );
+    tribol::setLagrangeMultiplierOptions( coupling_scheme_id, tribol::ImplicitEvalMode::MORTAR_RESIDUAL_JACOBIAN );
+
+    // Set Penalty options. NOTE(review): the scheme above is registered with LAGRANGE_MULTIPLIER -- confirm intent.
+    tribol::setMfemKinematicConstantPenalty( coupling_scheme_id, 1000.0, 1000.0 );
+
+    coords.ReadWrite();
+    // update tribol (compute contact contribution to force and stiffness)
+    tribol::updateMfemParallelDecomposition();
+    tribol::RealT dt{ 1.0 };  // time is arbitrary here (no timesteps)
+    tribol::update( 1, 1.0, dt );
+
+    // retrieve contact stiffness matrix
+    auto A_cont = tribol::getMfemDfDx( coupling_scheme_id );
+
+    // retrieve contact force (response), negate it, and zero it on the essential tdofs
+    auto f_contact = tribol::getMfemTDofForce( coupling_scheme_id );
+    f_contact.Neg();
+    for ( int i{ 0 }; i < ess_tdof_list.Size(); ++i ) {
+      f_contact( ess_tdof_list[i] ) = 0.0;
+    }
+
+    // Add contact stiffness to elasticity stiffness
+    auto A_total = std::unique_ptr<mfem::HypreParMatrix>( mfem::Add( 1.0, *A, 1.0, *A_cont ) );
+    auto A_elim = std::unique_ptr<mfem::HypreParMatrix>( A_total->EliminateRowsCols( ess_tdof_list ) );
+    // NOTE(review): A_elim is never read; presumably the call is kept because it also modifies A_total -- confirm.
+    // Solve for X (displacement)
+    mfem::Vector X( par_fe_space.GetTrueVSize() );
+    X = 0.0;
+
+    mfem::MINRESSolver solver( MPI_COMM_WORLD );
+    solver.SetRelTol( 1.0e-8 );
+    solver.SetAbsTol( 1.0e-12 );
+    solver.SetMaxIter( 5000 );
+    solver.SetPrintLevel( 3 );
+    solver.SetOperator( *A_total );
+    solver.Mult( f_contact, X );
+
+    // move displacements to grid function
+    {
+      auto& P = *par_fe_space.GetProlongationMatrix();
+      P.Mult( X, displacement );
+    }
+
+    // Record the largest displacement entry over all ranks (Max() is signed, so this is not a magnitude).
+    auto local_max = displacement.Max();
+    max_disp_ = 0.0;
+    MPI_Allreduce( &local_max, &max_disp_, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD );
+  }
+};
+
+TEST_P( MfemMortarEnergyTest, check_mortar_displacement )
+{
+  // Sanity bounds only, no reference solution: the largest displacement entry computed in SetUp() must be
+  // positive and stay below 0.01 (the initial overlap noted next to the mesh translation in SetUp()).
+  EXPECT_GT( max_disp_, 0.0 );
+  EXPECT_LT( max_disp_, 0.01 );
+
+  MPI_Barrier( MPI_COMM_WORLD );
+}
+
+INSTANTIATE_TEST_SUITE_P( tribol, MfemMortarEnergyTest, testing::Values( std::make_tuple( 2 ) ) );
+
+//------------------------------------------------------------------------------
+#include "axom/slic/core/SimpleLogger.hpp"
+
+int main( int argc, char* argv[] )
+{
+  // Test driver: MPI is brought up before GoogleTest parses its flags and is shut down
+  // only after every test has run and Tribol has been finalized.
+  MPI_Init( &argc, &argv );
+  ::testing::InitGoogleTest( &argc, argv );
+
+#ifdef TRIBOL_USE_UMPIRE
+  // touch the singleton so umpire's ResourceManager is initialized up front
+  umpire::ResourceManager::getInstance();
+#endif
+
+  // test logger; it is finalized when main's scope is left
+  axom::slic::SimpleLogger logger;
+
+  const int exit_code = RUN_ALL_TESTS();
+
+  tribol::finalize();
+  MPI_Finalize();
+
+  return exit_code;
+}
diff --git a/src/tests/tribol_new_energy_patch.cpp b/src/tests/tribol_new_energy_patch.cpp
new file mode 100644
index 00000000..163e0dcb
--- /dev/null
+++ b/src/tests/tribol_new_energy_patch.cpp
@@ -0,0 +1,366 @@
+// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
+// other Tribol Project Developers. See the top-level LICENSE file for details.
+//
+// SPDX-License-Identifier: (MIT)
+
+#include <cmath>
+#include <set>
+
+#include <gtest/gtest.h>
+
+#ifdef TRIBOL_USE_UMPIRE
+#include "umpire/ResourceManager.hpp"
+#endif
+ 
+#include "mfem.hpp"
+
+#include "axom/CLI11.hpp"
+#include "axom/slic.hpp"
+
+#include "shared/mesh/MeshBuilder.hpp"
+#include "redecomp/redecomp.hpp"
+
+#include "tribol/config.hpp"
+#include "tribol/common/Parameters.hpp"
+#include "tribol/interface/tribol.hpp"
+#include "tribol/interface/mfem_tribol.hpp"
+
+/**
+ * @brief Contact patch test using ENERGY_MORTAR with zero initial gap
+ *        and prescribed displacement applied incrementally over timesteps.
+ *
+ * Two unit squares [0,1]x[0,1] and [0,1]x[1,2] with zero gap.
+ * Linear elasticity with lambda = mu = 50.
+ *
+ * Analytical solution (plane strain, uniaxial stress with sigma_xx = 0):
+ *   eps_yy = applied_disp / total_height
+ *   eps_xx = -lambda / (lambda + 2*mu) * eps_yy
+ *   u_y(x,y) = eps_yy * y
+ *   u_x(x,y) = eps_xx * x
+ */
+class MfemMortarEnergyPatchTest : public testing::TestWithParam<std::tuple<int>> {
+ protected:
+  tribol::RealT max_disp_;  ///< MPI-wide maximum entry of the final displacement grid function
+  double l2_err_vec_;       ///< ParNormlp (p = 2) of exact minus computed nodal displacement, all components
+  double l2_err_x_;         ///< same norm for the x-component only
+  double l2_err_y_;         ///< same norm for the y-component only
+
+  // --- User-configurable parameters ---
+  static constexpr int num_timesteps_ = 10;                // number of load increments
+  static constexpr double total_prescribed_disp_ = -0.01;  // y-displacement imposed on attribute 6 at the last step
+  static constexpr double lam_ = 50.0;                     // lambda coefficient passed to ElasticityIntegrator
+  static constexpr double mu_ = 50.0;                      // mu coefficient passed to ElasticityIntegrator
+  // ------------------------------------
+  /// Builds the two-block mesh, runs the incremental contact solve, and stores the error norms the test checks.
+  void SetUp() override
+  {
+    const int ref_levels = std::get<0>( GetParam() );
+    int order = 1;
+
+    auto mortar_attrs = std::set<int>( { 5 } );
+    auto nonmortar_attrs = std::set<int>( { 3 } );
+    auto xfixed_attrs = std::set<int>( { 4 } );
+    auto yfixed_bottom_attrs = std::set<int>( { 1 } );
+    auto prescribed_attrs = std::set<int>( { 6 } );
+
+    const int nel_per_dir = 1 << ref_levels;  // 2^ref_levels elements per direction, computed in integer arithmetic
+
+    // clang-format off
+    mfem::ParMesh mesh = shared::ParMeshBuilder(MPI_COMM_WORLD, shared::MeshBuilder::Unify({
+      shared::MeshBuilder::SquareMesh(nel_per_dir, nel_per_dir)
+        .updateBdrAttrib(1, 1)   // bottom (Fixed Y)
+        .updateBdrAttrib(2, 2)   // right
+        .updateBdrAttrib(3, 3)   // top  (NonMortar)
+        .updateBdrAttrib(4, 4),  // left (X-fixed)
+      shared::MeshBuilder::SquareMesh(nel_per_dir, nel_per_dir)
+        .translate({0.0, 1.0})
+        .updateBdrAttrib(1, 5)   // bottom (Mortar)
+        .updateBdrAttrib(2, 2)   // right
+        .updateBdrAttrib(3, 6)   // top  (prescribed displacement)
+        .updateBdrAttrib(4, 4)   // left  (Fixed x)
+    }));
+    // clang-format on
+
+    // FE space and grid functions
+    auto fe_coll = mfem::H1_FECollection( order, mesh.SpaceDimension() );
+    auto par_fe_space = mfem::ParFiniteElementSpace( &mesh, &fe_coll, mesh.SpaceDimension() );
+    auto coords = mfem::ParGridFunction( &par_fe_space );
+    if ( order > 1 ) {
+      mesh.SetNodalGridFunction( &coords, false );
+    } else {
+      mesh.GetNodes( coords );
+    }
+
+    // Grid function for displacement
+    mfem::ParGridFunction displacement( &par_fe_space );
+    displacement = 0.0;
+    // Undeformed nodal coordinates; coords is rebuilt from these plus the displacement every step
+    mfem::ParGridFunction ref_coords( &par_fe_space );
+    mesh.GetNodes( ref_coords );
+
+    // Build the Dirichlet (essential) vdof marker, one boundary at a time
+    mfem::Array<int> ess_vdof_marker( par_fe_space.GetVSize() );
+    ess_vdof_marker = 0;
+
+    // x-fixed on left
+    {
+      mfem::Array<int> tmp;
+      mfem::Array<int> bdr( mesh.bdr_attributes.Max() );
+      bdr = 0;
+      for ( auto a : xfixed_attrs ) bdr[a - 1] = 1;
+      par_fe_space.GetEssentialVDofs( bdr, tmp, 0 );
+      for ( int i = 0; i < tmp.Size(); ++i ) ess_vdof_marker[i] = ess_vdof_marker[i] || tmp[i];
+    }
+
+    // y-fixed on bottom
+    {
+      mfem::Array<int> tmp;
+      mfem::Array<int> bdr( mesh.bdr_attributes.Max() );
+      bdr = 0;
+      for ( auto a : yfixed_bottom_attrs ) bdr[a - 1] = 1;
+      par_fe_space.GetEssentialVDofs( bdr, tmp, 1 );
+      for ( int i = 0; i < tmp.Size(); ++i ) ess_vdof_marker[i] = ess_vdof_marker[i] || tmp[i];
+    }
+
+    // y-prescribed on top
+    mfem::Array<int> prescribed_vdof_marker( par_fe_space.GetVSize() );
+    prescribed_vdof_marker = 0;
+    {
+      mfem::Array<int> tmp;
+      mfem::Array<int> bdr( mesh.bdr_attributes.Max() );
+      bdr = 0;
+      for ( auto a : prescribed_attrs ) bdr[a - 1] = 1;
+      par_fe_space.GetEssentialVDofs( bdr, tmp, 1 );
+      prescribed_vdof_marker = tmp;
+      for ( int i = 0; i < tmp.Size(); ++i ) ess_vdof_marker[i] = ess_vdof_marker[i] || tmp[i];
+    }
+    // Convert the vdof markers into true-dof index lists (all essential dofs, then the prescribed subset)
+    mfem::Array<int> ess_tdof_list;
+    {
+      mfem::Array<int> ess_tdof_marker;
+      par_fe_space.GetRestrictionMatrix()->BooleanMult( ess_vdof_marker, ess_tdof_marker );
+      mfem::FiniteElementSpace::MarkerToList( ess_tdof_marker, ess_tdof_list );
+    }
+
+    mfem::Array<int> prescribed_tdof_list;
+    {
+      mfem::Array<int> marker;
+      par_fe_space.GetRestrictionMatrix()->BooleanMult( prescribed_vdof_marker, marker );
+      mfem::FiniteElementSpace::MarkerToList( marker, prescribed_tdof_list );
+    }
+
+    // set up mfem elasticity bilinear form
+    mfem::ParBilinearForm a( &par_fe_space );
+    mfem::ConstantCoefficient lambda_coeff( lam_ );
+    mfem::ConstantCoefficient mu_coeff( mu_ );
+    a.AddDomainIntegrator( new mfem::ElasticityIntegrator( lambda_coeff, mu_coeff ) );
+    a.Assemble();
+    a.Finalize();
+    auto A_elastic_raw = std::unique_ptr<mfem::HypreParMatrix>( a.ParallelAssemble() );
+
+    // VisIt output: save the initial (zero-displacement) state as cycle 0
+    mfem::VisItDataCollection visit_dc( "energy_patch_test", &mesh );
+    visit_dc.SetPrecision( 8 );
+    visit_dc.RegisterField( "displacement", &displacement );
+    visit_dc.SetCycle( 0 );
+    visit_dc.SetTime( 0.0 );
+    visit_dc.Save();
+
+    // timestepping loop for displacement
+    double disp_increment = total_prescribed_disp_ / num_timesteps_;
+    tribol::RealT dt = 1.0 / num_timesteps_;
+    int cs_id = 0, mesh1_id = 0, mesh2_id = 1;
+
+    mfem::Vector X( par_fe_space.GetTrueVSize() );
+    X = 0.0;
+
+    for ( int step = 1; step <= num_timesteps_; ++step ) {
+      double current_prescribed_disp = disp_increment * step;
+
+      // Prescribed displacement vector
+      mfem::Vector X_prescribed( par_fe_space.GetTrueVSize() );
+      X_prescribed = 0.0;
+      for ( int i = 0; i < prescribed_tdof_list.Size(); ++i ) {
+        X_prescribed( prescribed_tdof_list[i] ) = current_prescribed_disp;
+      }
+
+      // Update coordinates for contact detection
+      {
+        mfem::Vector X_temp( X );
+        for ( int i = 0; i < prescribed_tdof_list.Size(); ++i ) {
+          X_temp( prescribed_tdof_list[i] ) = current_prescribed_disp;
+        }
+        auto& P = *par_fe_space.GetProlongationMatrix();
+        P.Mult( X_temp, displacement );
+      }
+      coords = ref_coords;
+      coords += displacement;
+
+      // Re-register tribol each step (internal arrays need fresh allocation
+      // when contact pairs change between steps)
+      coords.ReadWrite();
+      tribol::registerMfemCouplingScheme( cs_id, mesh1_id, mesh2_id, mesh, coords, mortar_attrs, nonmortar_attrs,
+                                          tribol::SURFACE_TO_SURFACE, tribol::NO_SLIDING, tribol::ENERGY_MORTAR,
+                                          tribol::FRICTIONLESS, tribol::LAGRANGE_MULTIPLIER, tribol::BINNING_GRID );
+      tribol::setLagrangeMultiplierOptions( cs_id, tribol::ImplicitEvalMode::MORTAR_RESIDUAL_JACOBIAN );
+      tribol::setMfemKinematicConstantPenalty( cs_id, 10000.0, 10000.0 );
+
+      tribol::updateMfemParallelDecomposition();
+      tribol::update( step, step * dt, dt );
+
+      auto A_cont = tribol::getMfemDfDx( cs_id );
+
+      mfem::Vector f_contact( par_fe_space.GetTrueVSize() );
+      f_contact = 0.0;
+      tribol::getMfemResponse( cs_id, f_contact );
+      f_contact.Neg();
+
+      // Inhomogeneous Dirichlet: rhs = f_contact - (A_elastic + A_contact) * u_prescribed
+      auto A_total = std::unique_ptr<mfem::HypreParMatrix>( mfem::Add( 1.0, *A_elastic_raw, 1.0, *A_cont ) );
+
+      mfem::Vector rhs( par_fe_space.GetTrueVSize() );
+      A_total->Mult( X_prescribed, rhs );
+      rhs.Neg();
+      rhs += f_contact;
+
+      for ( int i = 0; i < ess_tdof_list.Size(); ++i ) {
+        rhs( ess_tdof_list[i] ) = 0.0;
+      }
+      // EliminateRowsCols returns a newly allocated matrix of the eliminated entries; own it so it is not leaked
+      auto A_elim = std::unique_ptr<mfem::HypreParMatrix>( A_total->EliminateRowsCols( ess_tdof_list ) );
+
+      mfem::Vector X_free( par_fe_space.GetTrueVSize() );
+      X_free = 0.0;
+
+      mfem::HypreBoomerAMG amg( *A_total );
+      amg.SetElasticityOptions( &par_fe_space );
+      amg.SetPrintLevel( 0 );
+
+      mfem::MINRESSolver solver( MPI_COMM_WORLD );
+      solver.SetRelTol( 1.0e-8 );
+      solver.SetAbsTol( 1.0e-12 );
+      solver.SetMaxIter( 5000 );
+      solver.SetPrintLevel( step == num_timesteps_ ? 3 : 1 );
+      solver.SetPreconditioner( amg );
+      solver.SetOperator( *A_total );
+      solver.Mult( rhs, X_free );
+
+      X = X_free;
+      X += X_prescribed;
+
+      SLIC_INFO( "Timestep " << step << "/" << num_timesteps_ << " | prescribed disp = " << current_prescribed_disp );
+
+      // Save VisIt output
+      {
+        auto& P = *par_fe_space.GetProlongationMatrix();
+        P.Mult( X, displacement );
+      }
+      visit_dc.SetCycle( step );
+      visit_dc.SetTime( step * dt );
+      visit_dc.Save();
+    }
+
+    // Get final displacement
+    {
+      auto& P = *par_fe_space.GetProlongationMatrix();
+      P.Mult( X, displacement );
+    }
+
+    auto local_max = displacement.Max();
+    max_disp_ = 0.0;
+    MPI_Allreduce( &local_max, &max_disp_, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD );
+    SLIC_INFO( "Max displacement: " << max_disp_ );
+
+    // -----------------------------------------------------------------
+    // Analytical solution comparison
+    //
+    // Plane strain, uniaxial stress (sigma_xx = 0, free right side):
+    //   eps_yy = applied_disp / total_height = -0.01 / 2.0 = -0.005
+    //   eps_xx = -lambda/(lambda + 2*mu) * eps_yy
+    //   u_y = eps_yy * y
+    //   u_x = eps_xx * x
+    // -----------------------------------------------------------------
+    double total_height = 2.0;
+    double eps_yy = total_prescribed_disp_ / total_height;
+    double eps_xx = -lam_ / ( lam_ + 2.0 * mu_ ) * eps_yy;
+
+    SLIC_INFO( "Analytical: eps_yy = " << eps_yy << ", eps_xx = " << eps_xx );
+
+    mfem::VectorFunctionCoefficient exact_sol_coeff( 2, [eps_xx, eps_yy]( const mfem::Vector& x, mfem::Vector& u ) {
+      u[0] = eps_xx * x[0];
+      u[1] = eps_yy * x[1];
+    } );
+
+    mfem::ParGridFunction exact_disp( &par_fe_space );
+    exact_disp.ProjectCoefficient( exact_sol_coeff );
+
+    // Vector error
+    mfem::ParGridFunction error_vec( exact_disp );
+    error_vec -= displacement;
+    l2_err_vec_ = mfem::ParNormlp( error_vec, 2, MPI_COMM_WORLD );
+
+    // Component-wise errors. NOTE(review): the i / n + i split assumes all x entries precede all y entries - confirm
+    const mfem::FiniteElementCollection* fec = par_fe_space.FEColl();
+    mfem::ParFiniteElementSpace scalar_fes( &mesh, fec, 1, par_fe_space.GetOrdering() );
+    const int n = scalar_fes.GetNDofs();
+
+    mfem::ParGridFunction ux_exact( &scalar_fes ), ux_num( &scalar_fes );
+    mfem::ParGridFunction uy_exact( &scalar_fes ), uy_num( &scalar_fes );
+
+    for ( int i = 0; i < n; ++i ) {
+      ux_exact( i ) = exact_disp( i );
+      ux_num( i ) = displacement( i );
+      uy_exact( i ) = exact_disp( n + i );
+      uy_num( i ) = displacement( n + i );
+    }
+
+    mfem::ParGridFunction ux_err( ux_exact );
+    ux_err -= ux_num;
+    l2_err_x_ = mfem::ParNormlp( ux_err, 2, MPI_COMM_WORLD );
+
+    mfem::ParGridFunction uy_err( uy_exact );
+    uy_err -= uy_num;
+    l2_err_y_ = mfem::ParNormlp( uy_err, 2, MPI_COMM_WORLD );
+
+    SLIC_INFO( "L2 error (vector): " << l2_err_vec_ );
+    SLIC_INFO( "L2 error (x):      " << l2_err_x_ );
+    SLIC_INFO( "L2 error (y):      " << l2_err_y_ );
+    SLIC_INFO( "Consistency check |err_vec^2 - (err_x^2 + err_y^2)| = "
+               << std::abs( l2_err_vec_ * l2_err_vec_ - ( l2_err_x_ * l2_err_x_ + l2_err_y_ * l2_err_y_ ) ) );
+  }
+};
+
+TEST_P( MfemMortarEnergyPatchTest, check_patch_test )
+{
+  EXPECT_GT( max_disp_, 0.0 );              // largest displacement entry must be positive
+  EXPECT_NEAR( 0.0, l2_err_vec_, 1.0e-2 );  // vector error against the analytical field
+  EXPECT_NEAR( 0.0, l2_err_x_, 1.0e-2 );    // x-component error
+  EXPECT_NEAR( 0.0, l2_err_y_, 1.0e-2 );    // y-component error
+  // Collective barrier over MPI_COMM_WORLD before the test body returns.
+  MPI_Barrier( MPI_COMM_WORLD );
+}
+// Single instantiation: the tuple value 2 is the refinement level, i.e. 2^2 = 4 elements per direction per block.
+INSTANTIATE_TEST_SUITE_P( tribol, MfemMortarEnergyPatchTest, testing::Values( std::make_tuple( 2 ) ) );
+
+//------------------------------------------------------------------------------
+#include "axom/slic/core/SimpleLogger.hpp"
+
+int main( int argc, char* argv[] )
+{
+  // Start MPI, then let GoogleTest consume its own command-line flags.
+  MPI_Init( &argc, &argv );
+  ::testing::InitGoogleTest( &argc, argv );
+
+#ifdef TRIBOL_USE_UMPIRE
+  umpire::ResourceManager::getInstance();
+#endif
+
+  // Logger lives until main's scope ends; the test status becomes the exit code.
+  axom::slic::SimpleLogger logger;
+  const int result = RUN_ALL_TESTS();
+
+  // Shut down Tribol first, then MPI.
+  tribol::finalize();
+  MPI_Finalize();
+  return result;
+}
diff --git a/src/tests/tribol_new_energy_patch_LM.cpp b/src/tests/tribol_new_energy_patch_LM.cpp
new file mode 100644
index 00000000..b9a7597d
--- /dev/null
+++ b/src/tests/tribol_new_energy_patch_LM.cpp
@@ -0,0 +1,471 @@
+
+
+// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
+// other Tribol Project Developers. See the top-level LICENSE file for details.
+
+// SPDX-License-Identifier: (MIT)
+
+#include <cmath>
+#include <set>
+
+#include <gtest/gtest.h>
+
+#ifdef TRIBOL_USE_UMPIRE
+#include "umpire/ResourceManager.hpp"
+#endif
+
+#include "mfem.hpp"
+
+#include "axom/CLI11.hpp"
+#include "axom/slic.hpp"
+
+#include "shared/mesh/MeshBuilder.hpp"
+#include "redecomp/redecomp.hpp"
+
+#include "tribol/config.hpp"
+#include "tribol/common/Parameters.hpp"
+#include "tribol/interface/tribol.hpp"
+#include "tribol/interface/mfem_tribol.hpp"
+
+/**
+ * @brief Contact patch test using ENERGY_MORTAR with Lagrange multiplier
+ *        enforcement and prescribed displacement applied incrementally.
+ *
+ * Two unit squares [0,1]x[0,1] and [0,1]x[1,2] with zero initial gap.
+ * Linear elasticity with lambda = mu = 50.
+ *
+ *
+ * Analytical solution (plane strain, uniaxial stress with sigma_xx = 0):
+ *   eps_yy = applied_disp / total_height
+ *   eps_xx = -lambda / (lambda + 2*mu) * eps_yy
+ *   u_y(x,y) = eps_yy * y
+ *   u_x(x,y) = eps_xx * x
+ */
+class MfemMortarEnergyLagrangePatchTest : public testing::TestWithParam<std::tuple<int>> {
+ protected:
+  tribol::RealT max_disp_;  ///< MPI-wide maximum entry of the final displacement grid function
+  double l2_err_vec_;       ///< ParNormlp (p = 2) of exact minus computed nodal displacement, all components
+  double l2_err_x_;         ///< same norm for the x-component only
+  double l2_err_y_;         ///< same norm for the y-component only
+
+  // --- User-configurable parameters ---
+  static constexpr int num_timesteps_ = 1;                 // number of load increments
+  static constexpr double total_prescribed_disp_ = -0.01;  // y-displacement imposed on attribute 6 at the last step
+  static constexpr double lam_ = 50.0;                     // lambda coefficient passed to ElasticityIntegrator
+  static constexpr double mu_ = 50.0;                      // mu coefficient passed to ElasticityIntegrator
+  static constexpr int max_newton_iter_ = 10;              // cap on Newton iterations per timestep
+  static constexpr double newton_rtol_ = 1.0e-10;          // NOTE: not referenced by the Newton loop in SetUp
+  static constexpr double newton_atol_ = 1.0e-12;          // absolute residual tolerance used for convergence
+  // ------------------------------------
+  /// Builds the two-block mesh, runs the Newton/Lagrange-multiplier contact solve, and stores the error norms.
+  void SetUp() override
+  {
+    // NOTE: the refinement-level test parameter is not used here; the mesh resolution is fixed below.
+    int order = 1;
+
+    auto mortar_attrs = std::set<int>( { 5 } );
+    auto nonmortar_attrs = std::set<int>( { 3 } );
+    auto xfixed_attrs = std::set<int>( { 4 } );
+    auto yfixed_bottom_attrs = std::set<int>( { 1 } );
+    auto prescribed_attrs = std::set<int>( { 6 } );
+
+    const int nel_per_dir_top = 10;
+    const int nel_per_dir_bottom = 10;
+
+    // clang-format off
+    mfem::ParMesh mesh = shared::ParMeshBuilder(MPI_COMM_WORLD, shared::MeshBuilder::Unify({
+      shared::MeshBuilder::SquareMesh(nel_per_dir_bottom, nel_per_dir_bottom)
+        .updateBdrAttrib(1, 1)   // bottom (Fixed Y)
+        .updateBdrAttrib(2, 2)   // right
+        .updateBdrAttrib(3, 3)   // top  (NonMortar)
+        .updateBdrAttrib(4, 4),  // left (X-fixed)
+      shared::MeshBuilder::SquareMesh(nel_per_dir_top, nel_per_dir_top)
+        .translate({0.0, 1.0})
+        .updateBdrAttrib(1, 5)   // bottom (Mortar)
+        .updateBdrAttrib(2, 2)   // right
+        .updateBdrAttrib(3, 6)   // top  (prescribed displacement)
+        .updateBdrAttrib(4, 4)   // left  (Fixed x)
+    }));
+    // clang-format on
+    // FE space and grid functions
+    auto fe_coll = mfem::H1_FECollection( order, mesh.SpaceDimension() );
+    auto par_fe_space = mfem::ParFiniteElementSpace( &mesh, &fe_coll, mesh.SpaceDimension() );
+    auto coords = mfem::ParGridFunction( &par_fe_space );
+    if ( order > 1 ) {
+      mesh.SetNodalGridFunction( &coords, false );
+    } else {
+      mesh.GetNodes( coords );
+    }
+
+    // Grid function for displacement
+    mfem::ParGridFunction displacement( &par_fe_space );
+    displacement = 0.0;
+
+    mfem::ParGridFunction ref_coords( &par_fe_space );
+    mesh.GetNodes( ref_coords );
+
+    // ---- Essential boundary conditions ----
+
+    mfem::Array<int> ess_vdof_marker( par_fe_space.GetVSize() );
+    ess_vdof_marker = 0;
+
+    // x-fixed on left
+    {
+      mfem::Array<int> tmp;
+      mfem::Array<int> bdr( mesh.bdr_attributes.Max() );
+      bdr = 0;
+      for ( auto a : xfixed_attrs ) bdr[a - 1] = 1;
+      par_fe_space.GetEssentialVDofs( bdr, tmp, 0 );
+      for ( int i = 0; i < tmp.Size(); ++i ) ess_vdof_marker[i] = ess_vdof_marker[i] || tmp[i];
+    }
+
+    // y-fixed on bottom
+    {
+      mfem::Array<int> tmp;
+      mfem::Array<int> bdr( mesh.bdr_attributes.Max() );
+      bdr = 0;
+      for ( auto a : yfixed_bottom_attrs ) bdr[a - 1] = 1;
+      par_fe_space.GetEssentialVDofs( bdr, tmp, 1 );
+      for ( int i = 0; i < tmp.Size(); ++i ) ess_vdof_marker[i] = ess_vdof_marker[i] || tmp[i];
+    }
+
+    // y-prescribed on top
+    mfem::Array<int> prescribed_vdof_marker( par_fe_space.GetVSize() );
+    prescribed_vdof_marker = 0;
+    {
+      mfem::Array<int> tmp;
+      mfem::Array<int> bdr( mesh.bdr_attributes.Max() );
+      bdr = 0;
+      for ( auto a : prescribed_attrs ) bdr[a - 1] = 1;
+      par_fe_space.GetEssentialVDofs( bdr, tmp, 1 );
+      prescribed_vdof_marker = tmp;
+      for ( int i = 0; i < tmp.Size(); ++i ) ess_vdof_marker[i] = ess_vdof_marker[i] || tmp[i];
+    }
+
+    mfem::Array<int> ess_tdof_list;
+    {
+      mfem::Array<int> ess_tdof_marker;
+      par_fe_space.GetRestrictionMatrix()->BooleanMult( ess_vdof_marker, ess_tdof_marker );
+      mfem::FiniteElementSpace::MarkerToList( ess_tdof_marker, ess_tdof_list );
+    }
+
+    mfem::Array<int> prescribed_tdof_list;
+    {
+      mfem::Array<int> marker;
+      par_fe_space.GetRestrictionMatrix()->BooleanMult( prescribed_vdof_marker, marker );
+      mfem::FiniteElementSpace::MarkerToList( marker, prescribed_tdof_list );
+    }
+
+    // ---- Elastic stiffness matrix ----
+
+    mfem::ParBilinearForm a( &par_fe_space );
+    mfem::ConstantCoefficient lambda_coeff( lam_ );
+    mfem::ConstantCoefficient mu_coeff( mu_ );
+    a.AddDomainIntegrator( new mfem::ElasticityIntegrator( lambda_coeff, mu_coeff ) );
+    a.Assemble();
+    a.Finalize();
+    auto K_elastic = std::unique_ptr<mfem::HypreParMatrix>( a.ParallelAssemble() );
+
+    // ---- VisIt output ----
+
+    mfem::VisItDataCollection visit_dc( "energy_lagrange_patch_test", &mesh );
+    visit_dc.SetPrecision( 8 );
+    visit_dc.RegisterField( "displacement", &displacement );
+
+    mfem::ParGridFunction exact_disp( &par_fe_space );
+    exact_disp = 0.0;
+    visit_dc.RegisterField( "Exact Replacement", &exact_disp );  // NOTE(review): likely meant "Exact Displacement"
+    visit_dc.SetCycle( 0 );
+    visit_dc.SetTime( 0.0 );
+    visit_dc.Save();
+
+    // ---- Time-stepping loop ----
+
+    double disp_increment = total_prescribed_disp_ / num_timesteps_;
+    tribol::RealT dt = 1.0 / num_timesteps_;
+    int cs_id = 0, mesh1_id = 0, mesh2_id = 1;
+
+    const int disp_size = par_fe_space.GetTrueVSize();
+
+    mfem::Vector U( disp_size );  // total displacement true-dof vector
+    U = 0.0;
+
+    // Lambda persists across timesteps (warm start); owned here so it is released on every exit path
+    // NOTE: sized after first tribol registration when contact FE space is known
+    std::unique_ptr<mfem::HypreParVector> lambda;
+    int contact_size = 0;
+
+    for ( int step = 1; step <= num_timesteps_; ++step ) {
+      double current_prescribed_disp = disp_increment * step;
+
+      // Build prescribed displacement vector
+      mfem::Vector U_prescribed( disp_size );
+      U_prescribed = 0.0;
+      for ( int i = 0; i < prescribed_tdof_list.Size(); ++i ) {
+        U_prescribed( prescribed_tdof_list[i] ) = current_prescribed_disp;
+      }
+
+      // Set initial guess for this step: use previous converged displacement
+      // with updated prescribed DOFs
+      for ( int i = 0; i < prescribed_tdof_list.Size(); ++i ) {
+        U( prescribed_tdof_list[i] ) = current_prescribed_disp;
+      }
+
+      // ---- Newton iteration ----
+      for ( int newton = 0; newton < max_newton_iter_; ++newton ) {
+        // Update coordinates with current displacement
+        {
+          auto& P = *par_fe_space.GetProlongationMatrix();
+          P.Mult( U, displacement );
+        }
+        coords = ref_coords;
+        coords += displacement;
+
+        // Register tribol and update contact data
+        coords.ReadWrite();
+        tribol::registerMfemCouplingScheme( cs_id, mesh1_id, mesh2_id, mesh, coords, mortar_attrs, nonmortar_attrs,
+                                            tribol::SURFACE_TO_SURFACE, tribol::NO_SLIDING, tribol::ENERGY_MORTAR,
+                                            tribol::FRICTIONLESS, tribol::LAGRANGE_MULTIPLIER, tribol::BINNING_GRID );
+        tribol::setLagrangeMultiplierOptions( cs_id, tribol::ImplicitEvalMode::MORTAR_RESIDUAL_JACOBIAN );
+
+        tribol::updateMfemParallelDecomposition();
+        tribol::update( step, step * dt, dt );
+
+        // ---- Get contact surface FE space and initialize lambda on first pass ----
+        auto& contact_fes = tribol::getMfemContactFESpace( cs_id );
+        contact_size = contact_fes.GetTrueVSize();
+
+        if ( !lambda ) {
+          lambda = std::make_unique<mfem::HypreParVector>( &contact_fes );
+          *lambda = 0.0;
+        }
+
+        // ---- Evaluate contact residual ----
+        mfem::HypreParVector r_contact_force( &par_fe_space );  // G^T * lambda (disp-sized)
+        r_contact_force = 0.0;
+        mfem::HypreParVector r_gap( &contact_fes );  // g_tilde (contact-sized)
+        r_gap = 0.0;
+
+        tribol::evaluateContactResidual( cs_id, *lambda, r_contact_force, r_gap );
+
+        // ---- Evaluate contact Jacobian blocks ----
+        std::unique_ptr<mfem::HypreParMatrix> H;  // lambda * d2g/du2 (disp x disp)
+        std::unique_ptr<mfem::HypreParMatrix> G;  // dg/du (contact x disp)
+
+        tribol::evaluateContactJacobian( cs_id, *lambda, H, G );
+
+        mfem::Vector R_u( disp_size );
+        K_elastic->Mult( U, R_u );  // R_u = K * U
+        R_u += r_contact_force;     // R_u += G^T * lambda
+
+        mfem::Vector R_lambda( contact_size );
+        R_lambda = r_gap;  // R_lambda = g_tilde
+
+        // Zero the essential-DOF entries before reducing, so every rank evaluates the same global norm
+        // (previously only rank-local essential contributions were subtracted from the reduced value)
+        for ( int i = 0; i < ess_tdof_list.Size(); ++i ) {
+          R_u( ess_tdof_list[i] ) = 0.0;
+        }
+        const double norm_R_u = mfem::InnerProduct( MPI_COMM_WORLD, R_u, R_u );
+        const double norm_R_lambda = mfem::InnerProduct( MPI_COMM_WORLD, R_lambda, R_lambda );
+        const double residual_norm = std::sqrt( norm_R_u + norm_R_lambda );
+
+        SLIC_INFO( "  Step " << step << " Newton " << newton << " | residual = " << residual_norm );
+
+        if ( newton > 0 && residual_norm < newton_atol_ ) {
+          SLIC_INFO( "  Newton converged (abs tol) at iteration " << newton );
+          break;
+        }
+
+        // ---- Assemble block Jacobian ----
+        // (0,0) block: K + H
+        // NOTE: H may be null on the first Newton iteration when lambda = 0
+        std::unique_ptr<mfem::HypreParMatrix> J_uu;
+        if ( H && H->NumRows() > 0 ) {
+          J_uu.reset( mfem::Add( 1.0, *K_elastic, 1.0, *H ) );
+        } else {
+          J_uu.reset( new mfem::HypreParMatrix( *K_elastic ) );
+        }
+
+        // G^T for the (0,1) block
+        auto G_T = std::unique_ptr<mfem::HypreParMatrix>( G->Transpose() );
+
+        // ---- Apply essential BCs ----
+        // R_u already has its essential-DOF entries zeroed (done before the residual norm above).
+        // Eliminate essential rows/cols in J_uu. EliminateRowsCols returns a newly allocated
+        // matrix holding the eliminated entries; take ownership so it is released at the end
+        // of this Newton iteration instead of being leaked.
+        auto J_uu_elim = std::unique_ptr<mfem::HypreParMatrix>( J_uu->EliminateRowsCols( ess_tdof_list ) );
+
+        // Zero out essential DOF rows in G^T (cols in G)
+        // Use EliminateRows on G^T which is simpler than EliminateCols on G
+        G_T->EliminateRows( ess_tdof_list );
+
+        // Rebuild G from the modified G^T to stay consistent
+        G = std::unique_ptr<mfem::HypreParMatrix>( G_T->Transpose() );
+
+        // ---- Set up block system ----
+
+        mfem::Array<int> block_offsets( 3 );
+        block_offsets[0] = 0;
+        block_offsets[1] = disp_size;
+        block_offsets[2] = disp_size + contact_size;
+
+        mfem::BlockOperator J_block( block_offsets );
+        J_block.SetBlock( 0, 0, J_uu.get() );
+        J_block.SetBlock( 0, 1, G_T.get() );
+        J_block.SetBlock( 1, 0, G.get() );
+
+        // Block RHS = -[R_u; R_lambda]
+        mfem::BlockVector rhs( block_offsets );
+        rhs.GetBlock( 0 ) = R_u;
+        rhs.GetBlock( 0 ).Neg();
+        rhs.GetBlock( 1 ) = R_lambda;
+        rhs.GetBlock( 1 ).Neg();
+
+        // ---- Solve with unpreconditioned MINRES ----
+        // (keep it simple for debugging; add preconditioner once this works)
+
+        mfem::BlockVector delta( block_offsets );
+        delta = 0.0;
+
+        mfem::MINRESSolver solver( MPI_COMM_WORLD );
+        solver.SetRelTol( 1.0e-10 );
+        solver.SetAbsTol( 1.0e-14 );
+        solver.SetMaxIter( 5000 );
+        solver.SetPrintLevel( 3 );
+        solver.SetOperator( J_block );
+        solver.Mult( rhs, delta );
+
+        SLIC_INFO( "    Solver converged: " << solver.GetConverged() << " in " << solver.GetNumIterations()
+                                            << " iterations" );
+
+        // ---- Update solution ----
+
+        mfem::Vector& delta_u = delta.GetBlock( 0 );
+        mfem::Vector& delta_lambda = delta.GetBlock( 1 );
+
+        U += delta_u;
+        *lambda += delta_lambda;
+
+        // Re-enforce prescribed DOFs exactly (guard against solver drift)
+        for ( int i = 0; i < prescribed_tdof_list.Size(); ++i ) {
+          U( prescribed_tdof_list[i] ) = current_prescribed_disp;
+        }
+
+      }  // end Newton loop
+
+      SLIC_INFO( "Timestep " << step << "/" << num_timesteps_ << " | prescribed disp = " << current_prescribed_disp );
+
+      // Refresh the displacement grid function (VisIt output is saved once, after the loop)
+      {
+        auto& P = *par_fe_space.GetProlongationMatrix();
+        P.Mult( U, displacement );
+      }
+
+    }  // end timestep loop
+
+    // Release the multiplier vector now that the solve is finished
+    lambda.reset();
+
+    // ---- Get final displacement ----
+    {
+      auto& P = *par_fe_space.GetProlongationMatrix();
+      P.Mult( U, displacement );
+    }
+
+    auto local_max = displacement.Max();
+    max_disp_ = 0.0;
+    MPI_Allreduce( &local_max, &max_disp_, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD );
+    SLIC_INFO( "Max displacement: " << max_disp_ );
+
+    // -----------------------------------------------------------------
+    // Analytical solution comparison
+    // -----------------------------------------------------------------
+    double total_height = 2.0;
+    double eps_yy = total_prescribed_disp_ / total_height;
+    double eps_xx = -lam_ / ( lam_ + 2.0 * mu_ ) * eps_yy;
+
+    SLIC_INFO( "Analytical: eps_yy = " << eps_yy << ", eps_xx = " << eps_xx );
+
+    mfem::VectorFunctionCoefficient exact_sol_coeff( 2, [eps_xx, eps_yy]( const mfem::Vector& x, mfem::Vector& u ) {
+      u[0] = eps_xx * x[0];
+      u[1] = eps_yy * x[1];
+    } );
+
+    exact_disp.ProjectCoefficient( exact_sol_coeff );
+
+    visit_dc.SetCycle( 1 );
+    visit_dc.SetTime( 1.0 );
+    visit_dc.Save();
+
+    // Vector error
+    mfem::ParGridFunction error_vec( exact_disp );
+    error_vec -= displacement;
+    l2_err_vec_ = mfem::ParNormlp( error_vec, 2, MPI_COMM_WORLD );
+
+    // Component-wise errors. NOTE(review): the i / n + i split assumes all x entries precede all y entries - confirm
+    const mfem::FiniteElementCollection* fec = par_fe_space.FEColl();
+    mfem::ParFiniteElementSpace scalar_fes( &mesh, fec, 1, par_fe_space.GetOrdering() );
+    const int n = scalar_fes.GetNDofs();
+
+    mfem::ParGridFunction ux_exact( &scalar_fes ), ux_num( &scalar_fes );
+    mfem::ParGridFunction uy_exact( &scalar_fes ), uy_num( &scalar_fes );
+
+    for ( int i = 0; i < n; ++i ) {
+      ux_exact( i ) = exact_disp( i );
+      ux_num( i ) = displacement( i );
+      uy_exact( i ) = exact_disp( n + i );
+      uy_num( i ) = displacement( n + i );
+    }
+
+    mfem::ParGridFunction ux_err( ux_exact );
+    ux_err -= ux_num;
+    l2_err_x_ = mfem::ParNormlp( ux_err, 2, MPI_COMM_WORLD );
+
+    mfem::ParGridFunction uy_err( uy_exact );
+    uy_err -= uy_num;
+    l2_err_y_ = mfem::ParNormlp( uy_err, 2, MPI_COMM_WORLD );
+
+    SLIC_INFO( "L2 error (vector): " << l2_err_vec_ );
+    SLIC_INFO( "L2 error (x):      " << l2_err_x_ );
+    SLIC_INFO( "L2 error (y):      " << l2_err_y_ );
+    SLIC_INFO( "Consistency check |err_vec^2 - (err_x^2 + err_y^2)| = "
+               << std::abs( l2_err_vec_ * l2_err_vec_ - ( l2_err_x_ * l2_err_x_ + l2_err_y_ * l2_err_y_ ) ) );
+  }
+};
+
+TEST_P( MfemMortarEnergyLagrangePatchTest, check_patch_test )
+{
+  EXPECT_GT( max_disp_, 0.0 );              // largest displacement entry must be positive
+  EXPECT_NEAR( 0.0, l2_err_vec_, 1.0e-2 );  // vector error against the analytical field
+  EXPECT_NEAR( 0.0, l2_err_x_, 1.0e-2 );    // x-component error
+  EXPECT_NEAR( 0.0, l2_err_y_, 1.0e-2 );    // y-component error
+  // Collective barrier over MPI_COMM_WORLD before the test body returns.
+  MPI_Barrier( MPI_COMM_WORLD );
+}
+// Single instantiation with tuple value 2; the fixture's mesh size is fixed in SetUp and does not depend on it.
+INSTANTIATE_TEST_SUITE_P( tribol, MfemMortarEnergyLagrangePatchTest, testing::Values( std::make_tuple( 2 ) ) );
+
+//------------------------------------------------------------------------------
+#include "axom/slic/core/SimpleLogger.hpp"
+
+int main( int argc, char* argv[] )
+{
+  // Start MPI, then let GoogleTest consume its own command-line flags.
+  MPI_Init( &argc, &argv );
+  ::testing::InitGoogleTest( &argc, argv );
+
+#ifdef TRIBOL_USE_UMPIRE
+  umpire::ResourceManager::getInstance();
+#endif
+
+  // Logger lives until main's scope ends; the test status becomes the exit code.
+  axom::slic::SimpleLogger logger;
+  const int result = RUN_ALL_TESTS();
+
+  // Shut down Tribol first, then MPI.
+  tribol::finalize();
+  MPI_Finalize();
+  return result;
+}
diff --git a/src/tribol/CMakeLists.txt b/src/tribol/CMakeLists.txt
index aba0db2c..e11c7dbf 100644
--- a/src/tribol/CMakeLists.txt
+++ b/src/tribol/CMakeLists.txt
@@ -40,7 +40,11 @@ set(tribol_headers
 
     physics/AlignedMortar.hpp
     physics/CommonPlane.hpp
+    physics/ContactFormulation.hpp
+    physics/ContactFormulationFactory.hpp
     physics/Mortar.hpp
+    physics/EnergyMortar.hpp
+    physics/EnergyMortarAdapter.hpp
     physics/Physics.hpp
 
     search/InterfacePairFinder.hpp
@@ -50,7 +54,8 @@ set(tribol_headers
     utils/DataManager.hpp
     utils/Math.hpp
     utils/TestUtils.hpp
-)
+
+    )
 
 ## list of sources
 set(tribol_sources
@@ -73,7 +78,10 @@ set(tribol_sources
 
     physics/AlignedMortar.cpp
     physics/CommonPlane.cpp
+    physics/ContactFormulationFactory.cpp
     physics/Mortar.cpp
+    physics/EnergyMortar.cpp
+    physics/EnergyMortarAdapter.cpp
     physics/Physics.cpp
      
     search/InterfacePairFinder.cpp
@@ -81,6 +89,7 @@ set(tribol_sources
     utils/ContactPlaneOutput.cpp
     utils/Math.cpp
     utils/TestUtils.cpp
+
     )
 
 if (ENABLE_FORTRAN)
diff --git a/src/tribol/common/BasicTypes.hpp b/src/tribol/common/BasicTypes.hpp
index 7947a991..51c0d10d 100644
--- a/src/tribol/common/BasicTypes.hpp
+++ b/src/tribol/common/BasicTypes.hpp
@@ -14,6 +14,80 @@ using CommT = shared::CommT;
 using IndexT = shared::IndexT;
 using SizeT = shared::SizeT;
 using RealT = shared::RealT;
+#ifdef TRIBOL_USE_MPI
+
+using CommT = MPI_Comm;
+#define TRIBOL_COMM_WORLD MPI_COMM_WORLD
+#define TRIBOL_COMM_NULL MPI_COMM_NULL
+
+#else
+
+using CommT = int;
+#define TRIBOL_COMM_WORLD 0
+#define TRIBOL_COMM_NULL -1
+
+#endif
+
+// match index type used in axom (since data is held in axom data structures)
+using IndexT = axom::IndexType;
+
+// size type matching size of addressable memory
+using SizeT = size_t;
+
+#ifdef TRIBOL_USE_SINGLE_PRECISION
+
+#error "Tribol does not support single precision."
+using RealT = float;
+
+#else
+
+using RealT = double;
+
+#endif
+
+// mfem's real_t should match ours
+static_assert( std::is_same<RealT, mfem::real_t>::value, "tribol::RealT and mfem::real_t are required to match" );
+
+#define TRIBOL_UNUSED_VAR AXOM_UNUSED_VAR
+#define TRIBOL_UNUSED_PARAM AXOM_UNUSED_PARAM
+
+// Execution space specifiers
+#if defined( TRIBOL_USE_CUDA ) || defined( TRIBOL_USE_HIP )
+#ifndef __device__
+#error "TRIBOL_USE_CUDA or TRIBOL_USE_HIP but __device__ is undefined.  Check include files"
+#endif
+#define TRIBOL_DEVICE __device__
+#define TRIBOL_HOST_DEVICE __host__ __device__
+#else
+#define TRIBOL_DEVICE
+#define TRIBOL_HOST_DEVICE
+#endif
+
+// Execution space identifier for defaulted constructors and destructors
+#ifdef TRIBOL_USE_HIP
+#define TRIBOL_DEFAULT_DEVICE __device__
+#define TRIBOL_DEFAULT_HOST_DEVICE __host__ __device__
+#else
+#define TRIBOL_DEFAULT_DEVICE
+#define TRIBOL_DEFAULT_HOST_DEVICE
+#endif
+
+// Defined when Tribol doesn't have a device available
+#if !( defined( TRIBOL_USE_CUDA ) || defined( TRIBOL_USE_HIP ) )
+#define TRIBOL_USE_HOST
+#endif
+
+// Define variable when in device code
+#if defined( __CUDA_ARCH__ ) || defined( __HIP_DEVICE_COMPILE__ )
+#define TRIBOL_DEVICE_CODE
+#endif
+
+// Ignore host code in __host__ __device__ code warning on NVCC
+#ifdef TRIBOL_USE_CUDA
+#define TRIBOL_NVCC_EXEC_CHECK_DISABLE #pragma nv_exec_check_disable
+#else
+#define TRIBOL_NVCC_EXEC_CHECK_DISABLE
+#endif
 
 }  // namespace tribol
 
diff --git a/src/tribol/common/Enzyme.hpp b/src/tribol/common/Enzyme.hpp
index 220ece66..a3e4062a 100644
--- a/src/tribol/common/Enzyme.hpp
+++ b/src/tribol/common/Enzyme.hpp
@@ -13,7 +13,49 @@
 #include "tribol/common/BasicTypes.hpp"
 
 #ifdef TRIBOL_USE_ENZYME
+#ifdef MFEM_USE_ENZYME
 #include "mfem/general/enzyme.hpp"
+#else  // MFEM_USE_ENZYME
+// NOTE: contents copied from MFEM's general/enzyme.hpp
+
+/*
+ * Variables prefixed with enzyme_* or function types prefixed with __enzyme_*,
+ * are variables which will get preprocessed in the LLVM intermediate
+ * representation when the Enzyme LLVM plugin is loaded. See the Enzyme
+ * documentation (https://enzyme.mit.edu) for more information.
+ */
+
+extern int enzyme_dup;
+extern int enzyme_dupnoneed;
+extern int enzyme_out;
+extern int enzyme_const;
+extern int enzyme_interleave;
+
+#if defined( MFEM_USE_CUDA ) || defined( MFEM_USE_HIP )
+#define MFEM_DEVICE_EXTERN_STMT( name ) extern __device__ int name;
+#else
+#define MFEM_DEVICE_EXTERN_STMT( name )
+#endif
+
+MFEM_DEVICE_EXTERN_STMT( enzyme_dup )
+MFEM_DEVICE_EXTERN_STMT( enzyme_dupnoneed )
+MFEM_DEVICE_EXTERN_STMT( enzyme_out )
+MFEM_DEVICE_EXTERN_STMT( enzyme_const )
+MFEM_DEVICE_EXTERN_STMT( enzyme_interleave )
+
+// warning: if inlined, triggers function '__enzyme_autodiff' is not defined
+template <typename return_type, typename... Args>
+MFEM_HOST_DEVICE return_type __enzyme_autodiff( Args... );
+
+// warning: if inlined, triggers function '__enzyme_fwddiff' is not defined
+template <typename return_type, typename... Args>
+MFEM_HOST_DEVICE return_type __enzyme_fwddiff( Args... );
+
+#define MFEM_ENZYME_INACTIVENOFREE __attribute__( ( enzyme_inactive, enzyme_nofree ) )
+#define MFEM_ENZYME_INACTIVE __attribute__( ( enzyme_inactive ) )
+#define MFEM_ENZYME_FN_LIKE( x ) __attribute__( ( enzyme_function_like( #x ) ) )
+
+#endif  // MFEM_USE_ENZYME
 
 #if !defined( TRIBOL_USE_HOST ) && !defined( TRIBOL_DEVICE_CODE )
 // When compiling with NVCC or HIPCC, the compiler performs multiple passes.
@@ -35,7 +77,7 @@ extern int tribol_host_enzyme_dupnoneed asm( "enzyme_dupnoneed" );
 #define TRIBOL_ENZYME_OUT tribol_host_enzyme_out
 #define TRIBOL_ENZYME_DUPNONEED tribol_host_enzyme_dupnoneed
 
-#else
+#else  // !defined( TRIBOL_USE_HOST ) && !defined( TRIBOL_DEVICE_CODE )
 // We are either:
 // 1. In a device compilation pass (__CUDA_ARCH__ or __HIP_DEVICE_COMPILE__ defined).
 // 2. Using a standard host compiler (GCC, Clang, etc.).
@@ -44,14 +86,14 @@ extern int tribol_host_enzyme_dupnoneed asm( "enzyme_dupnoneed" );
 #define TRIBOL_ENZYME_DUP enzyme_dup
 #define TRIBOL_ENZYME_OUT enzyme_out
 #define TRIBOL_ENZYME_DUPNONEED enzyme_dupnoneed
-#endif
+#endif  // !defined( TRIBOL_USE_HOST ) && !defined( TRIBOL_DEVICE_CODE )
 
-#else
+#else  // TRIBOL_USE_ENZYME
 // Fallback definitions if Enzyme is disabled
 #define TRIBOL_ENZYME_CONST 0
 #define TRIBOL_ENZYME_DUP 0
 #define TRIBOL_ENZYME_OUT 0
 #define TRIBOL_ENZYME_DUPNONEED 0
-#endif
+#endif  // TRIBOL_USE_ENZYME
 
 #endif /* SRC_TRIBOL_COMMON_ENZYME_HPP_ */
diff --git a/src/tribol/common/Parameters.hpp b/src/tribol/common/Parameters.hpp
index 2f5789d2..6bb52366 100644
--- a/src/tribol/common/Parameters.hpp
+++ b/src/tribol/common/Parameters.hpp
@@ -117,6 +117,7 @@ enum ContactMethod  // all mortar methods go first
   SINGLE_MORTAR,   ///! Single mortar per Puso 2003
   ALIGNED_MORTAR,  ///! Aligned mortar to be used with ContactCase = NO_SLIDING
   MORTAR_WEIGHTS,  ///! Method that only returns mortar weights per single mortar method
+  ENERGY_MORTAR,   ///! Energy-based mortar method
   COMMON_PLANE,    ///! Common plane method, currently with single integration point
   NUM_CONTACT_METHODS
 };
diff --git a/src/tribol/interface/mfem_tribol.cpp b/src/tribol/interface/mfem_tribol.cpp
index 96743d32..f35827a9 100644
--- a/src/tribol/interface/mfem_tribol.cpp
+++ b/src/tribol/interface/mfem_tribol.cpp
@@ -69,7 +69,7 @@ void registerMfemCouplingScheme( IndexT cs_id, int mesh_id_1, int mesh_id_2, con
   // Set data required for use with Lagrange multiplier enforcement option.
   // Coupling scheme validity will be checked later, but here some initial
   // data is created/initialized for use with LMs.
-  if ( enforcement_method == LAGRANGE_MULTIPLIER ) {
+  if ( enforcement_method == LAGRANGE_MULTIPLIER || contact_method == ENERGY_MORTAR ) {
     std::unique_ptr<mfem::FiniteElementCollection> pressure_fec = std::make_unique<mfem::H1_FECollection>(
         current_coords.FESpace()->FEColl()->GetOrder(), mesh.SpaceDimension() );
     int pressure_vdim = 0;
@@ -96,8 +96,10 @@ void registerMfemCouplingScheme( IndexT cs_id, int mesh_id_1, int mesh_id_2, con
                                                               isOnDevice( exec_mode ) ) );
     // set up Jacobian transfer if the coupling scheme requires it
     auto lm_options = cs.getEnforcementOptions().lm_implicit_options;
-    if ( lm_options.enforcement_option_set && ( lm_options.eval_mode == ImplicitEvalMode::MORTAR_JACOBIAN ||
-                                                lm_options.eval_mode == ImplicitEvalMode::MORTAR_RESIDUAL_JACOBIAN ) ) {
+    if ( ( lm_options.enforcement_option_set &&
+           ( lm_options.eval_mode == ImplicitEvalMode::MORTAR_JACOBIAN ||
+             lm_options.eval_mode == ImplicitEvalMode::MORTAR_RESIDUAL_JACOBIAN ) ) ||
+         contact_method == ENERGY_MORTAR ) {
       // create matrix transfer operator between redecomp and
       // parent/parent-linked boundary submesh
       cs.setMfemJacobianData(
@@ -315,12 +317,24 @@ void getMfemResponse( IndexT cs_id, mfem::Vector& r )
       !cs, axom::fmt::format( "Coupling scheme cs_id={0} does not exist. Call tribol::registerMfemCouplingScheme() "
                               "to create a coupling scheme with this cs_id.",
                               cs_id ) );
+
   SLIC_ERROR_ROOT_IF( !cs->hasMfemData(),
                       "Coupling scheme does not contain MFEM data. "
                       "Create the coupling scheme using registerMfemCouplingScheme() to return a response vector." );
   cs->getMfemMeshData()->GetParentResponse( r );
 }
 
+mfem::HypreParVector getMfemTDofForce( IndexT cs_id )
+{
+  auto cs = CouplingSchemeManager::getInstance().findData( cs_id );
+  SLIC_ERROR_ROOT_IF(
+      !cs, axom::fmt::format( "Coupling scheme cs_id={0} does not exist. Call tribol::registerMfemCouplingScheme() "
+                              "to create a coupling scheme with this cs_id.",
+                              cs_id ) );
+  SLIC_ERROR_ROOT_IF( !cs->hasContactFormulation(), "Coupling scheme does not contain a contact formulation." );
+  return cs->getContactFormulation()->getMfemForce();
+}
+
 std::unique_ptr<mfem::BlockOperator> getMfemBlockJacobian( IndexT cs_id )
 {
   CouplingScheme* cs = CouplingSchemeManager::getInstance().findData( cs_id );
@@ -328,6 +342,31 @@ std::unique_ptr<mfem::BlockOperator> getMfemBlockJacobian( IndexT cs_id )
       !cs, axom::fmt::format( "Coupling scheme cs_id={0} does not exist. Call tribol::registerMfemCouplingScheme() "
                               "to create a coupling scheme with this cs_id.",
                               cs_id ) );
+
+  if ( cs->hasContactFormulation() ) {
+    // Assemble the 2x2 block operator from the formulation's derivative matrices
+    auto* formulation = cs->getContactFormulation();
+    auto DfDx = formulation->getMfemDfDx();
+    auto DfDp = formulation->getMfemDfDp();
+    auto DgDx = formulation->getMfemDgDx();
+
+    // Sizes come from whichever matrix is non-null: DfDx is null-checked below, so it is not dereferenced blindly.
+    // NOTE(review): confirm `offsets` may be a local -- mfem::BlockOperator's docs say it does not copy offsets.
+    mfem::Array<int> offsets( 3 );
+    offsets[0] = 0;
+    offsets[1] = DfDx ? DfDx->Height() : DfDp ? DfDp->Height() : DgDx ? DgDx->Width() : 0;  // displacement dofs
+    offsets[2] = offsets[1] + ( DfDp ? DfDp->Width() : DgDx ? DgDx->Height() : 0 );          // pressure dofs
+
+    auto blockOp = std::make_unique<mfem::BlockOperator>( offsets );
+    if ( DfDx ) blockOp->SetBlock( 0, 0, DfDx.release() );
+    if ( DfDp ) blockOp->SetBlock( 0, 1, DfDp.release() );
+    if ( DgDx ) blockOp->SetBlock( 1, 0, DgDx.release() );
+    // 1,1 block (DgDp) is implicitly zero for standard contact
+    // Manually set ownership to avoid leaks, as BlockOperator owns nothing by default
+    blockOp->owns_blocks = 1;
+    return blockOp;
+  }
+
   SparseMode sparse_mode = cs->getEnforcementOptions().lm_implicit_options.sparse_mode;
   if ( sparse_mode != SparseMode::MFEM_ELEMENT_DENSE ) {
     SLIC_ERROR_ROOT(
@@ -369,6 +408,91 @@ std::unique_ptr<mfem::BlockOperator> getMfemBlockJacobian( IndexT cs_id )
   }
 }
 
+std::unique_ptr<mfem::HypreParMatrix> getMfemDfDx( IndexT cs_id )
+{
+  auto cs = CouplingSchemeManager::getInstance().findData( cs_id );
+  SLIC_ERROR_ROOT_IF(
+      !cs, axom::fmt::format( "Coupling scheme cs_id={0} does not exist. Call tribol::registerMfemCouplingScheme() "
+                              "to create a coupling scheme with this cs_id.",
+                              cs_id ) );
+  // force/displacement derivative is only served by a ContactFormulation; report and bail out otherwise
+  if ( !cs->hasContactFormulation() ) {
+    SLIC_ERROR_ROOT( "getMfemDfDx() is only supported for coupling schemes with a ContactFormulation." );
+    return nullptr;
+  }
+  return cs->getContactFormulation()->getMfemDfDx();
+}
+
+std::unique_ptr<mfem::HypreParMatrix> getMfemDfDp( IndexT cs_id )
+{
+  auto cs = CouplingSchemeManager::getInstance().findData( cs_id );
+  SLIC_ERROR_ROOT_IF(
+      !cs, axom::fmt::format( "Coupling scheme cs_id={0} does not exist. Call tribol::registerMfemCouplingScheme() "
+                              "to create a coupling scheme with this cs_id.",
+                              cs_id ) );
+  // force/pressure derivative is only served by a ContactFormulation; report and bail out otherwise
+  if ( !cs->hasContactFormulation() ) {
+    SLIC_ERROR_ROOT( "getMfemDfDp() is only supported for coupling schemes with a ContactFormulation." );
+    return nullptr;
+  }
+  return cs->getContactFormulation()->getMfemDfDp();
+}
+
+std::unique_ptr<mfem::HypreParMatrix> getMfemDgDx( IndexT cs_id )
+{
+  auto cs = CouplingSchemeManager::getInstance().findData( cs_id );
+  SLIC_ERROR_ROOT_IF(
+      !cs, axom::fmt::format( "Coupling scheme cs_id={0} does not exist. Call tribol::registerMfemCouplingScheme() "
+                              "to create a coupling scheme with this cs_id.",
+                              cs_id ) );
+  // gap/displacement derivative is only served by a ContactFormulation; report and bail out otherwise
+  if ( !cs->hasContactFormulation() ) {
+    SLIC_ERROR_ROOT( "getMfemDgDx() is only supported for coupling schemes with a ContactFormulation." );
+    return nullptr;
+  }
+  return cs->getContactFormulation()->getMfemDgDx();
+}
+
+//**************** */NEW LAGRANGE FUNCTIONS:
+mfem::ParFiniteElementSpace& getMfemContactFESpace( IndexT cs_id )
+{
+  auto cs = CouplingSchemeManager::getInstance().findData( cs_id );
+  SLIC_ERROR_ROOT_IF(
+      !cs, axom::fmt::format( "Coupling scheme cs_id={0} does not exist. Call tribol::registerMfemCouplingScheme() "
+                              "to create a coupling scheme with this cs_id.",
+                              cs_id ) );
+  SLIC_ERROR_ROOT_IF( !cs->hasMfemSubmeshData(),
+                      axom::fmt::format( "Coupling scheme cs_id={0} does not contain MFEM submesh data.", cs_id ) );
+  return const_cast<mfem::ParFiniteElementSpace&>( cs->getMfemSubmeshData()->GetSubmeshFESpace() );
+}
+
+void evaluateContactResidual( IndexT cs_id, const mfem::HypreParVector& lambda, mfem::HypreParVector& r_force,
+                              mfem::HypreParVector& r_gap )
+{
+  auto cs = CouplingSchemeManager::getInstance().findData( cs_id );
+  SLIC_ERROR_ROOT_IF(
+      !cs, axom::fmt::format( "Coupling scheme cs_id={0} does not exist. Call tribol::registerMfemCouplingScheme() "
+                              "to create a coupling scheme with this cs_id.",
+                              cs_id ) );
+  SLIC_ERROR_ROOT_IF( !cs->hasContactFormulation(), "Coupling scheme does not contain a contact formulation." );
+  cs->getContactFormulation()->evaluateContactResidual( lambda, r_force, r_gap );
+}
+
+void evaluateContactJacobian( IndexT cs_id, const mfem::HypreParVector& lambda,
+                              std::unique_ptr<mfem::HypreParMatrix>& df_du,
+                              std::unique_ptr<mfem::HypreParMatrix>& df_dlambda )
+{
+  auto cs = CouplingSchemeManager::getInstance().findData( cs_id );
+  SLIC_ERROR_ROOT_IF(
+      !cs, axom::fmt::format( "Coupling scheme cs_id={0} does not exist. Call tribol::registerMfemCouplingScheme() "
+                              "to create a coupling scheme with this cs_id.",
+                              cs_id ) );
+  SLIC_ERROR_ROOT_IF( !cs->hasContactFormulation(), "Coupling scheme does not contain a contact formulation." );
+  cs->getContactFormulation()->evaluateContactJacobian( lambda, df_du, df_dlambda );
+}
+
+/////***************** */END LAGRANGE FUNCTIONS
+
 void getMfemGap( IndexT cs_id, mfem::Vector& g )
 {
   auto cs = CouplingSchemeManager::getInstance().findData( cs_id );
@@ -376,6 +500,7 @@ void getMfemGap( IndexT cs_id, mfem::Vector& g )
       !cs, axom::fmt::format( "Coupling scheme cs_id={0} does not exist. Call tribol::registerMfemCouplingScheme() "
                               "to create a coupling scheme with this cs_id.",
                               cs_id ) );
+
   SLIC_ERROR_ROOT_IF( !cs->hasMfemSubmeshData(),
                       axom::fmt::format( "Coupling scheme cs_id={0} does not contain MFEM pressure field data. "
                                          "Create the coupling scheme using registerMfemCouplingScheme() and set the "
@@ -384,6 +509,17 @@ void getMfemGap( IndexT cs_id, mfem::Vector& g )
   cs->getMfemSubmeshData()->GetSubmeshGap( g );
 }
 
+mfem::HypreParVector getMfemTDofGap( IndexT cs_id )
+{
+  auto cs = CouplingSchemeManager::getInstance().findData( cs_id );
+  SLIC_ERROR_ROOT_IF(
+      !cs, axom::fmt::format( "Coupling scheme cs_id={0} does not exist. Call tribol::registerMfemCouplingScheme() "
+                              "to create a coupling scheme with this cs_id.",
+                              cs_id ) );
+  SLIC_ERROR_ROOT_IF( !cs->hasContactFormulation(), "Coupling scheme does not contain a contact formulation." );
+  return cs->getContactFormulation()->getMfemGap();
+}
+
 mfem::ParGridFunction& getMfemPressure( IndexT cs_id )
 {
   auto cs = CouplingSchemeManager::getInstance().findData( cs_id );
@@ -391,6 +527,7 @@ mfem::ParGridFunction& getMfemPressure( IndexT cs_id )
       !cs, axom::fmt::format( "Coupling scheme cs_id={0} does not exist. Call tribol::registerMfemCouplingScheme() "
                               "to create a coupling scheme with this cs_id.",
                               cs_id ) );
+
   SLIC_ERROR_ROOT_IF( !cs->hasMfemSubmeshData(),
                       axom::fmt::format( "Coupling scheme cs_id={0} does not contain MFEM pressure field data. "
                                          "Create the coupling scheme using registerMfemCouplingScheme() and set the "
@@ -399,6 +536,17 @@ mfem::ParGridFunction& getMfemPressure( IndexT cs_id )
   return cs->getMfemSubmeshData()->GetSubmeshPressure();
 }
 
+mfem::HypreParVector getMfemTDofPressure( IndexT cs_id )
+{
+  auto cs = CouplingSchemeManager::getInstance().findData( cs_id );
+  SLIC_ERROR_ROOT_IF(
+      !cs, axom::fmt::format( "Coupling scheme cs_id={0} does not exist. Call tribol::registerMfemCouplingScheme() "
+                              "to create a coupling scheme with this cs_id.",
+                              cs_id ) );
+  SLIC_ERROR_ROOT_IF( !cs->hasContactFormulation(), "Coupling scheme does not contain a contact formulation." );
+  return cs->getContactFormulation()->getMfemPressure();
+}
+
 void updateMfemParallelDecomposition( int n_ranks, bool force_new_redecomp )
 {
   for ( auto& cs_pair : CouplingSchemeManager::getInstance() ) {
@@ -441,9 +589,10 @@ void updateMfemParallelDecomposition( int n_ranks, bool force_new_redecomp )
         registerNodalReferenceCoords( mesh_ids[0], xref_ptrs[0], xref_ptrs[1], xref_ptrs[2] );
         registerNodalReferenceCoords( mesh_ids[1], xref_ptrs[0], xref_ptrs[1], xref_ptrs[2] );
       }
-      if ( cs.getEnforcementMethod() == LAGRANGE_MULTIPLIER ) {
+      if ( cs.getEnforcementMethod() == LAGRANGE_MULTIPLIER || cs.getContactMethod() == ENERGY_MORTAR ) {
         SLIC_ERROR_ROOT_IF( cs.getContactModel() != FRICTIONLESS, "Only frictionless contact is supported." );
-        SLIC_ERROR_ROOT_IF( cs.getContactMethod() != SINGLE_MORTAR, "Only single mortar contact is supported." );
+        SLIC_ERROR_ROOT_IF( cs.getContactMethod() != SINGLE_MORTAR && cs.getContactMethod() != ENERGY_MORTAR,
+                            "Only single mortar or ENERGY_MORTAR contact is supported." );
         auto submesh_data = cs.getMfemSubmeshData();
         // updates submesh-native grid functions and transfer operators on
         // the new redecomp mesh
@@ -452,7 +601,7 @@ void updateMfemParallelDecomposition( int n_ranks, bool force_new_redecomp )
         registerMortarGaps( mesh_ids[1], g_ptrs[0] );
         auto p_ptrs = submesh_data->GetRedecompPressurePtrs();
         registerMortarPressures( mesh_ids[1], p_ptrs[0] );
-        if ( cs.hasMfemJacobianData() && new_redecomp ) {
+        if ( ( cs.hasMfemJacobianData() || cs.getContactMethod() == ENERGY_MORTAR ) && new_redecomp ) {
           // updates Jacobian transfer operator for new redecomp mesh
           cs.getMfemJacobianData()->UpdateJacobianXfer();
         }
diff --git a/src/tribol/interface/mfem_tribol.hpp b/src/tribol/interface/mfem_tribol.hpp
index 2a142962..ed300987 100644
--- a/src/tribol/interface/mfem_tribol.hpp
+++ b/src/tribol/interface/mfem_tribol.hpp
@@ -240,6 +240,8 @@ void registerMfemReferenceCoords( IndexT cs_id, const mfem::ParGridFunction& ref
  */
 void getMfemResponse( IndexT cs_id, mfem::Vector& r );
 
+mfem::HypreParVector getMfemTDofForce( IndexT cs_id );
+
 /**
  * @brief Get assembled contact contributions for the Jacobian matrix
  *
@@ -268,6 +270,30 @@ void getMfemResponse( IndexT cs_id, mfem::Vector& r );
  */
 std::unique_ptr<mfem::BlockOperator> getMfemBlockJacobian( IndexT cs_id );
 
+/**
+ * @brief Get the derivative of the force with respect to displacement
+ *
+ * @param cs_id Coupling scheme id with a registered MFEM mesh
+ * @return Unique pointer to MFEM HypreParMatrix
+ */
+std::unique_ptr<mfem::HypreParMatrix> getMfemDfDx( IndexT cs_id );
+
+/**
+ * @brief Get the derivative of the force with respect to pressure
+ *
+ * @param cs_id Coupling scheme id with a registered MFEM mesh
+ * @return Unique pointer to MFEM HypreParMatrix
+ */
+std::unique_ptr<mfem::HypreParMatrix> getMfemDfDp( IndexT cs_id );
+
+/**
+ * @brief Get the derivative of the gap with respect to displacement
+ *
+ * @param cs_id Coupling scheme id with a registered MFEM mesh
+ * @return Unique pointer to MFEM HypreParMatrix
+ */
+std::unique_ptr<mfem::HypreParMatrix> getMfemDgDx( IndexT cs_id );
+
 /**
  * @brief Returns gap vector to a given mfem::Vector
  *
@@ -283,6 +309,8 @@ std::unique_ptr<mfem::BlockOperator> getMfemBlockJacobian( IndexT cs_id );
  */
 void getMfemGap( IndexT cs_id, mfem::Vector& g );
 
+mfem::HypreParVector getMfemTDofGap( IndexT cs_id );
+
 /**
  * @brief Returns reference to nodal pressure vector on the submesh surface
  *
@@ -295,6 +323,17 @@ void getMfemGap( IndexT cs_id, mfem::Vector& g );
  */
 mfem::ParGridFunction& getMfemPressure( IndexT cs_id );
 
+mfem::HypreParVector getMfemTDofPressure( IndexT cs_id );
+
+mfem::ParFiniteElementSpace& getMfemContactFESpace( IndexT cs_id );
+
+void evaluateContactResidual( IndexT cs_id, const mfem::HypreParVector& lambda, mfem::HypreParVector& r_force,
+                              mfem::HypreParVector& r_gap );
+
+void evaluateContactJacobian( IndexT cs_id, const mfem::HypreParVector& lambda,
+                              std::unique_ptr<mfem::HypreParMatrix>& df_du,
+                              std::unique_ptr<mfem::HypreParMatrix>& df_dlambda );
+
 /**
  * @brief Updates mesh parallel decomposition and related grid functions/Jacobian when coordinates are updated
  *
diff --git a/src/tribol/mesh/CouplingScheme.cpp b/src/tribol/mesh/CouplingScheme.cpp
index 85e89775..a425514e 100644
--- a/src/tribol/mesh/CouplingScheme.cpp
+++ b/src/tribol/mesh/CouplingScheme.cpp
@@ -26,6 +26,8 @@
 #include "tribol/search/InterfacePairFinder.hpp"
 #include "tribol/common/Parameters.hpp"
 #include "tribol/physics/Physics.hpp"
+#include "tribol/physics/ContactFormulationFactory.hpp"
+
 #include "tribol/integ/FE.hpp"
 namespace tribol {
 
@@ -1026,6 +1028,16 @@ void CouplingScheme::performBinning()
 //------------------------------------------------------------------------------
 int CouplingScheme::apply( int cycle, RealT t, RealT& dt )
 {
+  if ( m_formulation_impl ) {
+    if ( m_interface_pairs.size() > 0 ) {
+      m_formulation_impl->setInterfacePairs( std::move( m_interface_pairs ), 0 );
+    }
+    m_formulation_impl->updateNodalGaps();
+    m_formulation_impl->updateNodalForces();
+    dt = m_formulation_impl->computeTimeStep();
+    return 0;
+  }
+
   auto& params = m_parameters;
 
   // loop over number of interface pairs
@@ -1144,8 +1156,20 @@ int CouplingScheme::apply( int cycle, RealT t, RealT& dt )
 //------------------------------------------------------------------------------
 bool CouplingScheme::init()
 {
-  // check for valid coupling scheme only for non-null-meshes
-  this->m_isValid = this->isValidCouplingScheme();
+  if ( m_contactMethod == ENERGY_MORTAR ) {
+    // these calls still need to be made to set mesh pointers and allocator id
+    if ( !setMeshPointers() || checkExecutionModeData() != 0 ) {
+      return false;
+    }
+    m_formulation_impl = createContactFormulation( this );
+  }
+
+  if ( m_formulation_impl ) {
+    this->m_isValid = true;
+  } else {
+    // check for valid coupling scheme only for non-null-meshes
+    this->m_isValid = this->isValidCouplingScheme();
+  }
 
   if ( this->m_isValid ) {
     // set individual coupling scheme logging level
diff --git a/src/tribol/mesh/CouplingScheme.hpp b/src/tribol/mesh/CouplingScheme.hpp
index abc16d5f..9e24f99f 100644
--- a/src/tribol/mesh/CouplingScheme.hpp
+++ b/src/tribol/mesh/CouplingScheme.hpp
@@ -19,6 +19,7 @@
 #include "tribol/mesh/MethodCouplingData.hpp"
 #include "tribol/mesh/MfemData.hpp"
 #include "tribol/physics/Physics.hpp"
+#include "tribol/physics/ContactFormulation.hpp"
 #include "tribol/utils/DataManager.hpp"
 #include "tribol/mesh/InterfacePairs.hpp"
 #include "tribol/geom/CompGeom.hpp"
@@ -732,6 +733,30 @@ class CouplingScheme {
    */
   MethodData* getDnDxMethodData() const { return m_dndxJacobian.get(); }
 
+  /**
+   * @brief Set the ContactFormulation implementation
+   *
+   * @param formulation Unique pointer to the formulation
+   */
+  void setContactFormulation( std::unique_ptr<ContactFormulation> formulation )
+  {
+    m_formulation_impl = std::move( formulation );
+  }
+
+  /**
+   * @brief Check if a ContactFormulation implementation is set
+   *
+   * @return true if set
+   */
+  bool hasContactFormulation() const { return m_formulation_impl != nullptr; }
+
+  /**
+   * @brief Get the ContactFormulation implementation
+   *
+   * @return ContactFormulation*
+   */
+  ContactFormulation* getContactFormulation() const { return m_formulation_impl.get(); }
+
 #ifdef BUILD_REDECOMP
 
   /**
@@ -925,6 +950,8 @@ class CouplingScheme {
   std::unique_ptr<MethodData> m_dfdnJacobian;  ///< Store derivative of force w.r.t. normal on element pairs
   std::unique_ptr<MethodData> m_dndxJacobian;  ///< Store derivative of normal w.r.t. nodal coordinates on element pairs
 
+  std::unique_ptr<ContactFormulation> m_formulation_impl;  ///< Polymorphic contact formulation implementation
+
   ArrayT<InterfacePair> m_interface_pairs;  ///< List of interface pairs
 
   CompGeom m_cg_pairs;  ///< Computational geometry container object
@@ -1035,6 +1062,10 @@ TRIBOL_HOST_DEVICE inline RealT CouplingScheme::Viewer::getGapTol( int fid1, int
 //------------------------------------------------------------------------------
 TRIBOL_HOST_DEVICE inline bool CouplingScheme::Viewer::pruneMethodFacePair( const IndexT fid1, const IndexT fid2 ) const
 {
+  if ( m_contact_method == ENERGY_MORTAR ) {
+    return false;
+  }
+
   constexpr int max_dim = 3;
   constexpr int max_nodes_per_face = 4;
 
diff --git a/src/tribol/mesh/MfemData.hpp b/src/tribol/mesh/MfemData.hpp
index e089c747..98ce6738 100644
--- a/src/tribol/mesh/MfemData.hpp
+++ b/src/tribol/mesh/MfemData.hpp
@@ -1549,6 +1549,13 @@ class MfemSubmeshData {
    */
   const mfem::GridFunction& GetRedecompGap() const { return redecomp_gap_; }
 
+  /**
+   * @brief Get mutable access to the gap grid function on the redecomp mesh (non-const overload)
+   *
+   * @return mfem::GridFunction&
+   */
+  mfem::GridFunction& GetRedecompGap() { return redecomp_gap_; }
+
   /**
    * @brief Get the gap vector on the parent-linked boundary submesh
    *
diff --git a/src/tribol/physics/ContactFormulation.hpp b/src/tribol/physics/ContactFormulation.hpp
new file mode 100644
index 00000000..039fd174
--- /dev/null
+++ b/src/tribol/physics/ContactFormulation.hpp
@@ -0,0 +1,150 @@
+// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
+// other Tribol Project Developers. See the top-level LICENSE file for details.
+//
+// SPDX-License-Identifier: (MIT)
+
+#ifndef SRC_TRIBOL_PHYSICS_CONTACTFORMULATION_HPP_
+#define SRC_TRIBOL_PHYSICS_CONTACTFORMULATION_HPP_
+
+#include "tribol/config.hpp"
+
+#include "tribol/common/Parameters.hpp"
+#include "tribol/common/ArrayTypes.hpp"
+#include "tribol/mesh/InterfacePairs.hpp"
+
+#include <memory>
+
+namespace mfem {  // forward declarations for the MFEM types referenced by this interface
+class Vector;
+class HypreParVector;
+class HypreParMatrix;
+class ParGridFunction;
+}  // namespace mfem
+
+namespace tribol {
+
+// Forward declaration
+class MethodData;
+
+/*!
+ * \brief Base class for contact formulations.
+ *
+ * This class provides a polymorphic interface for contact algorithms,
+ * allowing for modular implementation of new physics and formulations.
+ */
+class ContactFormulation {
+ public:
+  /**
+   * @brief Virtual destructor
+   */
+  virtual ~ContactFormulation() = default;
+
+  /**
+   * @brief Sets the initial set of candidate interface pairs
+   *
+   * @param pairs View of the coarse-binned interface pairs
+   * @param check_level In general, higher values mean more checks and 0 means don't do checks. See specific methods for
+   * details.
+   */
+  virtual void setInterfacePairs( ArrayT<InterfacePair>&& pairs, int check_level ) = 0;
+
+  /**
+   * @brief Updates the integration rule
+   *
+   * Determines overlapping contact pairs and computes necessary integration data (e.g. quadrature points, weights).
+   *
+   * @note Requires setInterfacePairs() to be called first.
+   */
+  virtual void updateIntegrationRule() = 0;
+
+  /**
+   * @brief Updates nodal gaps
+   *
+   * @note Requires updateIntegrationRule() to be called first.
+   */
+  virtual void updateNodalGaps() = 0;
+
+  /**
+   * @brief Updates nodal forces/residual
+   *
+   * @note Requires updateNodalGaps() to be called first.
+   */
+  virtual void updateNodalForces() = 0;
+
+  /**
+   * @brief Computes the maximum allowable timestep for the formulation
+   *
+   * @return maximum allowable timestep
+   */
+  virtual RealT computeTimeStep() = 0;
+
+  /**
+   * @brief Returns the energy stored by the contact constraints (if supported by the method)
+   *
+   * @note Requires updateNodalForces() to be called first.
+   *
+   * @return contact energy
+   */
+  virtual RealT getEnergy() const = 0;
+
+#ifdef BUILD_REDECOMP
+  /**
+   * @brief Returns t-dof vector of forces on parent mesh
+   *
+   * @note Requires updateNodalForces() to be called first.
+   */
+  virtual const mfem::HypreParVector& getMfemForce() const = 0;
+
+  /**
+   * @brief Returns t-dof vector of gaps on submesh
+   *
+   * @note Requires updateNodalGaps() to be called first.
+   */
+  virtual const mfem::HypreParVector& getMfemGap() const = 0;
+
+  /**
+   * @brief Returns a reference to the MFEM pressure t-dof vector
+   *
+   * @return Reference to the pressure t-dof vector
+   */
+  virtual mfem::HypreParVector& getMfemPressure() = 0;
+
+  /**
+   * @brief Get the derivative of force with respect to displacement
+   *
+   * @return Unique pointer to MFEM HypreParMatrix
+   *
+   * @note Requires updateNodalForces() to be called first.
+   */
+  virtual std::unique_ptr<mfem::HypreParMatrix> getMfemDfDx() const = 0;
+
+  /**
+   * @brief Get the derivative of gap with respect to displacement
+   *
+   * @return Unique pointer to MFEM HypreParMatrix
+   *
+   * @note Requires updateNodalGaps() to be called first.
+   */
+  virtual std::unique_ptr<mfem::HypreParMatrix> getMfemDgDx() const = 0;
+
+  /**
+   * @brief Get the derivative of force with respect to pressure
+   *
+   * @return Unique pointer to mfem::HypreParMatrix
+   *
+   * @note Requires updateNodalForces() to be called first.
+   */
+  virtual std::unique_ptr<mfem::HypreParMatrix> getMfemDfDp() const = 0;
+
+  virtual void evaluateContactResidual( const mfem::HypreParVector& lambda, mfem::HypreParVector& r_force,
+                                        mfem::HypreParVector& r_gap ) = 0;
+
+  virtual void evaluateContactJacobian( const mfem::HypreParVector& lambda,
+                                        std::unique_ptr<mfem::HypreParMatrix>& df_du,
+                                        std::unique_ptr<mfem::HypreParMatrix>& df_dlambda ) = 0;
+#endif
+};
+
+}  // namespace tribol
+
+#endif /* SRC_TRIBOL_PHYSICS_CONTACTFORMULATION_HPP_ */
\ No newline at end of file
diff --git a/src/tribol/physics/ContactFormulationFactory.cpp b/src/tribol/physics/ContactFormulationFactory.cpp
new file mode 100644
index 00000000..76d8fce0
--- /dev/null
+++ b/src/tribol/physics/ContactFormulationFactory.cpp
@@ -0,0 +1,50 @@
+// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
+// other Tribol Project Developers. See the top-level LICENSE file for details.
+//
+// SPDX-License-Identifier: (MIT)
+
+#include "tribol/physics/ContactFormulationFactory.hpp"
+#include "tribol/physics/EnergyMortarAdapter.hpp"
+#include "tribol/mesh/CouplingScheme.hpp"
+#include "tribol/common/Parameters.hpp"
+
+namespace tribol {
+
+std::unique_ptr<ContactFormulation> createContactFormulation( CouplingScheme* cs )
+{
+  // Only ENERGY_MORTAR has a ContactFormulation so far; a null scheme or any other method yields nullptr.
+  if ( !cs || cs->getContactMethod() != ENERGY_MORTAR ) {
+    return nullptr;
+  }
+
+#if defined( TRIBOL_USE_ENZYME ) && defined( BUILD_REDECOMP )
+  // Default parameters for now, or extract from CouplingScheme if available. Declared inside the #if so that builds
+  // without Enzyme/redecomp do not carry unused locals.
+  double k = 1000.0;
+  const double delta = 0.1;
+  const int N = 3;
+  const bool enzyme_quadrature = true;
+  const bool use_penalty = ( cs->getEnforcementMethod() == PENALTY );
+
+  if ( cs->hasMfemData() ) {
+    // Attempt to get penalty from MfemMeshData if available
+    auto* k_ptr = cs->getMfemMeshData()->GetMesh1KinematicConstantPenalty();
+    if ( k_ptr ) {
+      k = *k_ptr;
+    }
+  }
+
+  SLIC_ERROR_ROOT_IF( !cs->hasMfemSubmeshData(), "ENERGY_MORTAR requires MFEM submesh data." );
+  SLIC_ERROR_ROOT_IF( !cs->hasMfemJacobianData(), "ENERGY_MORTAR requires MFEM Jacobian data." );
+  if ( !cs->hasMfemSubmeshData() || !cs->hasMfemJacobianData() ) {
+    return nullptr;  // do not dereference missing data if the error macros above did not abort
+  }
+  return std::make_unique<EnergyMortarAdapter>( *cs->getMfemSubmeshData(), *cs->getMfemJacobianData(), cs->getMesh1(),
+                                                cs->getMesh2(), k, delta, N, enzyme_quadrature, use_penalty );
+#else
+  SLIC_ERROR_ROOT( "ENERGY_MORTAR requires Enzyme and redecomp to be built." );
+  return nullptr;
+#endif
+}
+
+}  // namespace tribol
diff --git a/src/tribol/physics/ContactFormulationFactory.hpp b/src/tribol/physics/ContactFormulationFactory.hpp
new file mode 100644
index 00000000..1ad6233d
--- /dev/null
+++ b/src/tribol/physics/ContactFormulationFactory.hpp
@@ -0,0 +1,27 @@
+// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
+// other Tribol Project Developers. See the top-level LICENSE file for details.
+//
+// SPDX-License-Identifier: (MIT)
+
+#ifndef SRC_TRIBOL_PHYSICS_CONTACTFORMULATIONFACTORY_HPP_
+#define SRC_TRIBOL_PHYSICS_CONTACTFORMULATIONFACTORY_HPP_
+
+#include "tribol/physics/ContactFormulation.hpp"
+#include <memory>
+
+namespace tribol {
+
+// Forward declaration
+class CouplingScheme;
+
+/**
+ * @brief Factory function to create a ContactFormulation based on the CouplingScheme settings.
+ *
+ * @param cs Pointer to the CouplingScheme; a null pointer is accepted and produces a nullptr result
+ * @return std::unique_ptr<ContactFormulation> The created formulation, or nullptr if no formulation applies.
+ */
+std::unique_ptr<ContactFormulation> createContactFormulation( CouplingScheme* cs );
+
+}  // namespace tribol
+
+#endif /* SRC_TRIBOL_PHYSICS_CONTACTFORMULATIONFACTORY_HPP_ */
diff --git a/src/tribol/physics/EnergyMortar.cpp b/src/tribol/physics/EnergyMortar.cpp
new file mode 100644
index 00000000..511367af
--- /dev/null
+++ b/src/tribol/physics/EnergyMortar.cpp
@@ -0,0 +1,918 @@
+#include "EnergyMortar.hpp"
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <vector>
+#include <array>
+#include <cmath>
+#include <algorithm>
+#include <cassert>
+#include <iomanip>
+#include "tribol/common/ArrayTypes.hpp"
+#include "tribol/common/Parameters.hpp"
+#include "tribol/geom/GeomUtilities.hpp"
+#include "tribol/common/Enzyme.hpp"
+#include "tribol/mesh/MeshData.hpp"
+#include <set>
+#include <map>
+
+namespace tribol {
+
+#ifdef TRIBOL_USE_ENZYME
+
+namespace {
+
+// File-local helper objects used by the kernels and by ContactEvaluator::construct_gparams(). Both are built from a
+// value-initialized ContactParams{}, so smoother.get_del() is 0.0 here regardless of the parameters a
+// ContactEvaluator instance was constructed with.
+// NOTE(review): code that reads smoother.get_del() therefore appears to ignore the configured smoothing width. If
+// that is unintended, pass the width into the kernels explicitly, and do it for values, gradients and Hessians
+// together so they stay consistent with each other.
+static ContactSmoothing smoother( ContactParams{} );
+static ContactEvaluator eval(ContactParams{});
+// (Gparams, the quadrature data passed to the kernels, is declared in EnergyMortar.hpp.)
+
+
+// Unit normal of segment coord1->coord2 (its tangent rotated clockwise by 90 degrees); zero for a zero-length segment.
+void find_normal( const double* coord1, const double* coord2, double* normal )
+{
+  const double dx = coord2[0] - coord1[0];
+  const double dy = coord2[1] - coord1[1];
+  const double len = std::sqrt( dy * dy + dx * dx );
+  // A degenerate segment used to give 0/0 = NaN; return a zero normal, which find_intersection() already handles.
+  normal[0] = ( len > 0.0 ) ? dy / len : 0.0;
+  normal[1] = ( len > 0.0 ) ? -dx / len : 0.0;
+}
+
+// Gauss-Legendre nodes on [-1, 1] for an N-point rule (N = 1..5), ascending; a rule that does not fit in x is skipped.
+template <std::size_t M>
+void determine_legendre_nodes( int N, std::array<double, M>& x )
+{
+  assert( N >= 1 && N <= 5 && static_cast<std::size_t>( N ) <= M && "Unsupported quadrature order" );
+  if ( N == 1 && M >= 1 ) {
+    x[0] = 0.0;
+  } else if ( N == 2 && M >= 2 ) {
+    const double a = 1.0 / std::sqrt( 3.0 );
+    x[0] = -a;
+    x[1] = a;
+  } else if ( N == 3 && M >= 3 ) {
+    const double a = std::sqrt( 3.0 / 5.0 );
+    x[0] = -a;
+    x[1] = 0.0;
+    x[2] = a;
+  } else if ( N == 4 && M >= 4 ) {  // the M checks keep every branch inside x (the fixed 3-entry version did not)
+    const double a = std::sqrt( ( 3.0 - 2.0 * std::sqrt( 6.0 / 5.0 ) ) / 7.0 );
+    const double b = std::sqrt( ( 3.0 + 2.0 * std::sqrt( 6.0 / 5.0 ) ) / 7.0 );
+    x[0] = -b;
+    x[1] = -a;
+    x[2] = a;
+    x[3] = b;
+  } else if ( N == 5 && M >= 5 ) {
+    const double a = std::sqrt( 5.0 - 2.0 * std::sqrt( 10.0 / 7.0 ) ) / 3.0;
+    const double b = std::sqrt( 5.0 + 2.0 * std::sqrt( 10.0 / 7.0 ) ) / 3.0;
+    x[0] = -b;
+    x[1] = -a;
+    x[2] = 0.0;
+    x[3] = a;
+    x[4] = b;
+  }
+}
+
+// Gauss-Legendre weights matching determine_legendre_nodes() (N = 1..5); a rule that does not fit in W is skipped.
+template <std::size_t M>
+void determine_legendre_weights( int N, std::array<double, M>& W )
+{
+  assert( N >= 1 && N <= 5 && static_cast<std::size_t>( N ) <= M && "Unsupported quadrature order" );
+  if ( N == 1 && M >= 1 ) {
+    W[0] = 2.0;
+  } else if ( N == 2 && M >= 2 ) {
+    W[0] = 1.0;
+    W[1] = 1.0;
+  } else if ( N == 3 && M >= 3 ) {
+    W[0] = 5.0 / 9.0;
+    W[1] = 8.0 / 9.0;
+    W[2] = 5.0 / 9.0;
+  } else if ( N == 4 && M >= 4 ) {  // the M checks keep every branch inside W (the fixed 3-entry version did not)
+    W[0] = ( 18.0 - std::sqrt( 30.0 ) ) / 36.0;
+    W[1] = ( 18.0 + std::sqrt( 30.0 ) ) / 36.0;
+    W[2] = ( 18.0 + std::sqrt( 30.0 ) ) / 36.0;
+    W[3] = ( 18.0 - std::sqrt( 30.0 ) ) / 36.0;
+  } else if ( N == 5 && M >= 5 ) {
+    W[0] = ( 322.0 - 13.0 * std::sqrt( 70.0 ) ) / 900.0;
+    W[1] = ( 322.0 + 13.0 * std::sqrt( 70.0 ) ) / 900.0;
+    W[2] = 128.0 / 225.0;
+    W[3] = ( 322.0 + 13.0 * std::sqrt( 70.0 ) ) / 900.0;
+    W[4] = ( 322.0 - 13.0 * std::sqrt( 70.0 ) ) / 900.0;
+  }
+}
+
+// Linear map from the parametric coordinate xi to a point on segment coord1->coord2:
+// xi = -0.5 returns coord1 and xi = +0.5 returns coord2.
+void iso_map( const double* coord1, const double* coord2, double xi, double* mapped_coord )
+{
+  const double shape[2] = { 0.5 - xi, 0.5 + xi };
+  for ( int d = 0; d < 2; ++d ) mapped_coord[d] = shape[0] * coord1[d] + shape[1] * coord2[d];
+}
+
+// Fetches the coordinates of face elem_id from mesh via getFaceCoords() into a 4-entry buffer and splits it:
+// the first two values are copied to P0 and the last two to P1.
+inline void endpoints( const MeshData::Viewer& mesh, int elem_id, double P0[2], double P1[2] )
+{
+  double packed[4];
+  mesh.getFaceCoords( elem_id, packed );
+  std::copy( packed, packed + 2, P0 );
+  std::copy( packed + 2, packed + 4, P1 );
+}
+
+// Intersects the line through A0 and A1 with the line through p whose direction is nB and writes the result to
+// intersection. The point is not clamped to the segment A0-A1. If nB is shorter than 1e-14 or the two lines are
+// parallel (|cross product| < 1e-12), p itself is written instead.
+void find_intersection( const double* A0, const double* A1, const double* p, const double* nB, double* intersection )
+{
+  const double edge[2] = { A1[0] - A0[0], A1[1] - A0[1] };
+  const double offset[2] = { p[0] - A0[0], p[1] - A0[1] };
+  const double n_norm = std::sqrt( nB[0] * nB[0] + nB[1] * nB[1] );
+  bool degenerate = n_norm < 1e-14;
+  double dir[2] = { 0.0, 0.0 };
+  double cross = 0.0;
+  if ( !degenerate ) {
+    dir[0] = nB[0] / n_norm;
+    dir[1] = nB[1] / n_norm;
+    cross = edge[0] * dir[1] - edge[1] * dir[0];
+    degenerate = std::abs( cross ) < 1e-12;
+  }
+  if ( degenerate ) {
+    intersection[0] = p[0];
+    intersection[1] = p[1];
+    return;
+  }
+
+  const double t = ( offset[0] * dir[1] - offset[1] * dir[0] ) * ( 1.0 / cross );
+  intersection[0] = A0[0] + t * edge[0];
+  intersection[1] = A0[1] + t * edge[1];
+}
+
+// Projects the two end points of segment B onto the line through segment A.
+//
+// Each end point of B is carried along B's normal (find_normal) until it meets the line through A0 and A1
+// (find_intersection); the hit point is converted to a parametric coordinate on A, with A0 at -0.5 and A1 at +0.5.
+//
+// On return projections[0] holds the smaller and projections[1] the larger of the two coordinates. They are not
+// clamped to [-0.5, 0.5]. A zero-length segment A makes the division by its squared length ill-defined.
+void get_projections( const double* A0, const double* A1, const double* B0, const double* B1, double* projections )
+{
+  double normal_B[2] = { 0.0, 0.0 };
+  find_normal( B0, B1, normal_B );
+
+  const double tangent_A[2] = { A1[0] - A0[0], A1[1] - A0[1] };
+  const double len_sq_A = tangent_A[0] * tangent_A[0] + tangent_A[1] * tangent_A[1];
+
+  const double* ends_B[2] = { B0, B1 };
+  double xi[2] = { 0.0, 0.0 };
+  for ( int e = 0; e < 2; ++e ) {
+    double hit[2] = { 0.0, 0.0 };
+    find_intersection( A0, A1, ends_B[e], normal_B, hit );
+
+    // Fraction of the way from A0 to A1, then shifted so that the midpoint of A is xi = 0.
+    const double frac = ( ( hit[0] - A0[0] ) * tangent_A[0] + ( hit[1] - A0[1] ) * tangent_A[1] ) / len_sq_A;
+    xi[e] = frac - 0.5;
+  }
+
+  const double lo = std::min( xi[0], xi[1] );
+  const double hi = std::max( xi[0], xi[1] );
+
+  projections[0] = lo;
+  projections[1] = hi;
+}
+
+// Accumulates, over the gp->N quadrature points gp->qp / gp->w on segment A, the weighted gap w * N_i * g * J and
+// the tributary length w * N_i * J_ref for both nodes of A. x packs the coordinates as (A0, A1, B0, B1).
+void gtilde_kernel( const double* x, Gparams* gp, double* g_tilde_out, double* A_out )
+{
+  const double A0[2] = { x[0], x[1] };
+  const double A1[2] = { x[2], x[3] };
+  const double B0[2] = { x[4], x[5] };
+  const double B1[2] = { x[6], x[7] };
+
+  const double J = std::sqrt( ( A1[0] - A0[0] ) * ( A1[0] - A0[0] ) + ( A1[1] - A0[1] ) * ( A1[1] - A0[1] ) );
+  // NOTE(review): J_ref is evaluated from the same coordinates as J, so the two are always equal here.
+  const double J_ref = std::sqrt( ( A1[0] - A0[0] ) * ( A1[0] - A0[0] ) + ( A1[1] - A0[1] ) * ( A1[1] - A0[1] ) );
+
+  double nB[2];
+  find_normal( B0, B1, nB );
+
+  double nA[2];
+  find_normal( A0, A1, nA );
+  double dot = nB[0] * nA[0] + nB[1] * nA[1];
+  double eta = ( dot < 0 ) ? dot : 0.0;  // 0 unless the normals of A and B point against each other
+
+  double g1 = 0.0, g2 = 0.0;
+  double AI_1 = 0.0, AI_2 = 0.0;
+
+  for ( int i = 0; i < gp->N; ++i ) {
+    const double xiA = gp->qp[i];
+    const double w = gp->w[i];
+    const double N1 = 0.5 - xiA;
+    const double N2 = 0.5 + xiA;
+
+    // x1 on segment A
+    double x1[2];
+    iso_map( A0, A1, xiA, x1 );
+    // x2: x1 carried along nB onto the line through B0 and B1
+    double x2[2];
+    find_intersection( B0, B1, x1, nB, x2 );
+
+    const double dx = x1[0] - x2[0];
+    const double dy = x1[1] - x2[1];
+
+    // lagged normal on B
+    const double gn = -( dx * nB[0] + dy * nB[1] );
+    const double g = gn * eta;
+
+    g1 += w * N1 * g * J;
+    g2 += w * N2 * g * J;
+    AI_1 += w * N1 * J_ref;
+    AI_2 += w * N2 * J_ref;
+  }
+
+  g_tilde_out[0] = g1;
+  g_tilde_out[1] = g2;
+
+  A_out[0] = AI_1;
+  A_out[1] = AI_2;
+}
+
+//**************************************** */
+// Enzyme functions for constant quadrature:
+
+// Same accumulation as gtilde_kernel(), taking the quadrature data through a pointer to const. NOTE(review): the
+// two bodies are duplicates apart from that qualifier; consider keeping a single implementation.
+void gtilde_kernel_quad( const double* x, const Gparams* gp, double* g_tilde_out, double* A_out )
+{
+  const double A0[2] = { x[0], x[1] };
+  const double A1[2] = { x[2], x[3] };
+  const double B0[2] = { x[4], x[5] };
+  const double B1[2] = { x[6], x[7] };
+
+  const double J = std::sqrt( ( A1[0] - A0[0] ) * ( A1[0] - A0[0] ) + ( A1[1] - A0[1] ) * ( A1[1] - A0[1] ) );
+  // NOTE(review): J_ref is evaluated from the same coordinates as J, so the two are always equal here.
+  const double J_ref = std::sqrt( ( A1[0] - A0[0] ) * ( A1[0] - A0[0] ) + ( A1[1] - A0[1] ) * ( A1[1] - A0[1] ) );
+
+  double nB[2];
+  find_normal( B0, B1, nB );
+
+  double nA[2];
+  find_normal( A0, A1, nA );
+  double dot = nB[0] * nA[0] + nB[1] * nA[1];
+  double eta = ( dot < 0 ) ? dot : 0.0;  // 0 unless the normals of A and B point against each other
+
+  double g1 = 0.0, g2 = 0.0;
+  double AI_1 = 0.0, AI_2 = 0.0;
+
+  for ( int i = 0; i < gp->N; ++i ) {
+    const double xiA = gp->qp[i];
+    const double w = gp->w[i];
+    const double N1 = 0.5 - xiA;
+    const double N2 = 0.5 + xiA;
+
+    // x1 on segment A
+    double x1[2];
+    iso_map( A0, A1, xiA, x1 );
+    // x2: x1 carried along nB onto the line through B0 and B1
+    double x2[2];
+    find_intersection( B0, B1, x1, nB, x2 );
+
+    const double dx = x1[0] - x2[0];
+    const double dy = x1[1] - x2[1];
+
+    // lagged normal on B
+    const double gn = -( dx * nB[0] + dy * nB[1] );
+    const double g = gn * eta;
+
+    g1 += w * N1 * g * J;
+    g2 += w * N2 * g * J;
+    AI_1 += w * N1 * J_ref;
+    AI_2 += w * N2 * J_ref;
+  }
+
+  g_tilde_out[0] = g1;
+  g_tilde_out[1] = g2;
+
+  A_out[0] = AI_1;
+  A_out[1] = AI_2;
+}
+
+
+
+enum class KernelOutput { GTILDE1, GTILDE2, A1, A2 };  // selects one of the four scalars the gtilde kernels produce
+
+template <KernelOutput Output>  // scalar wrapper: evaluates gtilde_kernel_quad() and stores the selected value in *out
+static void kernel_out( const double* x, const void* gp_void, double* out) {
+  const Gparams* gp = static_cast<const Gparams*>( gp_void );  // quadrature data arrives as an untyped pointer
+  double gt[2];
+  double A_out[2];
+  gtilde_kernel_quad( x, gp, gt, A_out);
+  if constexpr ( Output == KernelOutput::GTILDE1) *out = gt[0];
+  else if constexpr ( Output == KernelOutput::GTILDE2 ) *out = gt[1];
+  else if constexpr ( Output == KernelOutput::A1 ) *out = A_out[0];
+  else if constexpr ( Output == KernelOutput::A2 ) *out = A_out[1];
+}
+
+// Gradient of the scalar selected by Output with respect to the 8 coordinates in x, for fixed quadrature data gp.
+template <KernelOutput Output>
+void grad_kernel( const double* x, const Gparams* gp, double* dout_du) {
+  double dx[8] = {0.0};
+  double out = 0.0;
+  double dout = 1.0;  // seed for the single output
+  // x is paired with its shadow dx (enzyme_dup), gp is marked enzyme_const, out is paired with the seed dout.
+  __enzyme_autodiff<void>( (void*)kernel_out<Output>, enzyme_dup, x, dx, enzyme_const, (const void*)gp, enzyme_dup, &out, &dout );
+  for ( int i = 0; i < 8; ++i ) dout_du[i] = dx[i];
+}
+
+
+
+//**************************************** */
+// Enzyme functions for varying quadrature:
+
+// Scalar wrapper for Enzyme in which the quadrature rule is rebuilt from x (projection -> clamp -> smooth ->
+// quadrature) inside the function, so the rule itself is part of what gets differentiated.
+template <KernelOutput Output> 
+static void kernel_out_enzyme( const double* x, double* out) {
+  double A0[2], A1[2], B0[2], B1[2];
+  A0[0] = x[0];
+  A0[1] = x[1];
+  A1[0] = x[2];
+  A1[1] = x[3];
+  B0[0] = x[4];
+  B0[1] = x[5];
+  B1[0] = x[6];
+  B1[1] = x[7];
+
+  double projs[2] = { 0 };
+  get_projections( A0, A1, B0, B1, projs );
+  std::array<double, 2> projections = { projs[0], projs[1] };
+  // NOTE(review): the smoothing width is read from the file-local `smoother`, which is built from ContactParams{}.
+  auto bounds = ContactSmoothing::bounds_from_projections( projections, smoother.get_del() );
+  auto xi_bounds = ContactSmoothing::smooth_bounds( bounds, smoother.get_del() );
+
+  auto qp = ContactEvaluator::compute_quadrature( xi_bounds );
+
+  const int N = static_cast<int>( qp.qp.size() );
+
+  Gparams gp;
+  gp.N = N;
+  gp.qp = qp.qp.data();
+  gp.w = qp.w.data();
+  gp.x2 = nullptr;
+
+  double gt[2];
+  double A_out[2];
+  gtilde_kernel( x, &gp, gt, A_out );
+
+  if constexpr ( Output == KernelOutput::GTILDE1 ) *out = gt[0];
+  else if constexpr ( Output == KernelOutput::GTILDE2 ) *out = gt[1];
+  else if constexpr ( Output == KernelOutput::A1 )     *out = A_out[0];
+  else if constexpr ( Output == KernelOutput::A2 )     *out = A_out[1];
+}
+
+// Gradient of kernel_out_enzyme<Output> with respect to the 8 coordinates in x, written to dout_du[0..7].
+template <KernelOutput Output>
+void grad_kernel_enzyme( const double* x, double* dout_du )
+{
+  double dx[8] = { 0.0 };
+  double out = 0.0;
+  double dout = 1.0;  // seed for the single output
+  __enzyme_autodiff<void>( (void*)kernel_out_enzyme<Output>, enzyme_dup, x, dx, enzyme_dup, &out, &dout );
+
+  for ( int i = 0; i < 8; ++i ) {
+    dout_du[i] = dx[i];
+  } 
+}
+
+
+// 8x8 second-derivative matrix of kernel_out_enzyme<Output>, stored so that H[row * 8 + col] is entry (row, col).
+// Built one column at a time: forward mode is applied to grad_kernel_enzyme along the unit direction e_col.
+template <KernelOutput Output>
+void d2_kernel( const double* x, double* H )
+{
+  for ( int col = 0; col < 8; ++col ) {
+    double dx[8] = { 0.0 };
+    dx[col] = 1.0;
+    double grad[8] = { 0.0 };
+    double dgrad[8] = { 0.0 };
+
+    __enzyme_fwddiff<void>( (void*)grad_kernel_enzyme<Output>, enzyme_dup, x, dx, enzyme_dup, grad, dgrad );
+    for ( int row = 0; row < 8; ++row ) H[row * 8 + col] = dgrad[row];
+  }
+}
+
+
+
+template <KernelOutput Output> 
+void d2_kernel_quad( const double* x, const Gparams* gp, double* H ) 
+{
+  for ( int col = 0; col < 8; ++col) {
+    double dx[8] = {0.0};
+    dx[col] = 1.0;
+    double grad[8] = {0.0};
+    double dgrad[8] = {0.0};
+    // Forward mode over grad_kernel along e_col with gp held constant; dgrad becomes column col of H.
+    __enzyme_fwddiff<void>( (void*)grad_kernel<Output>, enzyme_dup, x, dx, enzyme_const, (const void*)gp, enzyme_dup, grad, dgrad  );
+    for ( int row = 0; row < 8; ++row ) H[row * 8 + col] = dgrad[row];
+  }
+}
+
+
+}  // namespace
+
+
+
+
+// Builds the fixed-quadrature data for one pair: the quadrature points and weights on face A and, for each point,
+// the matching point on the line through face B.
+// Gparams only stores raw pointers, so the arrays behind them are kept in thread_local storage. The returned pointers
+// stay valid until the next construct_gparams() call on the same thread. (They used to point at locals that were
+// destroyed on return, leaving every caller with dangling pointers.)
+Gparams ContactEvaluator::construct_gparams( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                             const MeshData::Viewer& mesh2 ) const
+{
+  thread_local QuadPoints quad;
+  thread_local std::vector<double> x2;
+
+  double A0[2], A1[2], B0[2], B1[2];
+  endpoints( mesh1, pair.m_element_id1, A0, A1 );
+  endpoints( mesh2, pair.m_element_id2, B0, B1 );
+  double nB[2] = { 0.0 };
+  find_normal( B0, B1, nB );
+
+  // NOTE(review): the smoothing width comes from the file-local `smoother`, not from this evaluator's parameters.
+  const auto projs = projections( pair, mesh1, mesh2 );
+  const auto bounds = ContactSmoothing::bounds_from_projections( projs, smoother.get_del() );
+  quad = compute_quadrature( ContactSmoothing::smooth_bounds( bounds, smoother.get_del() ) );
+
+  const int N = static_cast<int>( quad.qp.size() );
+  x2.resize( 2 * N );
+  for ( int i = 0; i < N; ++i ) {
+    double x1[2] = { 0.0 };
+    iso_map( A0, A1, quad.qp[i], x1 );
+    find_intersection( B0, B1, x1, nB, &x2[2 * i] );  // fills x2[2 * i] and x2[2 * i + 1]
+  }
+  Gparams gp;
+  gp.N = N;
+  gp.qp = quad.qp.data();
+  gp.w = quad.w.data();
+  gp.x2 = x2.data();
+  return gp;
+}
+
+
+// Fetches the end points of face m_element_id1 of mesh1 and face m_element_id2 of mesh2 and returns what
+// get_projections() computes for them, as a two-entry array.
+std::array<double, 2> ContactEvaluator::projections( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                                     const MeshData::Viewer& mesh2 ) const
+{
+  double A0[2], A1[2];
+  double B0[2], B1[2];
+  endpoints( mesh1, pair.m_element_id1, A0, A1 );
+  endpoints( mesh2, pair.m_element_id2, B0, B1 );
+
+  std::array<double, 2> xi{};
+  get_projections( A0, A1, B0, B1, xi.data() );
+
+  return xi;
+}
+
+// Orders the two projections and limits each of them to the padded element range [-0.5 - del, 0.5 + del].
+// Returns { lower bound, upper bound }.
+std::array<double, 2> ContactSmoothing::bounds_from_projections( const std::array<double, 2>& proj, double del )
+{
+  const double lower_limit = -0.5 - del;
+  const double upper_limit = 0.5 + del;
+
+  double lo = std::min( proj[0], proj[1] );
+  double hi = std::max( proj[0], proj[1] );
+
+  // Upper bound: raise it to the lower limit first, then cap it at the upper limit.
+  if ( hi < lower_limit ) hi = lower_limit;
+  if ( hi > upper_limit ) hi = upper_limit;
+
+  // Lower bound: cap it at the upper limit first, then raise it to the lower limit.
+  if ( lo > upper_limit ) lo = upper_limit;
+  if ( lo < lower_limit ) lo = lower_limit;
+
+  return { lo, hi };
+}
+
+// Maps both clamped bounds through the C1 smoothing: quadratic within del of either element end, identity between.
+std::array<double, 2> ContactSmoothing::smooth_bounds( const std::array<double, 2>& bounds, double del )
+{
+  std::array<double, 2> result;
+  for ( int i = 0; i < 2; ++i ) {
+    const double xi = bounds[i] + 0.5;  // shift the element range [-0.5, 0.5] to [0, 1]
+    double xi_hat = 0.0;                // kept for xi < -del, where the left blend has already reached 0
+    if ( del == 0.0 ) {
+      xi_hat = xi;
+    } else {
+      if ( 0.0 - del <= xi && xi <= del ) {
+        xi_hat = ( 1.0 / ( 4 * del ) ) * ( xi * xi ) + 0.5 * xi + del / 4.0;
+      } else if ( ( 1.0 - del ) <= xi && xi <= 1.0 + del ) {
+        double b = -1.0 / ( 4.0 * del );
+        double c = 0.5 + 1.0 / ( 2.0 * del );
+        double d = 1.0 - del + ( 1.0 / ( 4.0 * del ) ) * std::pow( 1.0 - del, 2 ) - 0.5 * ( 1.0 - del ) -
+                   ( 1.0 - del ) / ( 2.0 * del );
+        xi_hat = b * xi * xi + c * xi + d;
+      } else if ( del <= xi && xi <= ( 1.0 - del ) ) {
+        xi_hat = xi;
+      } else if ( xi > 1.0 + del ) {
+        // Past the right blend (e.g. by round-off in bounds + 0.5): saturate at 1 instead of dropping to 0.
+        xi_hat = 1.0;
+      }
+    }
+    result[i] = xi_hat - 0.5;
+  }
+  return result;
+}
+
+// Maps the 3-point Gauss-Legendre rule from [-1, 1] onto [xi_bounds[0], xi_bounds[1]]: the points are scaled by the
+// half-width of the interval and shifted to its midpoint, the weights are scaled by the half-width.
+QuadPoints ContactEvaluator::compute_quadrature( const std::array<double, 2>& xi_bounds )
+{
+  const int N = 3;
+  std::array<double, 3> ref_points;
+  std::array<double, 3> ref_weights;
+  determine_legendre_nodes( N, ref_points );
+  determine_legendre_weights( N, ref_weights );
+
+  const double lo = xi_bounds[0];
+  const double hi = xi_bounds[1];
+  const double half_width = 0.5 * ( hi - lo );
+  const double midpoint = 0.5 * ( hi + lo );
+
+  QuadPoints mapped;
+  for ( int q = 0; q < N; ++q ) {
+    mapped.qp[q] = half_width * ref_points[q] + midpoint;
+    mapped.w[q] = ref_weights[q] * half_width;
+  }
+  return mapped;
+}
+
+// Weighted normal gap at parametric coordinate xiA of face m_element_id1 of mesh1 (A) against face m_element_id2
+// of mesh2 (B). The point on A is carried along B's normal onto the line through B; the signed distance along
+// that normal is then multiplied by the dot product of the two normals when it is negative, and by 0 otherwise.
+double ContactEvaluator::gap( const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
+                              double xiA ) const
+{
+  double A0[2], A1[2], B0[2], B1[2];
+  endpoints( mesh1, pair.m_element_id1, A0, A1 );
+  endpoints( mesh2, pair.m_element_id2, B0, B1 );
+
+  double normal_A[2] = { 0.0 };
+  double normal_B[2] = { 0.0 };
+  find_normal( A0, A1, normal_A );
+  find_normal( B0, B1, normal_B );
+
+  double point_A[2] = { 0.0 };
+  iso_map( A0, A1, xiA, point_A );
+  double point_B[2] = { 0.0 };
+  find_intersection( B0, B1, point_A, normal_B, point_B );
+
+  const double sep[2] = { point_A[0] - point_B[0], point_A[1] - point_B[1] };
+  const double signed_gap = -( sep[0] * normal_B[0] + sep[1] * normal_B[1] );
+
+  const double alignment = normal_B[0] * normal_A[0] + normal_B[1] * normal_A[1];
+  const double eta = ( alignment < 0 ) ? alignment : 0.0;
+
+  return signed_gap * eta;
+}
+
+// Integrates, for the two nodes of face m_element_id1 of mesh1 (segment A), the weighted gap and the tributary
+// length over the smoothed overlap with face m_element_id2 of mesh2 (segment B).
+//
+// With the linear shape functions N1 = 0.5 - xi and N2 = 0.5 + xi and the rule from compute_quadrature():
+//   g_tilde[i] = sum_q w_q * N_i(xi_q) * gap(xi_q) * J
+//   AI[i]      = sum_q w_q * N_i(xi_q) * J
+// where J is the length of segment A computed from its current end points.
+//
+// NOTE(review): the smoothing width is read from the file-local `smoother` rather than from smoother_ / p_.
+NodalContactData ContactEvaluator::compute_nodal_contact_data( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                                               const MeshData::Viewer& mesh2 ) const
+{
+  double A0[2];
+  double A1[2];
+  endpoints( mesh1, pair.m_element_id1, A0, A1 );
+
+  const double seg_len = std::sqrt( std::pow( A1[0] - A0[0], 2 ) + std::pow( A1[1] - A0[1], 2 ) );
+
+  const auto raw = projections( pair, mesh1, mesh2 );
+  const auto clamped = ContactSmoothing::bounds_from_projections( raw, smoother.get_del() );
+  const auto rule = compute_quadrature( ContactSmoothing::smooth_bounds( clamped, smoother.get_del() ) );
+
+  double gt[2] = { 0.0, 0.0 };
+  double area[2] = { 0.0, 0.0 };
+
+  for ( size_t i = 0; i < rule.qp.size(); ++i ) {
+    const double xi = rule.qp[i];
+    const double weight = rule.w[i];
+    const double shape[2] = { 0.5 - xi, 0.5 + xi };
+    const double g = gap( pair, mesh1, mesh2, xi );
+
+    for ( int node = 0; node < 2; ++node ) {
+      gt[node] += weight * shape[node] * g * seg_len;
+      area[node] += weight * shape[node] * seg_len;
+    }
+  }
+
+  NodalContactData contact_data;
+
+  // AI: tributary lengths, g_tilde: weighted gaps, one entry per node of A.
+  contact_data.AI = { area[0], area[1] };
+  contact_data.g_tilde = { gt[0], gt[1] };
+
+  return contact_data;
+}
+
+
+// Penalty pressure for the two nodes of a face.
+//
+// For each node the averaged gap is g = g_tilde / AI. The pressure is p_.k * g when g is negative and 0 otherwise.
+//
+// A node whose tributary length AI is below 1e-12 always gets zero pressure; that case is tested before the
+// division is carried out.
+std::array<double, 2> ContactEvaluator::compute_pressures( const NodalContactData& ncd ) const
+{
+  std::array<double, 2> pressures = { 0.0, 0.0 };
+
+  for ( int i = 0; i < 2; ++i ) {
+    if ( ncd.AI[i] < 1e-12 ) {
+      continue;  // negligible tributary length: the pressure stays at zero
+    }
+
+    const double g = ncd.g_tilde[i] / ncd.AI[i];
+
+    // KKT conditions: pressure only acts where the averaged gap is negative.
+    if ( g < 0.0 ) {
+      pressures[i] = p_.k * g;
+    }
+  }
+
+  return pressures;
+}
+
+
+// Contact energy of one face pair: the sum over the two nodes of face A of pressure * g_tilde, with the weighted
+// gaps from compute_nodal_contact_data() and the pressures from compute_pressures().
+double ContactEvaluator::compute_contact_energy( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                                 const MeshData::Viewer& mesh2 ) const
+{
+  const NodalContactData ncd = compute_nodal_contact_data( pair, mesh1, mesh2 );
+  const std::array<double, 2> p = compute_pressures( ncd );
+
+  // One term per node of face A.
+  const double energy = p[0] * ncd.g_tilde[0] + p[1] * ncd.g_tilde[1];
+  return energy;
+}
+
+// Copies the two nodal weighted gaps and the two tributary lengths from compute_nodal_contact_data() into the
+// caller's plain arrays gtilde[0..1] and area[0..1].
+void ContactEvaluator::gtilde_and_area( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                        const MeshData::Viewer& mesh2, double gtilde[2], double area[2] ) const
+{
+  const NodalContactData ncd = compute_nodal_contact_data( pair, mesh1, mesh2 );
+  std::copy( ncd.g_tilde.begin(), ncd.g_tilde.end(), gtilde );
+  std::copy( ncd.AI.begin(), ncd.AI.end(), area );
+}
+
+// Gradients of the two nodal weighted gaps with respect to the 8 pair coordinates.
+//
+// The coordinates are ordered (A0, A1, B0, B1), where A is face m_element_id1 of mesh1 and B is face m_element_id2
+// of mesh2. dgt1_dx and dgt2_dx each receive 8 values, for the first and second node of A respectively.
+//
+// If p_.enzyme_quadrature is false, the quadrature data is built once by construct_gparams() and held constant
+// while differentiating; otherwise the kernel that rebuilds the quadrature from the coordinates is differentiated.
+void ContactEvaluator::grad_gtilde( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                    const MeshData::Viewer& mesh2, double dgt1_dx[8], double dgt2_dx[8] ) const
+{
+  double A0[2], A1[2], B0[2], B1[2];
+  endpoints( mesh1, pair.m_element_id1, A0, A1 );
+  endpoints( mesh2, pair.m_element_id2, B0, B1 );
+
+  const double x[8] = { A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1] };
+
+  // Staging buffers; copied to the outputs at the end. (The unused segment normals are no longer computed.)
+  double dg1_du[8] = { 0.0 };
+  double dg2_du[8] = { 0.0 };
+
+  // One gradient per node of A, from the kernel variant selected by p_.enzyme_quadrature.
+  if ( !p_.enzyme_quadrature ) {
+    const Gparams gp = construct_gparams( pair, mesh1, mesh2 );
+    grad_kernel<KernelOutput::GTILDE1>( x, &gp, dg1_du );
+    grad_kernel<KernelOutput::GTILDE2>( x, &gp, dg2_du );
+  } else {
+    grad_kernel_enzyme<KernelOutput::GTILDE1>( x, dg1_du );
+    grad_kernel_enzyme<KernelOutput::GTILDE2>( x, dg2_du );
+  }
+
+  for ( int i = 0; i < 8; ++i ) {
+    dgt1_dx[i] = dg1_du[i];
+    dgt2_dx[i] = dg2_du[i];
+  }
+}
+
+// Gradients of the two nodal tributary lengths with respect to the 8 pair coordinates.
+//
+// The coordinates are ordered (A0, A1, B0, B1), where A is face m_element_id1 of mesh1 and B is face m_element_id2
+// of mesh2. dA1_dx and dA2_dx each receive 8 values, for the first and second node of A respectively.
+//
+// p_.enzyme_quadrature selects the kernel variant in the same way as in grad_gtilde(). The segment normals that
+// were computed here but never read have been removed.
+void ContactEvaluator::grad_trib_area( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                       const MeshData::Viewer& mesh2, double dA1_dx[8], double dA2_dx[8] ) const
+{
+  double A0[2], A1[2], B0[2], B1[2];
+  endpoints( mesh1, pair.m_element_id1, A0, A1 );
+  endpoints( mesh2, pair.m_element_id2, B0, B1 );
+
+  const double x[8] = { A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1] };
+
+  if ( !p_.enzyme_quadrature ) {
+    const Gparams gp = construct_gparams( pair, mesh1, mesh2 );
+    grad_kernel<KernelOutput::A1>( x, &gp, dA1_dx );
+    grad_kernel<KernelOutput::A2>( x, &gp, dA2_dx );
+  } else {
+    grad_kernel_enzyme<KernelOutput::A1>( x, dA1_dx );
+    grad_kernel_enzyme<KernelOutput::A2>( x, dA2_dx );
+  }
+}
+
+// Contact force on the 8 pair coordinates (A0, A1, B0, B1):
+//
+//   f[i] = sum_j ( 2 * p_j * d g_tilde_j / dx_i  -  p_j * g_j * d AI_j / dx_i )
+//
+// where j runs over the two nodes of face A, p_j comes from compute_pressures(), and g_j = g_tilde_j / AI_j is the
+// averaged nodal gap, taken as 0 when AI_j is below 1e-12.
+std::array<double, 8> ContactEvaluator::compute_contact_forces( const InterfacePair& pair,
+                                                                const MeshData::Viewer& mesh1,
+                                                                const MeshData::Viewer& mesh2 ) const
+{
+  double dgt[2][8] = { { 0.0 } };
+  double dAI[2][8] = { { 0.0 } };
+  grad_gtilde( pair, mesh1, mesh2, dgt[0], dgt[1] );
+  grad_trib_area( pair, mesh1, mesh2, dAI[0], dAI[1] );
+
+  const NodalContactData ncd = compute_nodal_contact_data( pair, mesh1, mesh2 );
+  const std::array<double, 2> pressures = compute_pressures( ncd );
+
+  // The averaged gap depends only on the node, so it is evaluated once per node rather than once per entry.
+  double g_avg[2];
+  for ( int j = 0; j < 2; ++j ) {
+    g_avg[j] = ( ncd.AI[j] < 1e-12 ) ? 0.0 : ncd.g_tilde[j] / ncd.AI[j];
+  }
+
+  std::array<double, 8> f = { 0.0 };
+
+  for ( int i = 0; i < 8; ++i ) {
+    for ( int j = 0; j < 2; ++j ) {
+      f[i] += ( 2 * pressures[j] * dgt[j][i] - pressures[j] * g_avg[j] * dAI[j][i] );
+    }
+  }
+
+  return f;
+}
+
+
+
+// Second derivatives of the two nodal weighted gaps with respect to the 8 pair coordinates.
+//
+// The coordinates are ordered (A0, A1, B0, B1), where A is face m_element_id1 of mesh1 and B is face m_element_id2
+// of mesh2. H1 and H2 each receive 64 values: the 8x8 matrix for the first / second node of A, with entry
+// (row, col) stored at index row * 8 + col.
+//
+// If p_.enzyme_quadrature is false, the quadrature data is built once by construct_gparams() and held constant
+// while differentiating; otherwise the kernel that rebuilds the quadrature from the coordinates is differentiated.
+// The segment normals that were computed here but never read have been removed.
+void ContactEvaluator::d2_g2tilde( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                   const MeshData::Viewer& mesh2, double H1[64], double H2[64] ) const
+{
+  double A0[2], A1[2], B0[2], B1[2];
+  endpoints( mesh1, pair.m_element_id1, A0, A1 );
+  endpoints( mesh2, pair.m_element_id2, B0, B1 );
+
+  const double x[8] = { A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1] };
+
+  double d2g1_d2u[64] = { 0.0 };
+  double d2g2_d2u[64] = { 0.0 };
+
+  if ( !p_.enzyme_quadrature ) {
+    const Gparams gp = construct_gparams( pair, mesh1, mesh2 );
+    d2_kernel_quad<KernelOutput::GTILDE1>( x, &gp, d2g1_d2u );
+    d2_kernel_quad<KernelOutput::GTILDE2>( x, &gp, d2g2_d2u );
+  } else {
+    d2_kernel<KernelOutput::GTILDE1>( x, d2g1_d2u );
+    d2_kernel<KernelOutput::GTILDE2>( x, d2g2_d2u );
+  }
+
+  for ( int i = 0; i < 64; ++i ) {
+    H1[i] = d2g1_d2u[i];
+    H2[i] = d2g2_d2u[i];
+  }
+}
+
+// Second derivatives of the two nodal tributary lengths with respect to the 8 pair coordinates.
+//
+// The coordinates are ordered (A0, A1, B0, B1), where A is face m_element_id1 of mesh1 and B is face m_element_id2
+// of mesh2. d2A1 and d2A2 each receive 64 values: the 8x8 matrix for the first / second node of A, with entry
+// (row, col) stored at index row * 8 + col.
+//
+// If p_.enzyme_quadrature is false, the quadrature data is built once by construct_gparams() and held constant
+// while differentiating; otherwise the kernel that rebuilds the quadrature from the coordinates is differentiated.
+// The segment normals that were computed here but never read have been removed.
+void ContactEvaluator::compute_d2A_d2u( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                        const MeshData::Viewer& mesh2, double d2A1[64], double d2A2[64] ) const
+{
+  double A0[2], A1[2], B0[2], B1[2];
+  endpoints( mesh1, pair.m_element_id1, A0, A1 );
+  endpoints( mesh2, pair.m_element_id2, B0, B1 );
+
+  const double x[8] = { A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1] };
+
+  double d2A1_d2u[64] = { 0.0 };
+  double d2A2_d2u[64] = { 0.0 };
+
+  if ( !p_.enzyme_quadrature ) {
+    const Gparams gp = construct_gparams( pair, mesh1, mesh2 );
+    d2_kernel_quad<KernelOutput::A1>( x, &gp, d2A1_d2u );
+    d2_kernel_quad<KernelOutput::A2>( x, &gp, d2A2_d2u );
+  } else {
+    d2_kernel<KernelOutput::A1>( x, d2A1_d2u );
+    d2_kernel<KernelOutput::A2>( x, d2A2_d2u );
+  }
+
+  for ( int i = 0; i < 64; ++i ) {
+    d2A1[i] = d2A1_d2u[i];
+    d2A2[i] = d2A2_d2u[i];
+  }
+}
+
+// 8x8 tangent stiffness of one pair with respect to the pair coordinates (A0, A1, B0, B1), assembled from the first
+// and second derivatives of the nodal weighted gaps g_tilde and tributary lengths AI.
+//
+// A node contributes only if it is active in the sense of compute_pressures(): its tributary length AI is at least
+// 1e-12 and its averaged gap gI = g_tilde / AI is negative. Other nodes are skipped, which
+//  - keeps the contribution of the other node intact (the previous code reset the whole entry to zero when it met a
+//    node with negligible AI, discarding what the first node had already added), and
+//  - keeps the tangent consistent with compute_contact_forces(), whose pressure is zero for those nodes.
+std::array<std::array<double, 8>, 8> ContactEvaluator::compute_stiffness_matrix( const InterfacePair& pair,
+                                                                                 const MeshData::Viewer& mesh1,
+                                                                                 const MeshData::Viewer& mesh2 ) const
+{
+  const NodalContactData ncd = compute_nodal_contact_data( pair, mesh1, mesh2 );
+
+  double dg_tilde1[8], dg_tilde2[8], dAI1[8], dAI2[8];
+  grad_gtilde( pair, mesh1, mesh2, dg_tilde1, dg_tilde2 );
+  grad_trib_area( pair, mesh1, mesh2, dAI1, dAI2 );
+
+  double d2_gtilde1[64], d2_gtilde2[64], d2_dA1[64], d2_dA2[64];
+  d2_g2tilde( pair, mesh1, mesh2, d2_gtilde1, d2_gtilde2 );
+  compute_d2A_d2u( pair, mesh1, mesh2, d2_dA1, d2_dA2 );
+
+  const std::array<const double*, 2> dg_t = { dg_tilde1, dg_tilde2 };
+  const std::array<const double*, 2> dA = { dAI1, dAI2 };
+  const std::array<const double*, 2> ddg_t = { d2_gtilde1, d2_gtilde2 };
+  const std::array<const double*, 2> ddA = { d2_dA1, d2_dA2 };
+
+  std::array<std::array<double, 8>, 8> K_mat = { { { 0.0 } } };
+
+  for ( int i = 0; i < 2; ++i ) {
+    if ( ncd.AI[i] < 1e-12 ) {
+      continue;  // negligible tributary length: this node adds nothing (and gI would divide by ~0)
+    }
+    const double gI = ncd.g_tilde[i] / ncd.AI[i];
+    if ( !( gI < 0.0 ) ) {
+      continue;  // inactive node: compute_pressures() gives it zero pressure, so its tangent is zero as well
+    }
+
+    for ( int k = 0; k < 8; ++k ) {
+      for ( int j = 0; j < 8; ++j ) {
+        // term 1:
+        K_mat[k][j] += p_.k * ( 2 / ncd.AI[i] ) * dg_t[i][k] * dg_t[i][j];
+        // term 2:
+        K_mat[k][j] += -p_.k * ( 2 * gI / ncd.AI[i] ) * dg_t[i][k] * dA[i][j];
+        // term 3:
+        K_mat[k][j] += -p_.k * ( 2 * gI / ncd.AI[i] ) * dA[i][k] * dg_t[i][j];
+        // term 4:
+        K_mat[k][j] += p_.k * ( 2 * gI * gI / ncd.AI[i] ) * dA[i][k] * dA[i][j];
+        // term 5:
+        K_mat[k][j] += p_.k * 2.0 * gI * ddg_t[i][k * 8 + j];
+        // term 6:
+        K_mat[k][j] += -p_.k * gI * gI * ddA[i][k * 8 + j];
+      }
+    }
+  }
+
+  return K_mat;
+}
+
+
+#endif  // TRIBOL_USE_ENZYME
+
+}  // namespace tribol
diff --git a/src/tribol/physics/EnergyMortar.hpp b/src/tribol/physics/EnergyMortar.hpp
new file mode 100644
index 00000000..125bf0da
--- /dev/null
+++ b/src/tribol/physics/EnergyMortar.hpp
@@ -0,0 +1,156 @@
+#pragma once
+#include <vector>
+#include <array>
+
+#include "tribol/config.hpp"
+
+#include "tribol/mesh/InterfacePairs.hpp"
+#include "tribol/mesh/MeshData.hpp"
+
+namespace tribol {
+
+#ifdef TRIBOL_USE_ENZYME
+
+struct Node {  // 2D point with an integer id. NOTE(review): not referenced anywhere in EnergyMortar.cpp
+  double x, y;
+  int id;
+};
+
+struct Element {  // id plus two node ids. NOTE(review): not referenced anywhere in EnergyMortar.cpp
+  int id;
+  std::array<int, 2> node_ids;
+};
+
+struct Mesh {  // plain node/element lists. NOTE(review): not referenced anywhere in EnergyMortar.cpp
+  std::vector<Node> nodes;
+  std::vector<Element> elements;
+  // node(i) returns nodes[i] without bounds checking (const and non-const access).
+  const Node& node( int i ) const { return nodes[i]; }
+  Node& node( int i ) { return nodes[i]; }
+};
+
+struct QuadPoints {  // a fixed 3-point quadrature rule, as filled in by ContactEvaluator::compute_quadrature()
+  std::array<double, 3> qp;  // quadrature-point coordinates
+  std::array<double, 3> w;  // weights
+};
+
+struct ContactParams {
+  double del = 0.0;                // smoothing width handed to ContactSmoothing (0 turns the smoothing off)
+  double k = 0.0;                  // penalty stiffness: pressure = k * averaged gap
+  int N = 0;                       // requested quadrature order; NOTE(review): compute_quadrature() always uses 3
+  bool enzyme_quadrature = false;  // true: differentiate the quadrature construction too; false: freeze it first
+};
+
+struct NodalContactData {  // per-node results of ContactEvaluator::compute_nodal_contact_data()
+  std::array<double, 2> AI;  // tributary length of each of the two nodes
+  std::array<double, 2> g_tilde;  // integrated weighted gap of each of the two nodes
+};
+
+struct FDResult {  // NOTE(review): not referenced in EnergyMortar.cpp; presumably a finite-difference result
+  std::array<double, 2> dgt;
+};
+
+struct FiniteDiffResult {  // return type of validate_g_tilde() / validate_hessian(); field meanings below are
+  std::vector<double> fd_gradient_g1;  // inferred from the names only -- NOTE(review): confirm against the
+  std::vector<double> fd_gradient_g2;  // code that fills them (not in EnergyMortar.cpp)
+  std::vector<double> analytical_gradient_g1;
+  std::vector<double> analytical_gradient_g2;
+  std::vector<int> node_ids;
+  double g_tilde1_baseline;  // not initialized by default construction
+  double g_tilde2_baseline;  // not initialized by default construction
+};
+
+struct Gparams {
+  int N = 0;                   // number of quadrature points
+  const double* qp = nullptr;  // N quadrature-point coordinates (not owned)
+  const double* w = nullptr;   // N quadrature weights (not owned)
+  const double* x2 = nullptr;  // 2 * N coordinates of the matching points on segment B, or nullptr (not owned)
+};
+
+/// Clamps and smooths the parametric overlap bounds of a face pair; holds a copy of the contact parameters.
+class ContactSmoothing {
+ public:
+  explicit ContactSmoothing( const ContactParams& p ) : p_( p ) {}  // Constructor
+  /// Smoothing width stored in the parameters.
+  double get_del() const { return p_.del; }
+  /// Orders the two projections and limits both to [-0.5 - del, 0.5 + del].
+  static std::array<double, 2> bounds_from_projections( const std::array<double, 2>& proj, double del );
+  /// Applies the smoothing map to both bounds; with del == 0 the bounds are returned unchanged.
+  static std::array<double, 2> smooth_bounds( const std::array<double, 2>& bounds, double del );
+ private: 
+  ContactParams p_;
+};
+
+/// Evaluates contact quantities for a single face pair: nodal gaps and tributary lengths, their first and second
+/// derivatives with respect to the 8 pair coordinates, and the resulting energy. Methods marked "no definition in
+/// EnergyMortar.cpp" are only declared here; NOTE(review): confirm they are defined elsewhere before calling them.
+class ContactEvaluator {
+ public:
+  /// Copies the parameters and builds the matching ContactSmoothing helper.
+  explicit ContactEvaluator( const ContactParams& p ) : p_( p ), smoother_( p ) {}
+  /// Sum over the two nodes of face A of pressure * weighted gap.
+  double compute_contact_energy( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                 const MeshData::Viewer& mesh2 ) const;
+  /// 3-point Gauss-Legendre rule mapped onto [xi_bounds[0], xi_bounds[1]].
+  static QuadPoints compute_quadrature( const std::array<double, 2>& xi_bounds );
+  /// Nodal weighted gaps (gtilde) and tributary lengths (area), two values each.
+  void gtilde_and_area( const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
+                        double gtilde[2], double area[2] ) const;
+  /// Gradients of the two weighted gaps with respect to the 8 pair coordinates.
+  void grad_gtilde( const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
+                    double dgt1_dx[8], double dgt2_dx[8] ) const;
+  /// Gradients of the two tributary lengths with respect to the 8 pair coordinates.
+  void grad_trib_area( const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
+                       double dA1_dx[8], double dA2_dx[8] ) const;
+  /// Second derivatives of the two weighted gaps: two 8x8 matrices, entry (row, col) at index row * 8 + col.
+  void d2_g2tilde( const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
+                   double H1[64], double H2[64] ) const;
+  /// Second derivatives of the two tributary lengths: two 8x8 matrices, entry (row, col) at index row * 8 + col.
+  void compute_d2A_d2u( const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
+                        double d2A1[64], double d2A2[64] ) const;
+  /// No definition in EnergyMortar.cpp.
+  std::pair<double, double> eval_gtilde( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                         const MeshData::Viewer& mesh2 ) const;
+  /// No definition in EnergyMortar.cpp.
+  FiniteDiffResult validate_g_tilde( const InterfacePair& pair, MeshData& mesh1, MeshData& mesh2,
+                                     double epsilon = 1e-7 ) const;
+  /// No definition in EnergyMortar.cpp.
+  std::pair<double, double> eval_gtilde_fixed_qp( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                                  const MeshData::Viewer& mesh2, const QuadPoints& qp_fixed ) const;
+  /// No definition in EnergyMortar.cpp.
+  FiniteDiffResult validate_hessian( const InterfacePair& pair, MeshData& mesh1, MeshData& mesh2,
+                                     double epsilon = 1e-7 ) const;
+
+ private:
+  ContactParams p_;
+  ContactSmoothing smoother_;
+
+  /// Quadrature data for the fixed-quadrature kernels; the result only holds non-owning pointers.
+  Gparams construct_gparams( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                             const MeshData::Viewer& mesh2 ) const;
+  /// Contact force on the 8 pair coordinates.
+  std::array<double, 8> compute_contact_forces( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                                const MeshData::Viewer& mesh2 ) const;
+  /// 8x8 tangent stiffness with respect to the 8 pair coordinates.
+  std::array<std::array<double, 8>, 8> compute_stiffness_matrix( const InterfacePair& pair,
+                                                                 const MeshData::Viewer& mesh1,
+                                                                 const MeshData::Viewer& mesh2 ) const;
+  /// Sorted parametric coordinates on face A onto which the two end points of face B project.
+  std::array<double, 2> projections( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                     const MeshData::Viewer& mesh2 ) const;
+  /// No definition in EnergyMortar.cpp.
+  void grad_gtilde_with_qp( const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
+                            const QuadPoints& qp_fixed, double dgt1_dx[8], double dgt2_dx[8] ) const;
+  /// Weighted normal gap at parametric coordinate xiA of face A.
+  double gap( const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
+              double xiA ) const;
+  /// Integrated weighted gaps and tributary lengths for the two nodes of face A.
+  NodalContactData compute_nodal_contact_data( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                               const MeshData::Viewer& mesh2 ) const;
+  /// Penalty pressures for the two nodes; zero where the averaged gap is not negative or AI is negligible.
+  std::array<double, 2> compute_pressures( const NodalContactData& ncd ) const;
+};
+
+#endif  // TRIBOL_USE_ENZYME
+
+}  // namespace tribol
diff --git a/src/tribol/physics/EnergyMortarAdapter.cpp b/src/tribol/physics/EnergyMortarAdapter.cpp
new file mode 100644
index 00000000..bdc198f6
--- /dev/null
+++ b/src/tribol/physics/EnergyMortarAdapter.cpp
@@ -0,0 +1,407 @@
+// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
+// other Tribol Project Developers. See the top-level LICENSE file for details.
+//
+// SPDX-License-Identifier: (MIT)
+
+#include "tribol/physics/EnergyMortarAdapter.hpp"
+#include "tribol/common/Parameters.hpp"
+#include "tribol/mesh/MfemData.hpp"
+
+namespace tribol {
+
+#ifdef TRIBOL_USE_ENZYME
+
+EnergyMortarAdapter::EnergyMortarAdapter( MfemSubmeshData& submesh_data, MfemJacobianData& jac_data, MeshData& mesh1,
+                                    MeshData& mesh2, double k, double delta, int N, bool enzyme_quadrature,
+                                    bool use_penalty_ )
+    // NOTE: mesh1 maps to mesh2_ and mesh2 maps to mesh1_. This is to keep consistent with mesh1_ being non-mortar and
+    // mesh2_ being mortar as is typical in the literature, but different from Tribol convention.
+    : submesh_data_( submesh_data ),
+      jac_data_( jac_data ),
+      mesh1_( mesh2 ),
+      mesh2_( mesh1 ),
+      use_penalty_( use_penalty_ )  // the argument is the constructor parameter, which shadows the member name
+{
+  if ( mesh1.numberOfNodes() > 0 && mesh2.numberOfNodes() > 0 ) {  // check is skipped when either mesh has no nodes
+    SLIC_ERROR_ROOT_IF( mesh1.spatialDimension() != 2 || mesh2.spatialDimension() != 2,
+                        "ENERGY_MORTAR requires 2D meshes." );
+  }
+
+  params_.k = k;        // penalty stiffness
+  params_.del = delta;  // smoothing length
+  params_.N = N;        // quadrature order
+  params_.enzyme_quadrature = enzyme_quadrature;
+
+  evaluator_ = std::make_unique<ContactEvaluator>( params_ );  // built from the parameters assigned above
+}
+
+void EnergyMortarAdapter::setInterfacePairs( ArrayT<InterfacePair>&& pairs, int /*check_level*/ )
+{
+  // TODO: improved pair identification. For now the candidate pairs are stored as given and check_level is unused.
+  pairs_ = std::move( pairs );
+}
+
+void EnergyMortarAdapter::updateIntegrationRule()
+{
+  // Currently a no-op. TODO: break out integration rule as a separate method
+}
+
+void EnergyMortarAdapter::updateNodalGaps()
+{
+  // NOTE: user should have called updateMfemParallelDecomposition() with updated coords before calling this
+
+  // Tribol level data structures for storing gap, area, and derivatives
+  auto& redecomp_gap = submesh_data_.GetRedecompGap();
+  mfem::GridFunction redecomp_area( redecomp_gap.FESpace() );
+  redecomp_area = 0.0;
+
+  JacobianContributions dg_tilde_dx_contribs( { { BlockSpace::LAGRANGE_MULTIPLIER, BlockSpace::NONMORTAR },
+                                                { BlockSpace::LAGRANGE_MULTIPLIER, BlockSpace::MORTAR } } );
+  JacobianContributions dA_dx_contribs( { { BlockSpace::LAGRANGE_MULTIPLIER, BlockSpace::NONMORTAR },
+                                          { BlockSpace::LAGRANGE_MULTIPLIER, BlockSpace::MORTAR } } );
+
+  dg_tilde_dx_contribs.reserve( pairs_.size(), 8 );
+  dA_dx_contribs.reserve( pairs_.size(), 8 );
+
+  const int node_idx[8] = { 0, 2, 1, 3, 4, 6, 5, 7 };  // swaps entries 1<->2 and 5<->6 when packing the blocks below
+
+  auto mesh1_view = mesh1_.getView();
+  auto mesh2_view = mesh2_.getView();
+
+  // Compute local contributions
+  for ( const auto& pair : pairs_ ) {
+    // These need to be flipped, since the pairs are determined with element 1 associated with mesh 1, and we flipped
+    // the mesh numbers to be consistent with the literature and since the underlying method integrates on element 1
+    InterfacePair flipped_pair( pair.m_element_id2, pair.m_element_id1 );
+    const auto elem1 = static_cast<int>( flipped_pair.m_element_id1 );
+    const auto elem2 = static_cast<int>( flipped_pair.m_element_id2 );
+
+    double g_tilde_elem[2];
+    double A_elem[2];
+
+    evaluator_->gtilde_and_area( flipped_pair, mesh1_view, mesh2_view, g_tilde_elem, A_elem );
+
+    if ( A_elem[0] <= 0.0 && A_elem[1] <= 0.0 ) {
+      continue;
+    }
+
+    auto A_conn = mesh1_view.getConnectivity()( elem1 );
+
+    // Add to nodes of Element A. NOTE(review): redecomp_gap is not zeroed in this function (redecomp_area is) - confirm
+    redecomp_gap( A_conn[0] ) += g_tilde_elem[0];
+    redecomp_gap( A_conn[1] ) += g_tilde_elem[1];
+
+    redecomp_area( A_conn[0] ) += A_elem[0];
+    redecomp_area( A_conn[1] ) += A_elem[1];
+
+    // compute g_tilde first derivative
+    double dg_dx_node1[8];
+    double dg_dx_node2[8];
+    // TODO: make grad_gtilde return directly in dg_tilde_dx_blocks format
+    evaluator_->grad_gtilde( flipped_pair, mesh1_view, mesh2_view, dg_dx_node1, dg_dx_node2 );
+    double dg_tilde_dx_blocks[2][8];
+    for ( int i{ 0 }; i < 4; ++i ) {
+      dg_tilde_dx_blocks[0][i * 2] = dg_dx_node1[node_idx[i]];
+      dg_tilde_dx_blocks[0][i * 2 + 1] = dg_dx_node2[node_idx[i]];
+      dg_tilde_dx_blocks[1][i * 2] = dg_dx_node1[node_idx[i + 4]];
+      dg_tilde_dx_blocks[1][i * 2 + 1] = dg_dx_node2[node_idx[i + 4]];
+    }
+    dg_tilde_dx_contribs.push_back( 0, elem1, elem1, dg_tilde_dx_blocks[0], 8 );
+    dg_tilde_dx_contribs.push_back( 1, elem1, elem2, dg_tilde_dx_blocks[1], 8 );
+
+    double dA_dx_node1[8];
+    double dA_dx_node2[8];
+    // TODO: make grad_trib_area return directly in dA_dx_blocks format
+    evaluator_->grad_trib_area( flipped_pair, mesh1_view, mesh2_view, dA_dx_node1, dA_dx_node2 );
+    double dA_dx_blocks[2][8];
+    for ( int i{ 0 }; i < 4; ++i ) {
+      dA_dx_blocks[0][i * 2] = dA_dx_node1[node_idx[i]];
+      dA_dx_blocks[0][i * 2 + 1] = dA_dx_node2[node_idx[i]];
+      dA_dx_blocks[1][i * 2] = dA_dx_node1[node_idx[i + 4]];
+      dA_dx_blocks[1][i * 2 + 1] = dA_dx_node2[node_idx[i + 4]];
+    }
+    dA_dx_contribs.push_back( 0, elem1, elem1, dA_dx_blocks[0], 8 );
+    dA_dx_contribs.push_back( 1, elem1, elem2, dA_dx_blocks[1], 8 );
+  }
+
+  // Move gap and area to submesh level vectors
+  mfem::ParLinearForm g_tilde_linear_form(
+      const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  submesh_data_.GetSubmeshGap( g_tilde_linear_form );
+  auto& P_submesh = *submesh_data_.GetSubmeshFESpace().GetProlongationMatrix();
+  g_tilde_vec_ = shared::ParVector( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  g_tilde_vec_.Fill( 0.0 );
+  P_submesh.MultTranspose( g_tilde_linear_form, g_tilde_vec_.get() );
+
+  mfem::Array<int> rows_to_elim;
+  if ( !tied_contact_ && use_penalty_ ) {  // penalty mode: zero positive g_tilde entries and remember their rows
+    rows_to_elim.Reserve( g_tilde_vec_.Size() );
+    for ( int i{ 0 }; i < g_tilde_vec_.Size(); ++i ) {
+      if ( g_tilde_vec_[i] > 0.0 ) {
+        g_tilde_vec_[i] = 0.0;
+        rows_to_elim.push_back( i );
+      }
+    }
+  }
+
+  mfem::ParLinearForm A_linear_form( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  submesh_data_.GetPressureTransfer().RedecompToSubmesh( redecomp_area, A_linear_form );
+  A_vec_ = shared::ParVector( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  A_vec_.Fill( 0.0 );
+  P_submesh.MultTranspose( A_linear_form, A_vec_.get() );
+
+  gap_vec_ = g_tilde_vec_.divide( A_vec_, area_tol_ );
+
+  // Move gap and area derivatives to HypreParMatrix (submesh rows, parent mesh cols)
+  dg_tilde_dx_ = jac_data_.GetMfemJacobian( dg_tilde_dx_contribs.get() );
+  if ( !tied_contact_ && use_penalty_ ) {
+    // technically, we should do this on all the vectors/matrices below, but it looks like the multiplication operators
+    // below will zero them out anyway
+    dg_tilde_dx_.EliminateRows( rows_to_elim );
+  }
+
+  dA_dx_ = jac_data_.GetMfemJacobian( dA_dx_contribs.get() );
+}
+
+void EnergyMortarAdapter::updateNodalForces()
+{
+  // NOTE: user should have called updateNodalGaps() with updated coords before calling this
+
+  // compute nodal pressures. these are used in the Hessian vector product below so we don't have to assemble a Hessian
+  // NOTE: in general, pressure should likely be set by the host code
+  pressure_vec_ = params_.k * gap_vec_;
+  energy_ = pressure_vec_.dot( g_tilde_vec_ );
+
+  auto k_over_a = params_.k * A_vec_.inverse( area_tol_ );
+  auto p_over_a = pressure_vec_.divide( A_vec_, area_tol_ );
+
+  shared::ParSparseMat dp_dx( dg_tilde_dx_.get() );
+  dp_dx->ScaleRows( k_over_a.get() );
+  shared::ParSparseMat dp_dx_temp( dA_dx_.get() );
+  dp_dx_temp->ScaleRows( p_over_a.get() );
+  dp_dx -= dp_dx_temp;
+
+  force_vec_ = ( pressure_vec_ * dg_tilde_dx_ ) + ( g_tilde_vec_ * dp_dx );
+
+  JacobianContributions df_dx_contribs( { { BlockSpace::NONMORTAR, BlockSpace::NONMORTAR },
+                                          { BlockSpace::NONMORTAR, BlockSpace::MORTAR },
+                                          { BlockSpace::MORTAR, BlockSpace::NONMORTAR },
+                                          { BlockSpace::MORTAR, BlockSpace::MORTAR } } );
+  df_dx_contribs.reserve( pairs_.size(), 16 );
+
+  const int node_idx[8] = { 0, 2, 1, 3, 4, 6, 5, 7 };
+
+  mfem::GridFunction redecomp_pressure( submesh_data_.GetRedecompGap() );
+  mfem::ParGridFunction submesh_pressure(
+      const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  submesh_pressure.SetFromTrueDofs( pressure_vec_.get() );
+  submesh_data_.GetPressureTransfer().SubmeshToRedecomp( submesh_pressure, redecomp_pressure );
+
+  mfem::GridFunction redecomp_g_tilde( submesh_data_.GetRedecompGap() );
+  mfem::ParGridFunction submesh_g_tilde(
+      const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  submesh_g_tilde.SetFromTrueDofs( g_tilde_vec_.get() );
+  submesh_data_.GetPressureTransfer().SubmeshToRedecomp( submesh_g_tilde, redecomp_g_tilde );
+
+  mfem::GridFunction redecomp_A( submesh_data_.GetRedecompGap() );
+  mfem::ParGridFunction submesh_A( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  submesh_A.SetFromTrueDofs( A_vec_.get() );
+  submesh_data_.GetPressureTransfer().SubmeshToRedecomp( submesh_A, redecomp_A );
+
+  auto mesh1_view = mesh1_.getView();
+  auto mesh2_view = mesh2_.getView();
+
+  // get pairwise action of second derivatives of gaps and pressure for stiffness contribution
+  for ( auto& pair : pairs_ ) {
+    // The stored pair is flipped because this class swaps the mesh numbers and the evaluator integrates on element 1
+    InterfacePair flipped_pair( pair.m_element_id2, pair.m_element_id1 );
+    const auto elem1 = static_cast<int>( flipped_pair.m_element_id1 );
+    const auto node11 = mesh1_view.getConnectivity()( elem1, 0 );
+    const auto node12 = mesh1_view.getConnectivity()( elem1, 1 );
+    const auto elem2 = static_cast<int>( flipped_pair.m_element_id2 );
+
+    const RealT pressure1 = 2.0 * redecomp_pressure( node11 );
+    const RealT pressure2 = 2.0 * redecomp_pressure( node12 );
+    if ( pressure1 == 0.0 && pressure2 == 0.0 ) {
+      continue;
+    }
+
+    // -g_tilde * p / A per node, taken as zero when |A| <= area_tol_ so a node without area cannot yield 0/0 = NaN
+    const RealT area1 = redecomp_A( node11 );
+    const RealT area2 = redecomp_A( node12 );
+    const bool has_area1 = area1 > area_tol_ || area1 < -area_tol_;
+    const bool has_area2 = area2 > area_tol_ || area2 < -area_tol_;
+    const RealT g_p_ainv1 = has_area1 ? -redecomp_g_tilde( node11 ) * redecomp_pressure( node11 ) / area1 : 0.0;
+    const RealT g_p_ainv2 = has_area2 ? -redecomp_g_tilde( node12 ) * redecomp_pressure( node12 ) / area2 : 0.0;
+
+    double d2g_dx2_node1[64];
+    double d2g_dx2_node2[64];
+    // ordering: [dg/(dx0dx0) dg/(dy0dx0) dg/(dx1dx0) ...]
+    evaluator_->d2_g2tilde( flipped_pair, mesh1_view, mesh2_view, d2g_dx2_node1, d2g_dx2_node2 );
+
+    double d2A_dx2_node1[64];
+    double d2A_dx2_node2[64];
+    evaluator_->compute_d2A_d2u( flipped_pair, mesh1_view, mesh2_view, d2A_dx2_node1, d2A_dx2_node2 );
+
+    double df_dx_blocks[2][2][16];
+    for ( int i{ 0 }; i < 2; ++i ) {
+      for ( int j{ 0 }; j < 2; ++j ) {
+        for ( int k{ 0 }; k < 4; ++k ) {
+          for ( int l{ 0 }; l < 4; ++l ) {
+            const int idx = node_idx[l + i * 4] + node_idx[k + j * 4] * 8;
+            df_dx_blocks[i][j][l + k * 4] = pressure1 * d2g_dx2_node1[idx] + pressure2 * d2g_dx2_node2[idx] +
+                                            g_p_ainv1 * d2A_dx2_node1[idx] + g_p_ainv2 * d2A_dx2_node2[idx];
+          }
+        }
+      }
+    }
+
+    df_dx_contribs.push_back( 0, elem1, elem1, df_dx_blocks[0][0], 16 );
+    df_dx_contribs.push_back( 1, elem1, elem2, df_dx_blocks[0][1], 16 );
+    df_dx_contribs.push_back( 2, elem2, elem1, df_dx_blocks[1][0], 16 );
+    df_dx_contribs.push_back( 3, elem2, elem2, df_dx_blocks[1][1], 16 );
+  }
+
+  // Assemble the pairwise second-derivative contributions (2 p d2g_tilde - (p g_tilde / A) d2A) into the Jacobian
+  df_dx_ = jac_data_.GetMfemJacobian( df_dx_contribs.get() );
+
+  auto pg2_over_asq = ( 2.0 * pressure_vec_ )
+                          .multiplyInPlace( g_tilde_vec_ )
+                          .divideInPlace( A_vec_, area_tol_ )
+                          .divideInPlace( A_vec_, area_tol_ );
+
+  auto& submesh_fes = submesh_data_.GetSubmeshFESpace();
+  auto p_over_a_diag = shared::ParSparseMat::diagonalMatrix( submesh_fes.GetComm(), submesh_fes.GlobalTrueVSize(),
+                                                             submesh_fes.GetTrueDofOffsets(), p_over_a.get() );
+  auto pg2_over_asq_diag = shared::ParSparseMat::diagonalMatrix( submesh_fes.GetComm(), submesh_fes.GlobalTrueVSize(),
+                                                                 submesh_fes.GetTrueDofOffsets(), pg2_over_asq.get() );
+
+  // Outer-product terms of d2E/dx2 for E = k g_tilde^2 / A (assuming RAP( X, D, Y ) forms X^T D Y):
+  //   2 (k / A) dg^T dg - 2 (p / A) ( dg^T dA + dA^T dg ) + 2 (p g_tilde / A^2) dA^T dA
+  df_dx_ -= shared::ParSparseMat::RAP( dg_tilde_dx_, p_over_a_diag, dA_dx_ );
+  df_dx_ -= shared::ParSparseMat::RAP( dA_dx_, p_over_a_diag, dg_tilde_dx_ );
+  df_dx_ += shared::ParSparseMat::RAP( dA_dx_, pg2_over_asq_diag, dA_dx_ );
+  df_dx_ += dp_dx.transpose() * dg_tilde_dx_;
+  df_dx_ += dg_tilde_dx_.transpose() * dp_dx;
+}
+
+RealT EnergyMortarAdapter::computeTimeStep()
+{
+  // TODO: implement timestep calculation; until then a constant 1.0 is returned
+  return 1.0;
+}
+
+void EnergyMortarAdapter::compute_df_du_lagrange( const mfem::HypreParVector& lambda,
+                                               std::unique_ptr<mfem::HypreParMatrix>& df_du )
+{
+  // Transfer the multipliers from the submesh to the redecomp mesh so they can be read at element nodes
+  mfem::GridFunction lambda_redecomp( submesh_data_.GetRedecompGap() );
+  mfem::ParGridFunction lambda_submesh(
+      const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  lambda_submesh.SetFromTrueDofs( lambda );
+  submesh_data_.GetPressureTransfer().SubmeshToRedecomp( lambda_submesh, lambda_redecomp );
+
+  // One 16-entry block for every (row space, column space) combination of non-mortar and mortar
+  JacobianContributions hessian_contribs( { { BlockSpace::NONMORTAR, BlockSpace::NONMORTAR },
+                                            { BlockSpace::NONMORTAR, BlockSpace::MORTAR },
+                                            { BlockSpace::MORTAR, BlockSpace::NONMORTAR },
+                                            { BlockSpace::MORTAR, BlockSpace::MORTAR } } );
+  hessian_contribs.reserve( pairs_.size(), 16 );
+
+  // Reordering applied to both indices of the evaluator's 8x8 arrays (swaps entries 1<->2 and 5<->6)
+  const int node_idx[8] = { 0, 2, 1, 3, 4, 6, 5, 7 };
+
+  auto mesh1_view = mesh1_.getView();
+  auto mesh2_view = mesh2_.getView();
+
+  for ( auto& pair : pairs_ ) {
+    // The stored pair is flipped to match the swapped mesh numbering used by this class
+    InterfacePair flipped_pair( pair.m_element_id2, pair.m_element_id1 );
+    const auto elem1 = static_cast<int>( flipped_pair.m_element_id1 );
+    const auto elem2 = static_cast<int>( flipped_pair.m_element_id2 );
+    const auto first_node = mesh1_view.getConnectivity()( elem1, 0 );
+    const auto second_node = mesh1_view.getConnectivity()( elem1, 1 );
+
+    // Multipliers at the two nodes of element 1 (no factor of 2 in Lagrange mode)
+    const RealT lambda1 = lambda_redecomp( first_node );
+    const RealT lambda2 = lambda_redecomp( second_node );
+
+    // Pairs whose multipliers are both zero are not skipped; every pair contributes its blocks.
+    // Second derivatives of g_tilde for each of the two nodes
+    double d2g_dx2_node1[64];
+    double d2g_dx2_node2[64];
+    evaluator_->d2_g2tilde( flipped_pair, mesh1_view, mesh2_view, d2g_dx2_node1, d2g_dx2_node2 );
+
+    // df/du blocks: lambda1 * d2g_tilde_1/du2 + lambda2 * d2g_tilde_2/du2
+    double df_dx_blocks[2][2][16];
+    for ( int i{ 0 }; i < 2; ++i ) {
+      for ( int j{ 0 }; j < 2; ++j ) {
+        for ( int k{ 0 }; k < 4; ++k ) {
+          const int row_offset = node_idx[k + j * 4] * 8;
+          for ( int l{ 0 }; l < 4; ++l ) {
+            const int idx = node_idx[l + i * 4] + row_offset;
+            df_dx_blocks[i][j][l + k * 4] = lambda1 * d2g_dx2_node1[idx] + lambda2 * d2g_dx2_node2[idx];
+          }
+        }
+      }
+    }
+
+    hessian_contribs.push_back( 0, elem1, elem1, df_dx_blocks[0][0], 16 );
+    hessian_contribs.push_back( 1, elem1, elem2, df_dx_blocks[0][1], 16 );
+    hessian_contribs.push_back( 2, elem2, elem1, df_dx_blocks[1][0], 16 );
+    hessian_contribs.push_back( 3, elem2, elem2, df_dx_blocks[1][1], 16 );
+  }
+
+  // Assemble the contributions and hand ownership of the resulting matrix to the caller
+  auto assembled = jac_data_.GetMfemJacobian( hessian_contribs.get() );
+  df_du = std::unique_ptr<mfem::HypreParMatrix>( assembled.release() );
+}
+
+void EnergyMortarAdapter::evaluateContactResidual( const mfem::HypreParVector& lambda, mfem::HypreParVector& r_force,
+                                                mfem::HypreParVector& r_gap )
+{
+  SLIC_ERROR_ROOT_IF( use_penalty_, "evaluateContactResidual() should only be  called in lagrange multiplier mode" );
+
+  SLIC_ERROR_ROOT_IF( g_tilde_vec_.Size() == 0, "updateNodalGaps() must be called before evaluateContactResidual()" );
+
+  // Force residual: r_force = (dg_tilde/du)^T * lambda
+  dg_tilde_dx_->MultTranspose( lambda, r_force );
+
+  // Gap residual: r_gap is assigned the assembled g_tilde vector
+  r_gap = g_tilde_vec_.get();
+}
+
+void EnergyMortarAdapter::evaluateContactJacobian( const mfem::HypreParVector& lambda,
+                                                std::unique_ptr<mfem::HypreParMatrix>& df_du,
+                                                std::unique_ptr<mfem::HypreParMatrix>& df_dlambda )
+{
+  SLIC_ERROR_ROOT_IF( use_penalty_, "evaluateContactJacobian() should only be called in Lagrange multiplier mode" );
+
+  SLIC_ERROR_ROOT_IF( g_tilde_vec_.Size() == 0, "updateNodalGaps() must be called before evaluateContactJacobian()" );
+
+  // df/dlambda = dg_tilde/du. NOTE(review): release() presumably leaves dg_tilde_dx_ empty afterwards - confirm
+  df_dlambda = std::unique_ptr<mfem::HypreParMatrix>( dg_tilde_dx_.release() );
+
+  // df/du = lambda * d2g_tilde/du2, assembled pair by pair in compute_df_du_lagrange()
+  compute_df_du_lagrange( lambda, df_du );
+}
+
+std::unique_ptr<mfem::HypreParMatrix> EnergyMortarAdapter::getMfemDfDx() const
+{  // Hands the matrix stored in df_dx_ to the caller. NOTE(review): presumably df_dx_ is empty afterwards - confirm
+  return std::unique_ptr<mfem::HypreParMatrix>( df_dx_.release() );
+}
+
+std::unique_ptr<mfem::HypreParMatrix> EnergyMortarAdapter::getMfemDgDx() const
+{  // Hands the matrix stored in dg_tilde_dx_ to the caller. NOTE(review): presumably it is empty afterwards - confirm
+  return std::unique_ptr<mfem::HypreParMatrix>( dg_tilde_dx_.release() );
+}
+
+std::unique_ptr<mfem::HypreParMatrix> EnergyMortarAdapter::getMfemDfDp() const
+{
+  // SLIC_ERROR_ROOT( "EnergyMortar does not support getMfemDfDp()" );
+  return nullptr;  // no df/dp matrix is produced by this formulation; callers receive a null pointer
+}
+
+#endif  // TRIBOL_USE_ENZYME
+
+}  // namespace tribol
diff --git a/src/tribol/physics/EnergyMortarAdapter.hpp b/src/tribol/physics/EnergyMortarAdapter.hpp
new file mode 100644
index 00000000..eb08330c
--- /dev/null
+++ b/src/tribol/physics/EnergyMortarAdapter.hpp
@@ -0,0 +1,111 @@
+// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
+// other Tribol Project Developers. See the top-level LICENSE file for details.
+//
+// SPDX-License-Identifier: (MIT)
+
+#ifndef SRC_TRIBOL_PHYSICS_ENERGYMORTARADAPTER_HPP_
+#define SRC_TRIBOL_PHYSICS_ENERGYMORTARADAPTER_HPP_
+#include "tribol/config.hpp"
+
+#include "tribol/physics/ContactFormulation.hpp"
+#include "tribol/physics/EnergyMortar.hpp"
+#include "tribol/mesh/MfemData.hpp"
+#include "tribol/common/Parameters.hpp"
+
+#include "mfem.hpp"
+
+#include <memory>
+
+namespace tribol {
+
+#ifdef TRIBOL_USE_ENZYME
+
+class EnergyMortarAdapter : public ContactFormulation {
+ public:
+  /**
+   * @brief Constructor
+   *
+   * @param submesh_data Submesh-level MFEM data (redecomp gap field, submesh FE space, pressure transfer)
+   * @param jac_data Used to assemble pairwise Jacobian contributions into MFEM matrices
+   * @param mesh1 First contact mesh; stored as mesh2_ (see the NOTE on the constructor definition)
+   * @param mesh2 Second contact mesh; stored as mesh1_
+   * @param k Penalty stiffness
+   * @param delta Smoothing length
+   * @param N Quadrature order
+   * @param enzyme_quadrature Forwarded to ContactParams::enzyme_quadrature
+   * @param use_penalty_ true for penalty mode, false for Lagrange multiplier mode
+   */
+  EnergyMortarAdapter( MfemSubmeshData& submesh_data, MfemJacobianData& jac_data, MeshData& mesh1, MeshData& mesh2,
+                    double k, double delta, int N, bool enzyme_quadrature, bool use_penalty_ = true );
+
+  virtual ~EnergyMortarAdapter() = default;
+
+  // --- ContactFormulation Interface Implementation ---
+
+  void setInterfacePairs( ArrayT<InterfacePair>&& pairs, int check_level ) override;
+  void updateIntegrationRule() override;
+  void updateNodalGaps() override;
+  void updateNodalForces() override;
+  RealT computeTimeStep() override;
+  RealT getEnergy() const override { return energy_; }
+
+#ifdef BUILD_REDECOMP
+  const mfem::HypreParVector& getMfemForce() const override { return force_vec_.get(); }
+
+  const mfem::HypreParVector& getMfemGap() const override { return gap_vec_.get(); }
+
+  mfem::HypreParVector& getMfemPressure() override { return pressure_vec_.get(); }
+
+  // NOTE: getMfemDfDx() and getMfemDgDx() release the stored matrices to the caller; getMfemDfDp() returns nullptr
+  std::unique_ptr<mfem::HypreParMatrix> getMfemDfDx() const override;
+
+  std::unique_ptr<mfem::HypreParMatrix> getMfemDgDx() const override;
+
+  std::unique_ptr<mfem::HypreParMatrix> getMfemDfDp() const override;
+
+  void evaluateContactResidual( const mfem::HypreParVector& lambda, mfem::HypreParVector& r_force,
+                                mfem::HypreParVector& r_gap ) override;
+
+  // Lagrange multiplier mode
+  void evaluateContactJacobian( const mfem::HypreParVector& lambda, std::unique_ptr<mfem::HypreParMatrix>& df_du,
+                                std::unique_ptr<mfem::HypreParMatrix>& df_dlambda ) override;
+
+  void compute_df_du_lagrange( const mfem::HypreParVector& lambda, std::unique_ptr<mfem::HypreParMatrix>& df_du );
+
+#endif
+
+ private:
+  // --- Member Variables ---
+
+  double area_tol_{ 1.0e-14 };
+  bool tied_contact_ = false;
+
+  MfemSubmeshData& submesh_data_;
+  MfemJacobianData& jac_data_;
+  MeshData& mesh1_;
+  MeshData& mesh2_;
+  bool use_penalty_;  // declared after the references so the order matches the constructor's initializer list
+  ContactParams params_;
+  std::unique_ptr<ContactEvaluator> evaluator_;
+
+  // Stored InterfacePairs
+  ArrayT<InterfacePair> pairs_;
+
+  // These store the assembled nodal values
+  shared::ParVector g_tilde_vec_;
+  shared::ParVector A_vec_;
+  shared::ParVector gap_vec_;
+  mutable shared::ParSparseMat dg_tilde_dx_;
+  shared::ParSparseMat dA_dx_;
+
+  shared::ParVector pressure_vec_;  // This holds p = k * g / A
+  RealT energy_{ 0.0 };             // zero until updateNodalForces() computes it, so getEnergy() is always defined
+  shared::ParVector force_vec_;
+  mutable shared::ParSparseMat df_dx_;
+};
+
+#endif  // TRIBOL_USE_ENZYME
+
+}  // namespace tribol
+
+#endif /* SRC_TRIBOL_PHYSICS_ENERGYMORTARADAPTER_HPP_ */
diff --git a/src/tribol/physics/MortarFormulation.cpp b/src/tribol/physics/MortarFormulation.cpp
new file mode 100644
index 00000000..9f43f14f
--- /dev/null
+++ b/src/tribol/physics/MortarFormulation.cpp
@@ -0,0 +1,82 @@
+// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
+// other Tribol Project Developers. See the top-level LICENSE file for details.
+//
+// SPDX-License-Identifier: (MIT)
+
+#include "tribol/physics/MortarFormulation.hpp"
+
+namespace tribol {
+
+template <typename IntegrationRule, typename PointwiseGapAndNormal, typename ForceAndGapMethod>
+void MortarFormulation<IntegrationRule, PointwiseGapAndNormal, ForceAndGapMethod>::setInterfacePairs(
+    ArrayT<InterfacePair>&& pairs, int check_level )
+{  // Moves the candidate pairs into the integration rule's findPairsInContact(), passing check_level through
+  integration_rule_.template findPairsInContact<PointwiseGapAndNormal>( std::move( pairs ), check_level );
+}
+
+template <typename IntegrationRule, typename PointwiseGapAndNormal, typename ForceAndGapMethod>
+void MortarFormulation<IntegrationRule, PointwiseGapAndNormal, ForceAndGapMethod>::updateIntegrationRule()
+{  // Forwards to the integration rule's updateRule(), parameterized on the pointwise gap/normal type
+  integration_rule_.template updateRule<PointwiseGapAndNormal>();
+}
+
+template <typename IntegrationRule, typename PointwiseGapAndNormal, typename ForceAndGapMethod>
+void MortarFormulation<IntegrationRule, PointwiseGapAndNormal, ForceAndGapMethod>::updateNodalGaps()
+{  // Forwards to the force/gap method, passing it the integration rule's current rule
+  force_and_gap_method_.template updateNodalGaps<PointwiseGapAndNormal>( integration_rule_.getRule() );
+}
+
+template <typename IntegrationRule, typename PointwiseGapAndNormal, typename ForceAndGapMethod>
+void MortarFormulation<IntegrationRule, PointwiseGapAndNormal, ForceAndGapMethod>::updateNodalForces()
+{  // Forwards to the force/gap method, passing it the integration rule's current rule
+  force_and_gap_method_.template updateNodalForces<PointwiseGapAndNormal>( integration_rule_.getRule() );
+}
+
+template <typename IntegrationRule, typename PointwiseGapAndNormal, typename ForceAndGapMethod>
+RealT MortarFormulation<IntegrationRule, PointwiseGapAndNormal, ForceAndGapMethod>::computeTimeStep()
+{  // Returns the timestep computed by the force/gap method for the integration rule's current rule
+  return force_and_gap_method_.computeTimeStep( integration_rule_.getRule() );
+}
+
+template <typename IntegrationRule, typename PointwiseGapAndNormal, typename ForceAndGapMethod>
+void MortarFormulation<IntegrationRule, PointwiseGapAndNormal, ForceAndGapMethod>::getMfemForce(
+    mfem::Vector& forces ) const
+{  // Forwards to the force/gap method, which writes into the caller's vector
+  force_and_gap_method_.getMfemForce( forces );
+}
+
+template <typename IntegrationRule, typename PointwiseGapAndNormal, typename ForceAndGapMethod>
+void MortarFormulation<IntegrationRule, PointwiseGapAndNormal, ForceAndGapMethod>::getMfemGap(
+    mfem::Vector& gaps ) const
+{  // Forwards to the force/gap method, which writes into the caller's vector
+  force_and_gap_method_.getMfemGap( gaps );
+}
+
+template <typename IntegrationRule, typename PointwiseGapAndNormal, typename ForceAndGapMethod>
+mfem::ParGridFunction& MortarFormulation<IntegrationRule, PointwiseGapAndNormal, ForceAndGapMethod>::getMfemPressure()
+{  // Returns the reference provided by the force/gap method's getMfemPressure()
+  return force_and_gap_method_.getMfemPressure();
+}
+
+template <typename IntegrationRule, typename PointwiseGapAndNormal, typename ForceAndGapMethod>
+std::unique_ptr<mfem::HypreParMatrix>
+MortarFormulation<IntegrationRule, PointwiseGapAndNormal, ForceAndGapMethod>::getMfemDfDx() const
+{  // Returns the matrix produced by the force/gap method's getMfemDfDx()
+  return force_and_gap_method_.getMfemDfDx();
+}
+
+template <typename IntegrationRule, typename PointwiseGapAndNormal, typename ForceAndGapMethod>
+std::unique_ptr<mfem::HypreParMatrix>
+MortarFormulation<IntegrationRule, PointwiseGapAndNormal, ForceAndGapMethod>::getMfemDgDx() const
+{  // Returns the matrix produced by the force/gap method's getMfemDgDx()
+  return force_and_gap_method_.getMfemDgDx();
+}
+
+template <typename IntegrationRule, typename PointwiseGapAndNormal, typename ForceAndGapMethod>
+std::unique_ptr<mfem::HypreParMatrix>
+MortarFormulation<IntegrationRule, PointwiseGapAndNormal, ForceAndGapMethod>::getMfemDfDp() const
+{  // Returns the matrix produced by the force/gap method's getMfemDfDp()
+  return force_and_gap_method_.getMfemDfDp();
+}
+
+}  // namespace tribol
\ No newline at end of file
diff --git a/src/tribol/physics/MortarFormulation.hpp b/src/tribol/physics/MortarFormulation.hpp
new file mode 100644
index 00000000..30c16c64
--- /dev/null
+++ b/src/tribol/physics/MortarFormulation.hpp
@@ -0,0 +1,127 @@
+// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
+// other Tribol Project Developers. See the top-level LICENSE file for details.
+//
+// SPDX-License-Identifier: (MIT)
+
+#ifndef SRC_TRIBOL_PHYSICS_MORTARFORMULATION_HPP_
+#define SRC_TRIBOL_PHYSICS_MORTARFORMULATION_HPP_
+
+#include "tribol/physics/ContactFormulation.hpp"
+
+namespace tribol {
+
+template <typename IntegrationRule, typename PointwiseGapAndNormal, typename ForceAndGapMethod>
+class MortarFormulation : public ContactFormulation {
+ public:
+  MortarFormulation( IntegrationRule&& integration_rule, PointwiseGapAndNormal&& pointwise_gap_and_normal,
+                     ForceAndGapMethod&& force_and_gap_method )
+      : integration_rule_( std::move( integration_rule ) ),
+        pointwise_gap_and_normal_( std::move( pointwise_gap_and_normal ) ),
+        force_and_gap_method_( std::move( force_and_gap_method ) )
+  {
+  }
+
+  /**
+   * @brief Sets the initial set of candidate interface pairs
+   *
+   * @param pairs View of the coarse-binned interface pairs
+   * @param check_level In general, higher values mean more checks and 0 means don't do checks. See
+   * IntegrationRule::findPairsInContact() for details.
+   */
+  void setInterfacePairs( ArrayT<InterfacePair>&& pairs, int check_level ) override;
+
+  /**
+   * @brief Updates the integration rule
+   *
+   * Determines overlapping contact pairs and computes necessary integration data (e.g. quadrature points, weights).
+   *
+   * @note Requires setInterfacePairs() to be called first.
+   */
+  void updateIntegrationRule() override;
+
+  /**
+   * @brief Updates nodal gaps
+   *
+   * @note Requires updateIntegrationRule() to be called first.
+   */
+  void updateNodalGaps() override;
+
+  /**
+   * @brief Updates nodal forces/residual
+   *
+   * @note Requires updateNodalGaps() to be called first.
+   */
+  void updateNodalForces() override;
+
+  /**
+   * @brief Computes the maximum allowable timestep for the formulation
+   *
+   * @return maximum allowable timestep
+   */
+  RealT computeTimeStep() override;
+
+#ifdef BUILD_REDECOMP
+  /**
+   * @brief Adds computed forces to the provided MFEM vector
+   *
+   * @param [in,out] forces MFEM vector to add forces to
+   *
+   * @note Requires updateNodalForces() to be called first.
+   */
+  void getMfemForce( mfem::Vector& forces ) const override;
+
+  /**
+   * @brief Populates the provided MFEM vector with gap values
+   *
+   * Resizes the vector if necessary, zeros it out, and sets gap values.
+   *
+   * @param [out] gaps MFEM vector to store gaps in
+   *
+   * @note Requires updateNodalGaps() to be called first.
+   */
+  void getMfemGap( mfem::Vector& gaps ) const override;
+
+  /**
+   * @brief Returns a reference to the MFEM pressure grid function
+   *
+   * @return mfem::ParGridFunction& Reference to the pressure grid function
+   */
+  mfem::ParGridFunction& getMfemPressure() override;
+
+  /**
+   * @brief Get the derivative of force with respect to displacement
+   *
+   * @return Unique pointer to MFEM HypreParMatrix
+   *
+   * @note Requires updateNodalForces() to be called first.
+   */
+  std::unique_ptr<mfem::HypreParMatrix> getMfemDfDx() const override;
+
+  /**
+   * @brief Get the derivative of gap with respect to displacement
+   *
+   * @return Unique pointer to MFEM HypreParMatrix
+   *
+   * @note Requires updateNodalGaps() to be called first.
+   */
+  std::unique_ptr<mfem::HypreParMatrix> getMfemDgDx() const override;
+
+  /**
+   * @brief Get the derivative of force with respect to pressure
+   *
+   * @return Unique pointer to mfem::HypreParMatrix
+   *
+   * @note Requires updateNodalForces() to be called first.
+   */
+  std::unique_ptr<mfem::HypreParMatrix> getMfemDfDp() const override;
+#endif
+
+ private:
+  IntegrationRule integration_rule_;
+  PointwiseGapAndNormal pointwise_gap_and_normal_;
+  ForceAndGapMethod force_and_gap_method_;
+};
+
+}  // namespace tribol
+
+#endif /* SRC_TRIBOL_PHYSICS_MORTARFORMULATION_HPP_ */
\ No newline at end of file
diff --git a/src/tribol/physics/NewMethodAdapter.cpp b/src/tribol/physics/NewMethodAdapter.cpp
new file mode 100644
index 00000000..cd5bcf6d
--- /dev/null
+++ b/src/tribol/physics/NewMethodAdapter.cpp
@@ -0,0 +1,407 @@
+// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
+// other Tribol Project Developers. See the top-level LICENSE file for details.
+//
+// SPDX-License-Identifier: (MIT)
+
+#include "tribol/physics/NewMethodAdapter.hpp"
+#include "tribol/common/Parameters.hpp"
+#include "tribol/mesh/MfemData.hpp"
+
+namespace tribol {
+
+#ifdef TRIBOL_USE_ENZYME
+
+NewMethodAdapter::NewMethodAdapter( MfemSubmeshData& submesh_data, MfemJacobianData& jac_data, MeshData& mesh1,
+                                    MeshData& mesh2, double k, double delta, int N, bool enzyme_quadrature,
+                                    bool use_penalty )
+    // NOTE: mesh1 maps to mesh2_ and mesh2 maps to mesh1_. This is to keep consistent with mesh1_ being non-mortar and
+    // mesh2_ being mortar as is typical in the literature, but different from Tribol convention.
+    : use_penalty_( use_penalty ),  // listed first to follow the member declaration order (avoids -Wreorder)
+      submesh_data_( submesh_data ),
+      jac_data_( jac_data ),
+      mesh1_( mesh2 ),
+      mesh2_( mesh1 ),
+      energy_( 0.0 )  // gives getEnergy() a defined value before the first updateNodalForces()
+{
+  if ( mesh1.numberOfNodes() > 0 && mesh2.numberOfNodes() > 0 ) {
+    SLIC_ERROR_ROOT_IF( mesh1.spatialDimension() != 2 || mesh2.spatialDimension() != 2,
+                        "ENERGY_MORTAR requires 2D meshes." );
+  }
+  // The dimension check above is skipped when either mesh has no nodes. Forward the user parameters to the evaluator.
+  params_.k = k;
+  params_.del = delta;
+  params_.N = N;
+  params_.enzyme_quadrature = enzyme_quadrature;
+  evaluator_ = std::make_unique<ContactEvaluator>( params_ );
+}
+
+void NewMethodAdapter::setInterfacePairs( ArrayT<InterfacePair>&& pairs, int /*check_level*/ )
+{
+  // Stores the given pairs as-is (moved into pairs_); check_level is unused. TODO: improved pair identification
+  pairs_ = std::move( pairs );
+}
+
+void NewMethodAdapter::updateIntegrationRule()
+{
+  // Currently a no-op. TODO: break out integration rule as a separate method
+}
+
+void NewMethodAdapter::updateNodalGaps()
+{
+  // NOTE: user should have called updateMfemParallelDecomposition() with updated coords before calling this
+  // NOTE(review): redecomp_gap is accumulated into below without being zeroed here (unlike redecomp_area) — confirm
+  // Tribol level data structures for storing gap, area, and derivatives
+  auto& redecomp_gap = submesh_data_.GetRedecompGap();
+  mfem::GridFunction redecomp_area( redecomp_gap.FESpace() );
+  redecomp_area = 0.0;
+
+  JacobianContributions dg_tilde_dx_contribs( { { BlockSpace::LAGRANGE_MULTIPLIER, BlockSpace::NONMORTAR },
+                                                { BlockSpace::LAGRANGE_MULTIPLIER, BlockSpace::MORTAR } } );
+  JacobianContributions dA_dx_contribs( { { BlockSpace::LAGRANGE_MULTIPLIER, BlockSpace::NONMORTAR },
+                                          { BlockSpace::LAGRANGE_MULTIPLIER, BlockSpace::MORTAR } } );
+
+  dg_tilde_dx_contribs.reserve( pairs_.size(), 8 );
+  dA_dx_contribs.reserve( pairs_.size(), 8 );
+  // Order in which the 8 evaluator gradient entries are read when filling the two 8-entry blocks per pair
+  const int node_idx[8] = { 0, 2, 1, 3, 4, 6, 5, 7 };
+
+  auto mesh1_view = mesh1_.getView();
+  auto mesh2_view = mesh2_.getView();
+
+  // Compute local contributions
+  for ( const auto& pair : pairs_ ) {
+    // These need to be flipped, since the pairs are determined with element 1 associated with mesh 1, and we flipped
+    // the mesh numbers to be consistent with the literature and since the underlying method integrates on element 1
+    InterfacePair flipped_pair( pair.m_element_id2, pair.m_element_id1 );
+    const auto elem1 = static_cast<int>( flipped_pair.m_element_id1 );
+    const auto elem2 = static_cast<int>( flipped_pair.m_element_id2 );
+
+    double g_tilde_elem[2];
+    double A_elem[2];
+
+    evaluator_->gtilde_and_area( flipped_pair, mesh1_view, mesh2_view, g_tilde_elem, A_elem );
+    // Skip the pair (gap, area and both derivative blocks) when neither node received positive area
+    if ( A_elem[0] <= 0.0 && A_elem[1] <= 0.0 ) {
+      continue;
+    }
+
+    auto A_conn = mesh1_view.getConnectivity()( elem1 );
+
+    // Add to nodes of Element A
+    redecomp_gap( A_conn[0] ) += g_tilde_elem[0];
+    redecomp_gap( A_conn[1] ) += g_tilde_elem[1];
+
+    redecomp_area( A_conn[0] ) += A_elem[0];
+    redecomp_area( A_conn[1] ) += A_elem[1];
+
+    // compute g_tilde first derivative
+    double dg_dx_node1[8];
+    double dg_dx_node2[8];
+    // TODO: make grad_gtilde return directly in dg_tilde_dx_blocks format
+    evaluator_->grad_gtilde( flipped_pair, mesh1_view, mesh2_view, dg_dx_node1, dg_dx_node2 );
+    double dg_tilde_dx_blocks[2][8];
+    for ( int i{ 0 }; i < 4; ++i ) {
+      dg_tilde_dx_blocks[0][i * 2] = dg_dx_node1[node_idx[i]];
+      dg_tilde_dx_blocks[0][i * 2 + 1] = dg_dx_node2[node_idx[i]];
+      dg_tilde_dx_blocks[1][i * 2] = dg_dx_node1[node_idx[i + 4]];
+      dg_tilde_dx_blocks[1][i * 2 + 1] = dg_dx_node2[node_idx[i + 4]];
+    }
+    dg_tilde_dx_contribs.push_back( 0, elem1, elem1, dg_tilde_dx_blocks[0], 8 );
+    dg_tilde_dx_contribs.push_back( 1, elem1, elem2, dg_tilde_dx_blocks[1], 8 );
+    // same assembly pattern for the area first derivative
+    double dA_dx_node1[8];
+    double dA_dx_node2[8];
+    // TODO: make grad_trib_area return directly in dA_dx_blocks format
+    evaluator_->grad_trib_area( flipped_pair, mesh1_view, mesh2_view, dA_dx_node1, dA_dx_node2 );
+    double dA_dx_blocks[2][8];
+    for ( int i{ 0 }; i < 4; ++i ) {
+      dA_dx_blocks[0][i * 2] = dA_dx_node1[node_idx[i]];
+      dA_dx_blocks[0][i * 2 + 1] = dA_dx_node2[node_idx[i]];
+      dA_dx_blocks[1][i * 2] = dA_dx_node1[node_idx[i + 4]];
+      dA_dx_blocks[1][i * 2 + 1] = dA_dx_node2[node_idx[i + 4]];
+    }
+    dA_dx_contribs.push_back( 0, elem1, elem1, dA_dx_blocks[0], 8 );
+    dA_dx_contribs.push_back( 1, elem1, elem2, dA_dx_blocks[1], 8 );
+  }
+
+  // Move gap and area to submesh level vectors
+  mfem::ParLinearForm g_tilde_linear_form(
+      const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  submesh_data_.GetSubmeshGap( g_tilde_linear_form );
+  auto& P_submesh = *submesh_data_.GetSubmeshFESpace().GetProlongationMatrix();
+  g_tilde_vec_ = shared::ParVector( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  g_tilde_vec_.Fill( 0.0 );
+  P_submesh.MultTranspose( g_tilde_linear_form, g_tilde_vec_.get() );
+  // Penalty mode without tied contact: zero positive g_tilde entries and remember their rows for elimination below
+  mfem::Array<int> rows_to_elim;
+  if ( !tied_contact_ && use_penalty_ ) {
+    rows_to_elim.Reserve( g_tilde_vec_.Size() );
+    for ( int i{ 0 }; i < g_tilde_vec_.Size(); ++i ) {
+      if ( g_tilde_vec_[i] > 0.0 ) {
+        g_tilde_vec_[i] = 0.0;
+        rows_to_elim.push_back( i );
+      }
+    }
+  }
+
+  mfem::ParLinearForm A_linear_form( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  submesh_data_.GetPressureTransfer().RedecompToSubmesh( redecomp_area, A_linear_form );
+  A_vec_ = shared::ParVector( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  A_vec_.Fill( 0.0 );
+  P_submesh.MultTranspose( A_linear_form, A_vec_.get() );
+  // Nodal gap = g_tilde / A, with area_tol_ passed to divide() as the area tolerance
+  gap_vec_ = g_tilde_vec_.divide( A_vec_, area_tol_ );
+
+  // Move gap and area derivatives to HypreParMatrix (submesh rows, parent mesh cols)
+  dg_tilde_dx_ = jac_data_.GetMfemJacobian( dg_tilde_dx_contribs.get() );
+  if ( !tied_contact_ && use_penalty_ ) {
+    // technically, we should do this on all the vectors/matrices below, but it looks like the multiplication operators
+    // below will zero them out anyway
+    dg_tilde_dx_.EliminateRows( rows_to_elim );
+  }
+
+  dA_dx_ = jac_data_.GetMfemJacobian( dA_dx_contribs.get() );
+}
+
+void NewMethodAdapter::updateNodalForces()
+{
+  // NOTE: user should have called updateNodalGaps() with updated coords before calling this
+
+  // compute nodal pressures. these are used in the Hessian vector product below so we don't have to assemble a Hessian
+  // NOTE: in general, pressure should likely be set by the host code
+  // With p = k * g_tilde / A per node, the energy is E = sum( p * g_tilde ) and the force assembled below is
+  //   f = 2 p dg_tilde/dx - ( p g_tilde / A ) dA/dx
+  pressure_vec_ = params_.k * gap_vec_;
+  energy_ = pressure_vec_.dot( g_tilde_vec_ );
+
+  auto k_over_a = params_.k * A_vec_.inverse( area_tol_ );
+  auto p_over_a = pressure_vec_.divide( A_vec_, area_tol_ );
+
+  // dp/dx = ( k / A ) dg_tilde/dx - ( p / A ) dA/dx
+  shared::ParSparseMat dp_dx( dg_tilde_dx_.get() );
+  dp_dx->ScaleRows( k_over_a.get() );
+  shared::ParSparseMat dp_dx_temp( dA_dx_.get() );
+  dp_dx_temp->ScaleRows( p_over_a.get() );
+  dp_dx -= dp_dx_temp;
+
+  force_vec_ = ( pressure_vec_ * dg_tilde_dx_ ) + ( g_tilde_vec_ * dp_dx );
+
+  JacobianContributions df_dx_contribs( { { BlockSpace::NONMORTAR, BlockSpace::NONMORTAR },
+                                          { BlockSpace::NONMORTAR, BlockSpace::MORTAR },
+                                          { BlockSpace::MORTAR, BlockSpace::NONMORTAR },
+                                          { BlockSpace::MORTAR, BlockSpace::MORTAR } } );
+
+  df_dx_contribs.reserve( pairs_.size(), 16 );
+
+  const int node_idx[8] = { 0, 2, 1, 3, 4, 6, 5, 7 };
+  // Bring pressure, g_tilde and area from submesh true dofs to the redecomp mesh for element-wise access
+  mfem::GridFunction redecomp_pressure( submesh_data_.GetRedecompGap() );
+  mfem::ParGridFunction submesh_pressure(
+      const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  submesh_pressure.SetFromTrueDofs( pressure_vec_.get() );
+  submesh_data_.GetPressureTransfer().SubmeshToRedecomp( submesh_pressure, redecomp_pressure );
+
+  mfem::GridFunction redecomp_g_tilde( submesh_data_.GetRedecompGap() );
+  mfem::ParGridFunction submesh_g_tilde(
+      const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  submesh_g_tilde.SetFromTrueDofs( g_tilde_vec_.get() );
+  submesh_data_.GetPressureTransfer().SubmeshToRedecomp( submesh_g_tilde, redecomp_g_tilde );
+
+  mfem::GridFunction redecomp_A( submesh_data_.GetRedecompGap() );
+  mfem::ParGridFunction submesh_A( const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  submesh_A.SetFromTrueDofs( A_vec_.get() );
+  submesh_data_.GetPressureTransfer().SubmeshToRedecomp( submesh_A, redecomp_A );
+
+  auto mesh1_view = mesh1_.getView();
+  auto mesh2_view = mesh2_.getView();
+
+  // get pairwise action of second derivatives of gaps and pressure for stiffness contribution
+  for ( const auto& pair : pairs_ ) {
+    // These need to be flipped, since the pairs are determined with element 1 associated with mesh 1, and we flipped
+    // the mesh numbers to be consistent with the literature and since the underlying method integrates on element 1
+    InterfacePair flipped_pair( pair.m_element_id2, pair.m_element_id1 );
+    const auto elem1 = static_cast<int>( flipped_pair.m_element_id1 );
+    const auto node11 = mesh1_view.getConnectivity()( elem1, 0 );
+    const auto node12 = mesh1_view.getConnectivity()( elem1, 1 );
+    const auto elem2 = static_cast<int>( flipped_pair.m_element_id2 );
+
+    const RealT pressure1 = 2.0 * redecomp_pressure( node11 );
+    const RealT pressure2 = 2.0 * redecomp_pressure( node12 );
+
+    if ( pressure1 == 0.0 && pressure2 == 0.0 ) {
+      continue;
+    }
+    // -g_tilde * p / A per node. Nodes with area at or below area_tol_ contribute nothing instead of dividing by ~0
+    const RealT area1 = redecomp_A( node11 );
+    const RealT area2 = redecomp_A( node12 );
+    const RealT g_p_ainv1 = area1 > area_tol_ ? -redecomp_g_tilde( node11 ) * redecomp_pressure( node11 ) / area1 : 0.0;
+    const RealT g_p_ainv2 = area2 > area_tol_ ? -redecomp_g_tilde( node12 ) * redecomp_pressure( node12 ) / area2 : 0.0;
+    double d2g_dx2_node1[64];
+    double d2g_dx2_node2[64];
+    // ordering: [dg/(dx0dx0) dg/(dy0dx0) dg/(dx1dx0) ...]
+    evaluator_->d2_g2tilde( flipped_pair, mesh1_view, mesh2_view, d2g_dx2_node1, d2g_dx2_node2 );
+
+    double d2A_dx2_node1[64];
+    double d2A_dx2_node2[64];
+    evaluator_->compute_d2A_d2u( flipped_pair, mesh1_view, mesh2_view, d2A_dx2_node1, d2A_dx2_node2 );
+
+    double df_dx_blocks[2][2][16];
+    for ( int i{ 0 }; i < 2; ++i ) {
+      for ( int j{ 0 }; j < 2; ++j ) {
+        for ( int k{ 0 }; k < 4; ++k ) {
+          for ( int l{ 0 }; l < 4; ++l ) {
+            const int idx = node_idx[l + i * 4] + node_idx[k + j * 4] * 8;  // entry of the 8x8 evaluator Hessians
+            df_dx_blocks[i][j][l + k * 4] = pressure1 * d2g_dx2_node1[idx] +
+                                            pressure2 * d2g_dx2_node2[idx] +
+                                            g_p_ainv1 * d2A_dx2_node1[idx] +
+                                            g_p_ainv2 * d2A_dx2_node2[idx];
+          }
+        }
+      }
+    }
+
+    df_dx_contribs.push_back( 0, elem1, elem1, df_dx_blocks[0][0], 16 );
+    df_dx_contribs.push_back( 1, elem1, elem2, df_dx_blocks[0][1], 16 );
+    df_dx_contribs.push_back( 2, elem2, elem1, df_dx_blocks[1][0], 16 );
+    df_dx_contribs.push_back( 3, elem2, elem2, df_dx_blocks[1][1], 16 );
+  }
+
+  // Move the pairwise second-derivative terms ( 2 p d2g_tilde - ( p g_tilde / A ) d2A ) to a HypreParMatrix
+  df_dx_ = jac_data_.GetMfemJacobian( df_dx_contribs.get() );
+  // 2 p g_tilde / A^2 per node
+  auto pg2_over_asq = ( 2.0 * pressure_vec_ )
+                          .multiplyInPlace( g_tilde_vec_ )
+                          .divideInPlace( A_vec_, area_tol_ )
+                          .divideInPlace( A_vec_, area_tol_ );
+
+  auto& submesh_fes = submesh_data_.GetSubmeshFESpace();
+  auto p_over_a_diag = shared::ParSparseMat::diagonalMatrix( submesh_fes.GetComm(), submesh_fes.GlobalTrueVSize(),
+                                                             submesh_fes.GetTrueDofOffsets(), p_over_a.get() );
+  auto pg2_over_asq_diag = shared::ParSparseMat::diagonalMatrix( submesh_fes.GetComm(), submesh_fes.GlobalTrueVSize(),
+                                                                 submesh_fes.GetTrueDofOffsets(), pg2_over_asq.get() );
+
+  df_dx_ -= shared::ParSparseMat::RAP( dg_tilde_dx_, p_over_a_diag, dA_dx_ );
+  df_dx_ -= shared::ParSparseMat::RAP( dA_dx_, p_over_a_diag, dg_tilde_dx_ );
+  df_dx_ += shared::ParSparseMat::RAP( dA_dx_, pg2_over_asq_diag, dA_dx_ );  // ( 2 p g / A^2 ) dA (x) dA: symmetric
+  df_dx_ += dp_dx.transpose() * dg_tilde_dx_;
+  df_dx_ += dg_tilde_dx_.transpose() * dp_dx;
+}
+
+RealT NewMethodAdapter::computeTimeStep()
+{
+  // Placeholder: always returns 1.0. TODO: implement timestep calculation
+  return 1.0;
+}
+
+void NewMethodAdapter::compute_df_du_lagrange( const mfem::HypreParVector& lambda,
+                                               std::unique_ptr<mfem::HypreParMatrix>& df_du )
+{
+  // Assembles df/du = sum over pairs of lambda_node * d2(g_tilde_node)/du2 and hands the matrix to df_du.
+  //
+  // The multipliers are first moved from submesh true dofs to the redecomp mesh so they can be read per element node.
+  mfem::GridFunction redecomp_lambda( submesh_data_.GetRedecompGap() );
+  mfem::ParGridFunction submesh_lambda(
+      const_cast<mfem::ParFiniteElementSpace*>( &submesh_data_.GetSubmeshFESpace() ) );
+  submesh_lambda.SetFromTrueDofs( lambda );
+  submesh_data_.GetPressureTransfer().SubmeshToRedecomp( submesh_lambda, redecomp_lambda );
+
+  JacobianContributions hessian_contribs( { { BlockSpace::NONMORTAR, BlockSpace::NONMORTAR },
+                                            { BlockSpace::NONMORTAR, BlockSpace::MORTAR },
+                                            { BlockSpace::MORTAR, BlockSpace::NONMORTAR },
+                                            { BlockSpace::MORTAR, BlockSpace::MORTAR } } );
+  hessian_contribs.reserve( pairs_.size(), 16 );
+
+  // Order in which the 8 evaluator coordinates are read when filling the 4x4 blocks
+  const int dof_perm[8] = { 0, 2, 1, 3, 4, 6, 5, 7 };
+
+  auto view1 = mesh1_.getView();
+  auto view2 = mesh2_.getView();
+
+  for ( const auto& pair : pairs_ ) {
+    // Element ids are swapped because the adapter stores the two meshes in swapped order
+    InterfacePair flipped_pair( pair.m_element_id2, pair.m_element_id1 );
+    const auto e1 = static_cast<int>( flipped_pair.m_element_id1 );
+    const auto e2 = static_cast<int>( flipped_pair.m_element_id2 );
+
+    // Multiplier values at the two nodes of the first element (no factor of 2 in Lagrange mode)
+    const RealT lam_a = redecomp_lambda( view1.getConnectivity()( e1, 0 ) );
+    const RealT lam_b = redecomp_lambda( view1.getConnectivity()( e1, 1 ) );
+
+    // Pairs whose multipliers are both zero are still assembled (no early skip)
+
+    // Second derivatives of g_tilde for each of the two nodes, as 8x8 arrays
+    double hess_a[64];
+    double hess_b[64];
+    evaluator_->d2_g2tilde( flipped_pair, view1, view2, hess_a, hess_b );
+
+    // Weight the two Hessians by the multipliers and split them into the four 4x4 blocks
+    double blocks[2][2][16];
+    for ( int bi{ 0 }; bi < 2; ++bi ) {
+      for ( int bj{ 0 }; bj < 2; ++bj ) {
+        for ( int r{ 0 }; r < 4; ++r ) {
+          for ( int c{ 0 }; c < 4; ++c ) {
+            const int src = dof_perm[c + bi * 4] + dof_perm[r + bj * 4] * 8;
+            blocks[bi][bj][c + r * 4] = lam_a * hess_a[src] + lam_b * hess_b[src];
+          }
+        }
+      }
+    }
+
+    hessian_contribs.push_back( 0, e1, e1, blocks[0][0], 16 );
+    hessian_contribs.push_back( 1, e1, e2, blocks[0][1], 16 );
+    hessian_contribs.push_back( 2, e2, e1, blocks[1][0], 16 );
+    hessian_contribs.push_back( 3, e2, e2, blocks[1][1], 16 );
+  }
+
+  // Assemble the parallel matrix and transfer ownership of it to the caller's pointer
+  auto assembled = jac_data_.GetMfemJacobian( hessian_contribs.get() );
+
+  df_du.reset( assembled.release() );
+}
+
+void NewMethodAdapter::evaluateContactResidual( const mfem::HypreParVector& lambda, mfem::HypreParVector& r_force,
+                                                mfem::HypreParVector& r_gap )
+{
+  SLIC_ERROR_ROOT_IF( use_penalty_, "evaluateContactResidual() should only be called in Lagrange multiplier mode" );
+  // NOTE(review): dg_tilde_dx_ is release()d by evaluateContactJacobian()/getMfemDgDx() — call this before either
+  SLIC_ERROR_ROOT_IF( g_tilde_vec_.Size() == 0, "updateNodalGaps() must be called before evaluateContactResidual()" );
+
+  // Force residual = r_f = lambda * dg_tilde/du
+  dg_tilde_dx_->MultTranspose( lambda, r_force );
+
+  // gap residual
+  r_gap = g_tilde_vec_.get();
+}
+
+void NewMethodAdapter::evaluateContactJacobian( const mfem::HypreParVector& lambda,
+                                                std::unique_ptr<mfem::HypreParMatrix>& df_du,
+                                                std::unique_ptr<mfem::HypreParMatrix>& df_dlambda )
+{
+  SLIC_ERROR_ROOT_IF( use_penalty_, "evaluateContactJacobian() should only be called in Lagrange multiplier mode" );
+
+  SLIC_ERROR_ROOT_IF( g_tilde_vec_.Size() == 0, "updateNodalGaps() must be called before evaluateContactJacobian()" );
+  // NOTE(review): release() presumably leaves dg_tilde_dx_ empty until the next updateNodalGaps() — confirm
+  // df/dlambda = dg_tilde/du:
+  df_dlambda = std::unique_ptr<mfem::HypreParMatrix>( dg_tilde_dx_.release() );
+
+  // df/du = lambda * d2g_tilde/du2
+  compute_df_du_lagrange( lambda, df_du );
+}
+
+std::unique_ptr<mfem::HypreParMatrix> NewMethodAdapter::getMfemDfDx() const
+{  // Hands df_dx_ (assigned in updateNodalForces()) to the caller; release() presumably empties the member — confirm
+  return std::unique_ptr<mfem::HypreParMatrix>( df_dx_.release() );
+}
+
+std::unique_ptr<mfem::HypreParMatrix> NewMethodAdapter::getMfemDgDx() const
+{  // Hands dg_tilde_dx_ (assigned in updateNodalGaps()) to the caller; release() presumably empties it — confirm
+  return std::unique_ptr<mfem::HypreParMatrix>( dg_tilde_dx_.release() );
+}
+
+std::unique_ptr<mfem::HypreParMatrix> NewMethodAdapter::getMfemDfDp() const
+{
+  // No df/dp matrix is provided by this formulation; callers receive a null pointer (the error report is disabled).
+  return nullptr;
+}
+
+#endif  // TRIBOL_USE_ENZYME
+
+}  // namespace tribol
diff --git a/src/tribol/physics/NewMethodAdapter.hpp b/src/tribol/physics/NewMethodAdapter.hpp
new file mode 100644
index 00000000..14b41b88
--- /dev/null
+++ b/src/tribol/physics/NewMethodAdapter.hpp
@@ -0,0 +1,112 @@
+// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
+// other Tribol Project Developers. See the top-level LICENSE file for details.
+//
+// SPDX-License-Identifier: (MIT)
+
+#ifndef SRC_TRIBOL_PHYSICS_NEWMETHODADAPTER_HPP_
+#define SRC_TRIBOL_PHYSICS_NEWMETHODADAPTER_HPP_
+
+#include "tribol/config.hpp"
+
+#include "tribol/physics/ContactFormulation.hpp"
+#include "tribol/physics/new_method.hpp"
+#include "tribol/mesh/MfemData.hpp"
+#include "tribol/common/Parameters.hpp"
+
+#include "mfem.hpp"
+
+#include <memory>
+
+namespace tribol {
+
+#ifdef TRIBOL_USE_ENZYME
+
+class NewMethodAdapter : public ContactFormulation {
+ public:
+  /**
+   * @brief Constructor
+   *
+   * @param submesh_data Reference to Tribol's MFEM submesh data (gap storage and submesh/redecomp transfer)
+   * @param jac_data Reference to Tribol's MFEM Jacobian data used to assemble the parallel matrices
+   * @param mesh1 First Tribol mesh; stored as mesh2_ (see the note in the constructor definition)
+   * @param mesh2 Second Tribol mesh; stored as mesh1_
+   * @param k Penalty stiffness
+   * @param delta Smoothing length
+   * @param N Quadrature order
+   * @param enzyme_quadrature Forwarded unchanged to ContactParams::enzyme_quadrature
+   * @param use_penalty True for penalty mode; false for Lagrange multiplier mode
+   */
+  NewMethodAdapter( MfemSubmeshData& submesh_data, MfemJacobianData& jac_data, MeshData& mesh1, MeshData& mesh2,
+                    double k, double delta, int N, bool enzyme_quadrature, bool use_penalty = true );
+
+  virtual ~NewMethodAdapter() = default;
+
+  // --- ContactFormulation Interface Implementation ---
+
+  void setInterfacePairs( ArrayT<InterfacePair>&& pairs, int check_level ) override;
+
+  void updateIntegrationRule() override;
+
+  void updateNodalGaps() override;
+
+  void updateNodalForces() override;
+
+  RealT computeTimeStep() override;
+  // Energy computed by the last updateNodalForces() call; 0 before the first call
+  RealT getEnergy() const override { return energy_; }
+
+#ifdef BUILD_REDECOMP
+  const mfem::HypreParVector& getMfemForce() const override { return force_vec_.get(); }
+
+  const mfem::HypreParVector& getMfemGap() const override { return gap_vec_.get(); }
+
+  mfem::HypreParVector& getMfemPressure() override { return pressure_vec_.get(); }
+  // NOTE: getMfemDfDx() and getMfemDgDx() release() the stored matrix to the caller (see NewMethodAdapter.cpp)
+  std::unique_ptr<mfem::HypreParMatrix> getMfemDfDx() const override;
+
+  std::unique_ptr<mfem::HypreParMatrix> getMfemDgDx() const override;
+
+  std::unique_ptr<mfem::HypreParMatrix> getMfemDfDp() const override;
+
+  void evaluateContactResidual( const mfem::HypreParVector& lambda, mfem::HypreParVector& r_force,
+                                mfem::HypreParVector& r_gap ) override;
+
+  // Lagrange multiplier mode
+  void evaluateContactJacobian( const mfem::HypreParVector& lambda, std::unique_ptr<mfem::HypreParMatrix>& df_du,
+                                std::unique_ptr<mfem::HypreParMatrix>& df_dlambda ) override;
+
+  void compute_df_du_lagrange( const mfem::HypreParVector& lambda, std::unique_ptr<mfem::HypreParMatrix>& df_du );
+
+#endif
+
+ private:
+  // --- Member Variables ---
+  bool use_penalty_;  // true: penalty mode; false: Lagrange multiplier mode
+  double area_tol_{ 1.0e-14 };
+  bool tied_contact_ = false;
+  MfemSubmeshData& submesh_data_;
+  MfemJacobianData& jac_data_;
+  MeshData& mesh1_;
+  MeshData& mesh2_;
+  ContactParams params_;
+  std::unique_ptr<ContactEvaluator> evaluator_;
+  // Stored InterfacePairs
+  ArrayT<InterfacePair> pairs_;
+  // These store the assembled nodal values
+  shared::ParVector g_tilde_vec_;
+  shared::ParVector A_vec_;
+  shared::ParVector gap_vec_;
+  mutable shared::ParSparseMat dg_tilde_dx_;  // mutable so the const getter can release() it
+  shared::ParSparseMat dA_dx_;
+
+  shared::ParVector pressure_vec_;  // This holds p = k * g / A
+  RealT energy_{ 0.0 };  // initialized so getEnergy() is defined before updateNodalForces() runs
+  shared::ParVector force_vec_;
+  mutable shared::ParSparseMat df_dx_;  // mutable so the const getter can release() it
+};
+
+#endif  // TRIBOL_USE_ENZYME
+
+}  // namespace tribol
+
+#endif /* SRC_TRIBOL_PHYSICS_NEWMETHODADAPTER_HPP_ */
diff --git a/src/tribol/physics/new_method.cpp b/src/tribol/physics/new_method.cpp
new file mode 100644
index 00000000..ebdfbfb8
--- /dev/null
+++ b/src/tribol/physics/new_method.cpp
@@ -0,0 +1,918 @@
+#include "new_method.hpp"
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <vector>
+#include <array>
+#include <cmath>
+#include <algorithm>
+#include <cassert>
+#include <iomanip>
+#include "tribol/common/ArrayTypes.hpp"
+#include "tribol/common/Parameters.hpp"
+#include "tribol/geom/GeomUtilities.hpp"
+#include "tribol/common/Enzyme.hpp"
+#include "tribol/mesh/MeshData.hpp"
+#include <set>
+#include <map>
+
+namespace tribol {
+
+#ifdef TRIBOL_USE_ENZYME
+
+namespace {
+
+static ContactSmoothing smoother( ContactParams{} );  // file-level smoother built from default-constructed params
+static ContactEvaluator eval(ContactParams{});  // NOTE(review): no use visible in this part of the file — confirm
+
+// struct Gparams {
+//   int N;
+//   const double* qp; 
+//   const double* w;
+//   const double* x2;
+// };
+
+
+void find_normal( const double* coord1, const double* coord2, double* normal )
+{
+  // Unit normal of the 2D segment coord1 -> coord2: the tangent (tx, ty) is normalized and turned into (ty, -tx).
+  // A zero-length segment is not guarded against and yields non-finite components.
+  const double tx = coord2[0] - coord1[0];
+  const double ty = coord2[1] - coord1[1];
+  const double length = std::sqrt( ty * ty + tx * tx );
+  normal[0] = ty / length;
+  normal[1] = -( tx / length );
+}
+
+template <std::size_t M> void determine_legendre_nodes( int N, std::array<double, M>& x )
+{
+  // Gauss-Legendre abscissae on [-1, 1] written to x[0..N-1]; an order is taken only if it fits in x (N <= M).
+  if ( N == 1 && M >= 1 ) {
+    x[0] = 0.0;
+  } else if ( N == 2 && M >= 2 ) {
+    const double a = 1.0 / std::sqrt( 3.0 );
+    x[0] = -a;
+    x[1] = a;
+  } else if ( N == 3 && M >= 3 ) {
+    const double a = std::sqrt( 3.0 / 5.0 );
+    x[0] = -a;
+    x[1] = 0.0;
+    x[2] = a;
+  } else if ( N == 4 && M >= 4 ) {
+    const double a = std::sqrt( ( 3.0 - 2.0 * std::sqrt( 6.0 / 5.0 ) ) / 7.0 );
+    const double b = std::sqrt( ( 3.0 + 2.0 * std::sqrt( 6.0 / 5.0 ) ) / 7.0 );
+    x[0] = -b;
+    x[1] = -a;
+    x[2] = a;
+    x[3] = b;
+  } else if ( N == 5 && M >= 5 ) {
+    const double a = std::sqrt( 5.0 - 2.0 * std::sqrt( 10.0 / 7.0 ) ) / 3.0;
+    const double b = std::sqrt( 5.0 + 2.0 * std::sqrt( 10.0 / 7.0 ) ) / 3.0;
+    x[0] = -b;
+    x[1] = -a;
+    x[2] = 0.0;
+    x[3] = a;
+    x[4] = b;
+  } else {
+    assert( false && "Unsupported quadrature order" );
+  }
+}
+
+template <std::size_t M> void determine_legendre_weights( int N, std::array<double, M>& W )
+{
+  // Gauss-Legendre weights matching the N-point abscissae, written to W[0..N-1]; an order must fit in W (N <= M).
+  if ( N == 1 && M >= 1 ) {
+    W[0] = 2.0;
+  } else if ( N == 2 && M >= 2 ) {
+    W[0] = 1.0;
+    W[1] = 1.0;
+  } else if ( N == 3 && M >= 3 ) {
+    W[0] = 5.0 / 9.0;
+    W[1] = 8.0 / 9.0;
+    W[2] = 5.0 / 9.0;
+  } else if ( N == 4 && M >= 4 ) {
+    W[0] = ( 18.0 - std::sqrt( 30.0 ) ) / 36.0;
+    W[1] = ( 18.0 + std::sqrt( 30.0 ) ) / 36.0;
+    W[2] = ( 18.0 + std::sqrt( 30.0 ) ) / 36.0;
+    W[3] = ( 18.0 - std::sqrt( 30.0 ) ) / 36.0;
+  } else if ( N == 5 && M >= 5 ) {
+    W[0] = ( 322.0 - 13.0 * std::sqrt( 70.0 ) ) / 900.0;
+    W[1] = ( 322.0 + 13.0 * std::sqrt( 70.0 ) ) / 900.0;
+    W[2] = 128.0 / 225.0;
+    W[3] = ( 322.0 + 13.0 * std::sqrt( 70.0 ) ) / 900.0;
+    W[4] = ( 322.0 - 13.0 * std::sqrt( 70.0 ) ) / 900.0;
+  } else {
+    assert( false && "Unsupported quadrature order" );
+  }
+}
+
+void iso_map( const double* coord1, const double* coord2, double xi, double* mapped_coord )
+{
+  // Linear interpolation between two 2D points: xi = -0.5 gives coord1 and xi = +0.5 gives coord2.
+  const double shape[2] = { 0.5 - xi, 0.5 + xi };
+  // Same weights for both components
+  for ( int c = 0; c < 2; ++c ) mapped_coord[c] = shape[0] * coord1[c] + shape[1] * coord2[c];
+}
+
+inline void endpoints( const MeshData::Viewer& mesh, int elem_id, double P0[2], double P1[2] )
+{
+  // Fetches the face coordinates of elem_id into a 4-entry buffer and splits it: entries 0-1 go to P0 and
+  // entries 2-3 go to P1.
+  double face_xy[4];
+  mesh.getFaceCoords( elem_id, face_xy );
+  std::copy( face_xy, face_xy + 2, P0 );
+  std::copy( face_xy + 2, face_xy + 4, P1 );
+}
+
+void find_intersection( const double* A0, const double* A1, const double* p, const double* nB, double* intersection )
+{
+  // Intersects the line through A0 and A1 with the line through p along direction nB. Writes p itself when nB is
+  // shorter than 1e-14 or when |edge x unit(nB)| is below 1e-12 (the two lines are then treated as parallel).
+  const double edge[2] = { A1[0] - A0[0], A1[1] - A0[1] };
+  const double offset[2] = { p[0] - A0[0], p[1] - A0[1] };
+  const double dir_len = std::sqrt( nB[0] * nB[0] + nB[1] * nB[1] );
+
+  if ( !( dir_len < 1e-14 ) ) {
+    const double dir[2] = { nB[0] / dir_len, nB[1] / dir_len };
+
+    // 2D cross product of the edge with the unit projection direction
+    const double cross = edge[0] * dir[1] - edge[1] * dir[0];
+
+    if ( !( std::abs( cross ) < 1e-12 ) ) {
+      // Solve A0 + alpha * edge = p + s * dir for alpha by crossing both sides with dir
+      const double alpha = ( offset[0] * dir[1] - offset[1] * dir[0] ) * ( 1.0 / cross );
+      intersection[0] = A0[0] + alpha * edge[0];
+      intersection[1] = A0[1] + alpha * edge[1];
+      return;
+    }
+  }
+
+  // Degenerate direction or parallel lines: fall back to the query point
+  intersection[0] = p[0];
+  intersection[1] = p[1];
+}
+
+void get_projections( const double* A0, const double* A1, const double* B0, const double* B1, double* projections )
+{
+  // Projects both endpoints of segment B onto the line through A along B's normal and converts each hit to a
+  // reference coordinate on A (A0 at -0.5, A1 at +0.5). The smaller coordinate is written to projections[0] and
+  // the larger one to projections[1].
+  double normal_B[2] = { 0.0, 0.0 };
+  find_normal( B0, B1, normal_B );
+
+  // Tangent of A and its squared length, used to turn a hit point into a fraction along A
+  const double tA[2] = { A1[0] - A0[0], A1[1] - A0[1] };
+  const double len_sq = tA[0] * tA[0] + tA[1] * tA[1];
+
+  // One reference coordinate per endpoint of B
+  const double* B_pts[2] = { B0, B1 };
+  double xi[2] = { 0.0, 0.0 };
+
+  for ( int i = 0; i < 2; ++i ) {
+    double hit[2] = { 0.0, 0.0 };
+    find_intersection( A0, A1, B_pts[i], normal_B, hit );
+
+    // Fraction of the way from A0 to A1 (0 at A0, 1 at A1), then shifted so the midpoint of A is 0
+    const double frac = ( ( hit[0] - A0[0] ) * tA[0] + ( hit[1] - A0[1] ) * tA[1] ) / len_sq;
+    xi[i] = frac - 0.5;
+  }
+
+  // The projected endpoints may arrive in either order
+  const double xi_lo = std::min( xi[0], xi[1] );
+  const double xi_hi = std::max( xi[0], xi[1] );
+
+  projections[0] = xi_lo;
+  projections[1] = xi_hi;
+}
+
+void gtilde_kernel( const double* x, Gparams* gp, double* g_tilde_out, double* A_out )
+{  // x packs the pair coordinates as [A0x, A0y, A1x, A1y, B0x, B0y, B1x, B1y]; gp supplies N points qp[] / weights w[]
+  const double A0[2] = { x[0], x[1] };
+  const double A1[2] = { x[2], x[3] };
+  const double B0[2] = { x[4], x[5] };
+  const double B1[2] = { x[6], x[7] };
+  // Length of segment A, used as the integration Jacobian for the gap sums
+  const double J = std::sqrt( ( A1[0] - A0[0] ) * ( A1[0] - A0[0] ) + ( A1[1] - A0[1] ) * ( A1[1] - A0[1] ) );
+  // Same expression as J; used for the area sums
+  const double J_ref = std::sqrt( ( A1[0] - A0[0] ) * ( A1[0] - A0[0] ) + ( A1[1] - A0[1] ) * ( A1[1] - A0[1] ) );
+
+  double nB[2];
+  find_normal( B0, B1, nB );
+  // eta = min( nA . nB, 0 ): the gap is weighted only when the two segment normals oppose each other
+  double nA[2];
+  find_normal( A0, A1, nA );
+  double dot = nB[0] * nA[0] + nB[1] * nA[1];
+  double eta = ( dot < 0 ) ? dot : 0.0;
+
+  double g1 = 0.0, g2 = 0.0;
+  double AI_1 = 0.0, AI_2 = 0.0;
+
+  for ( int i = 0; i < gp->N; ++i ) {
+    const double xiA = gp->qp[i];
+    const double w = gp->w[i];
+    // Linear shape functions of A's two nodes at this quadrature point
+    const double N1 = 0.5 - xiA;
+    const double N2 = 0.5 + xiA;
+
+    // x1 on segment A
+    double x1[2];
+    iso_map( A0, A1, xiA, x1 );
+    // x2: intersection of the line through B0-B1 with the line through x1 along nB
+    double x2[2];
+    find_intersection( B0, B1, x1, nB, x2 );
+
+    const double dx = x1[0] - x2[0];
+    const double dy = x1[1] - x2[1];
+
+    // lagged normal on B
+    const double gn = -( dx * nB[0] + dy * nB[1] );
+    const double g = gn * eta;
+
+    g1 += w * N1 * g * J;
+    g2 += w * N2 * g * J;
+
+    AI_1 += w * N1 * J_ref;
+    AI_2 += w * N2 * J_ref;
+  }
+  // Outputs: weighted gap and area sums for A's first and second node
+  g_tilde_out[0] = g1;
+  g_tilde_out[1] = g2;
+
+  A_out[0] = AI_1;
+  A_out[1] = AI_2;
+}
+
+//**************************************** */
+// Enzyme functions for constant quadrature:
+
+void gtilde_kernel_quad( const double* x, const Gparams* gp, double* g_tilde_out, double* A_out )
+{  // Same computation as gtilde_kernel() but takes gp as const; x = [A0x, A0y, A1x, A1y, B0x, B0y, B1x, B1y]
+  const double A0[2] = { x[0], x[1] };
+  const double A1[2] = { x[2], x[3] };
+  const double B0[2] = { x[4], x[5] };
+  const double B1[2] = { x[6], x[7] };
+  // Length of segment A, used as the integration Jacobian for the gap sums
+  const double J = std::sqrt( ( A1[0] - A0[0] ) * ( A1[0] - A0[0] ) + ( A1[1] - A0[1] ) * ( A1[1] - A0[1] ) );
+  // Same expression as J; used for the area sums
+  const double J_ref = std::sqrt( ( A1[0] - A0[0] ) * ( A1[0] - A0[0] ) + ( A1[1] - A0[1] ) * ( A1[1] - A0[1] ) );
+
+  double nB[2];
+  find_normal( B0, B1, nB );
+  // eta = min( nA . nB, 0 ): the gap is weighted only when the two segment normals oppose each other
+  double nA[2];
+  find_normal( A0, A1, nA );
+  double dot = nB[0] * nA[0] + nB[1] * nA[1];
+  double eta = ( dot < 0 ) ? dot : 0.0;
+
+  double g1 = 0.0, g2 = 0.0;
+  double AI_1 = 0.0, AI_2 = 0.0;
+
+  for ( int i = 0; i < gp->N; ++i ) {
+    const double xiA = gp->qp[i];
+    const double w = gp->w[i];
+    // Linear shape functions of A's two nodes at this quadrature point
+    const double N1 = 0.5 - xiA;
+    const double N2 = 0.5 + xiA;
+
+    // x1 on segment A
+    double x1[2];
+    iso_map( A0, A1, xiA, x1 );
+    // x2: intersection of the line through B0-B1 with the line through x1 along nB
+    double x2[2];
+    find_intersection( B0, B1, x1, nB, x2 );
+
+    const double dx = x1[0] - x2[0];
+    const double dy = x1[1] - x2[1];
+
+    // lagged normal on B
+    const double gn = -( dx * nB[0] + dy * nB[1] );
+    const double g = gn * eta;
+
+    g1 += w * N1 * g * J;
+    g2 += w * N2 * g * J;
+
+    AI_1 += w * N1 * J_ref;
+    AI_2 += w * N2 * J_ref;
+  }
+  // Outputs: weighted gap and area sums for A's first and second node
+  g_tilde_out[0] = g1;
+  g_tilde_out[1] = g2;
+
+  A_out[0] = AI_1;
+  A_out[1] = AI_2;
+}
+
+
+
+enum class KernelOutput { GTILDE1, GTILDE2, A1, A2 };  // selects one scalar: g_tilde or area, first or second node
+
+template <KernelOutput Output>
+static void kernel_out( const double* x, const void* gp_void, double* out ) {
+  // Scalar wrapper around gtilde_kernel_quad(): evaluates both outputs and writes the one chosen by Output to *out.
+  double g_tilde[2], area[2];
+  gtilde_kernel_quad( x, static_cast<const Gparams*>( gp_void ), g_tilde, area );
+  if constexpr ( Output == KernelOutput::GTILDE1 || Output == KernelOutput::GTILDE2 ) {
+    *out = g_tilde[Output == KernelOutput::GTILDE1 ? 0 : 1];
+  } else {
+    *out = area[Output == KernelOutput::A1 ? 0 : 1];
+  }
+}
+
+template <KernelOutput Output>
+void grad_kernel( const double* x, const Gparams* gp, double* dout_du ) {
+  // Gradient of the selected kernel output with respect to the 8 entries of x via Enzyme; gp is passed as constant.
+  double x_shadow[8] = { 0.0 };
+  double value = 0.0;
+  double value_seed = 1.0;
+  __enzyme_autodiff<void>( (void*)kernel_out<Output>, enzyme_dup, x, x_shadow, enzyme_const, (const void*)gp,
+                           enzyme_dup, &value, &value_seed );
+  for ( int c = 0; c < 8; ++c ) dout_du[c] = x_shadow[c];
+}
+
+
+
+//**************************************** */
+// Enzyme functions for varying quadrature:
+
+template <KernelOutput Output>  // selects which of the four kernel outputs is written to *out
+static void kernel_out_enzyme( const double* x, double* out) {
+  double A0[2], A1[2], B0[2], B1[2];
+  A0[0] = x[0];
+  A0[1] = x[1];
+  A1[0] = x[2];
+  A1[1] = x[3];
+  B0[0] = x[4];
+  B0[1] = x[5];
+  B1[0] = x[6];
+  B1[1] = x[7];
+
+  // Unlike kernel_out(), the quadrature points are rebuilt from x here, so they take part in the differentiation
+  double projs[2] = { 0 };
+  get_projections( A0, A1, B0, B1, projs );
+  std::array<double, 2> projections = { projs[0], projs[1] };
+  // NOTE(review): del comes from the file-level `smoother` (default ContactParams), not from a caller — confirm
+  auto bounds = ContactSmoothing::bounds_from_projections( projections, smoother.get_del() );
+  auto xi_bounds = ContactSmoothing::smooth_bounds( bounds, smoother.get_del() );
+
+  auto qp = ContactEvaluator::compute_quadrature( xi_bounds );
+
+  const int N = static_cast<int>( qp.qp.size() );
+
+  Gparams gp;
+  gp.N = N;
+  gp.qp = qp.qp.data();
+  gp.w = qp.w.data();
+  gp.x2 = nullptr;
+
+  double gt[2];
+  double A_out[2];
+  gtilde_kernel( x, &gp, gt, A_out );
+
+  if constexpr ( Output == KernelOutput::GTILDE1 ) *out = gt[0];
+  else if constexpr ( Output == KernelOutput::GTILDE2 ) *out = gt[1];
+  else if constexpr ( Output == KernelOutput::A1 )     *out = A_out[0];
+  else if constexpr ( Output == KernelOutput::A2 )     *out = A_out[1];
+
+}
+
+template <KernelOutput Output>
+void grad_kernel_enzyme( const double* x, double* dout_du )
+{  // gradient of kernel_out_enzyme<Output> w.r.t. x[0..7], obtained with __enzyme_autodiff
+  double dx[8] = { 0.0 };  // shadow of x
+  double out = 0.0;
+  double dout = 1.0;  // seed for the scalar output
+  // Both arguments are enzyme_dup pairs: (x, dx) and (&out, &dout).
+  __enzyme_autodiff<void>( (void*)kernel_out_enzyme<Output>, enzyme_dup, x, dx, enzyme_dup, &out, &dout );
+  // Copy the accumulated shadow into the caller's buffer.
+  for ( int i = 0; i < 8; ++i ) {
+    dout_du[i] = dx[i];
+  } 
+}
+
+
+template <KernelOutput Output>
+void d2_kernel( const double* x, double* H )
+{  // fills the 64-entry array H with second derivatives: __enzyme_fwddiff applied to grad_kernel_enzyme<Output>
+  for ( int col = 0; col < 8; ++col ) {
+    double dx[8] = { 0.0 };
+    dx[col] = 1.0;  // unit tangent: differentiate along x[col]
+
+    double grad[8] = { 0.0 };
+    double dgrad[8] = { 0.0 };  // tangent of the gradient for this direction
+    // One forward pass per column; (x, dx) and (grad, dgrad) are value/tangent pairs.
+    __enzyme_fwddiff<void>( (void*)grad_kernel_enzyme<Output>, enzyme_dup, x, dx, enzyme_dup, grad, dgrad );
+    // Entry (row, col) is stored at H[row * 8 + col].
+    for ( int row = 0; row < 8; ++row ) H[row * 8 + col] = dgrad[row];
+  }
+}
+
+
+
+template <KernelOutput Output> 
+void d2_kernel_quad( const double* x, const Gparams* gp, double* H ) 
+{  // second derivatives of kernel_out<Output> for the quadrature stored in gp: __enzyme_fwddiff over grad_kernel
+  for ( int col = 0; col < 8; ++col) {
+    double dx[8] = {0.0};
+    dx[col] = 1.0;  // unit tangent along x[col]
+    double grad[8] = {0.0};
+    double dgrad[8] = {0.0};  // tangent of the gradient for this direction
+    // gp is enzyme_const; (x, dx) and (grad, dgrad) are value/tangent pairs.
+    __enzyme_fwddiff<void>( (void*)grad_kernel<Output>, enzyme_dup, x, dx, enzyme_const, (const void*)gp, enzyme_dup, grad, dgrad  );
+    for ( int row = 0; row < 8; ++row ) H[row * 8 + col] = dgrad[row];
+  }
+}
+
+
+}  // namespace
+
+
+
+
+// Builds the fixed-quadrature parameter pack for one pair (the path taken when p_.enzyme_quadrature is false).
+// Gparams holds raw pointers only. The previous version pointed them at function locals (a QuadPoints and a
+// std::vector), so all three pointers dangled as soon as the function returned. The arrays now live in
+// thread_local storage: the returned pointers stay valid until the next construct_gparams call on this thread.
+Gparams ContactEvaluator::construct_gparams( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                             const MeshData::Viewer& mesh2 ) const
+{
+  // Backing storage for the pointers stored in the returned Gparams.
+  static thread_local QuadPoints quad;
+  static thread_local std::vector<double> x2;
+
+  double A0[2], A1[2], B0[2], B1[2];
+  endpoints( mesh1, pair.m_element_id1, A0, A1 );
+  endpoints( mesh2, pair.m_element_id2, B0, B1 );
+  double nB[2] = { 0.0 };
+  find_normal( B0, B1, nB );
+
+  // NOTE(review): del still comes from the file-level `smoother`, exactly as before; confirm it matches p_.del.
+  const auto projs = projections( pair, mesh1, mesh2 );
+  const auto bounds = ContactSmoothing::bounds_from_projections( projs, smoother.get_del() );
+  quad = compute_quadrature( ContactSmoothing::smooth_bounds( bounds, smoother.get_del() ) );
+
+  const int N = static_cast<int>( quad.qp.size() );
+  x2.resize( 2 * N );
+  for ( int i = 0; i < N; ++i ) {
+    double x1[2] = { 0.0 };  // iso_map output for quadrature point i on segment A
+    iso_map( A0, A1, quad.qp[i], x1 );
+    double x2_i[2] = { 0.0 };  // find_intersection output for x1 and nB on segment B
+    find_intersection( B0, B1, x1, nB, x2_i );
+    x2[2 * i] = x2_i[0];
+    x2[2 * i + 1] = x2_i[1];
+  }
+
+  // Member order of Gparams: N, qp, w, x2.
+  return Gparams{ N, quad.qp.data(), quad.w.data(), x2.data() };
+}
+
+
+// Returns the two values that get_projections computes for the segment pair
+// (element m_element_id1 of mesh1 and element m_element_id2 of mesh2).
+std::array<double, 2> ContactEvaluator::projections( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                                     const MeshData::Viewer& mesh2 ) const
+{
+  double A0[2], A1[2], B0[2], B1[2];
+  endpoints( mesh1, pair.m_element_id1, A0, A1 );
+  endpoints( mesh2, pair.m_element_id2, B0, B1 );
+
+  double xi[2];
+  get_projections( A0, A1, B0, B1, xi );
+  std::array<double, 2> result;
+  result[0] = xi[0]; result[1] = xi[1];
+  return result;
+}
+
+// Orders the two projection coordinates and limits each of them to the interval [-0.5 - del, 0.5 + del].
+// Returns { lower, upper }.
+std::array<double, 2> ContactSmoothing::bounds_from_projections( const std::array<double, 2>& proj, double del )
+{
+  const double lo = -0.5 - del;
+  const double hi = 0.5 + del;
+
+  double lower = std::min( proj[0], proj[1] );
+  double upper = std::max( proj[0], proj[1] );
+
+  // Adjustment order matters only for unusual del: the upper value is raised to lo before it is capped
+  // at hi, and the lower value is capped at hi before it is raised to lo.
+  upper = std::max( upper, lo );
+  lower = std::min( lower, hi );
+  lower = std::max( lower, lo );
+  upper = std::min( upper, hi );
+
+  std::array<double, 2> limited = { lower, upper };
+  return limited;
+}
+
+// Applies the smoothing map to both bounds: xi = bound + 0.5 is mapped to xi_hat and shifted back by 0.5.
+// The map is the identity on [del, 1 - del] and a quadratic blend on [-del, del] and [1 - del, 1 + del].
+// Both blends end with zero slope, so the map saturates: 0 below -del and 1 above 1 + del. The upper case
+// used to match no branch and returned 0 (a bound of -0.5), e.g. when rounding pushed xi just past 1 + del.
+std::array<double, 2> ContactSmoothing::smooth_bounds( const std::array<double, 2>& bounds, double del )
+{
+  std::array<double, 2> smooth_bounds;
+  for ( int i = 0; i < 2; ++i ) {
+    const double xi = bounds[i] + 0.5;
+    double xi_hat = 0.0;  // value kept when xi lies below -del
+    if ( del == 0.0 ) {
+      xi_hat = xi;
+    } else if ( -del <= xi && xi <= del ) {
+      xi_hat = ( 1.0 / ( 4 * del ) ) * ( xi * xi ) + 0.5 * xi + del / 4.0;
+    } else if ( ( 1.0 - del ) <= xi && xi <= 1.0 + del ) {
+      const double t = 1.0 - del;  // start of the upper blend
+      const double b = -1.0 / ( 4.0 * del );
+      const double c = 0.5 + 1.0 / ( 2.0 * del );
+      const double d = t + ( 1.0 / ( 4.0 * del ) ) * ( t * t ) - 0.5 * t - t / ( 2.0 * del );
+      xi_hat = b * xi * xi + c * xi + d;
+    } else if ( del <= xi && xi <= ( 1.0 - del ) ) {
+      xi_hat = xi;
+    } else if ( xi > 1.0 + del ) {
+      xi_hat = 1.0;  // saturate above the upper blend
+    }
+    smooth_bounds[i] = xi_hat - 0.5;
+  }
+  return smooth_bounds;
+}
+
+// Maps the 3-point rule from determine_legendre_nodes / determine_legendre_weights onto the interval
+// [xi_bounds[0], xi_bounds[1]] with the affine map q -> half_length * q + midpoint (it sends -1 to
+// xi_bounds[0] and +1 to xi_bounds[1]); the weights are multiplied by half_length.
+QuadPoints ContactEvaluator::compute_quadrature( const std::array<double, 2>& xi_bounds )
+{
+  const int num_points = 3;
+
+  std::array<double, 3> ref_nodes;
+  std::array<double, 3> ref_weights;
+  determine_legendre_nodes( num_points, ref_nodes );
+  determine_legendre_weights( num_points, ref_weights );
+
+  const double half_length = 0.5 * ( xi_bounds[1] - xi_bounds[0] );
+  const double midpoint = 0.5 * ( xi_bounds[1] + xi_bounds[0] );
+
+  QuadPoints mapped;
+  for ( int q = 0; q < num_points; ++q ) {
+    mapped.qp[q] = half_length * ref_nodes[q] + midpoint;
+    mapped.w[q] = ref_weights[q] * half_length;
+  }
+  return mapped;
+}
+
+// Weighted normal gap at coordinate xiA of the mesh1 segment. The point from iso_map( A, xiA ) is paired
+// with the point from find_intersection on segment B; the signed gap -( x1 - x2 ) . nB is multiplied by
+// nA . nB when that dot product is negative and by 0 otherwise.
+double ContactEvaluator::gap( const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
+                              double xiA ) const
+{
+  double A0[2], A1[2], B0[2], B1[2];
+  endpoints( mesh1, pair.m_element_id1, A0, A1 );
+  endpoints( mesh2, pair.m_element_id2, B0, B1 );
+
+  double normal_a[2] = { 0.0 };
+  double normal_b[2] = { 0.0 };
+  find_normal( A0, A1, normal_a );
+  find_normal( B0, B1, normal_b );
+
+  double point_a[2] = { 0.0 };
+  iso_map( A0, A1, xiA, point_a );
+  double point_b[2] = { 0.0 };
+  find_intersection( B0, B1, point_a, normal_b, point_b );
+
+  const double sep_x = point_a[0] - point_b[0];
+  const double sep_y = point_a[1] - point_b[1];
+  const double signed_gap = -( sep_x * normal_b[0] + sep_y * normal_b[1] );
+
+  const double alignment = normal_b[0] * normal_a[0] + normal_b[1] * normal_a[1];
+  if ( alignment < 0 ) return signed_gap * alignment;
+  return signed_gap * 0.0;
+}
+
+// Integrates the weighted gap and the nodal areas of the mesh1 segment over the smoothed overlap interval.
+// For every quadrature point xi with weight w, and shape values N1 = 0.5 - xi, N2 = 0.5 + xi:
+//   g_tilde[I] += w * N_I * gap( xi ) * L
+//   AI[I]      += w * N_I * L
+// where L is the distance between the two end points of the mesh1 segment.
+NodalContactData ContactEvaluator::compute_nodal_contact_data( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                                               const MeshData::Viewer& mesh2 ) const
+{
+  double A0[2];
+  double A1[2];
+  endpoints( mesh1, pair.m_element_id1, A0, A1 );
+
+  // The gap integral and the area integral are scaled by the same segment length.
+  const double seg_len = std::sqrt( std::pow( A1[0] - A0[0], 2 ) + std::pow( A1[1] - A0[1], 2 ) );
+  const double gap_scale = seg_len;
+  const double area_scale = seg_len;
+
+  // NOTE(review): del is read from the file-level `smoother`, not from the member smoother_ - confirm
+  // that this is intended.
+  const auto projs = projections( pair, mesh1, mesh2 );
+  const auto clamped = smoother_.bounds_from_projections( projs, smoother.get_del() );
+  const auto quad = compute_quadrature( smoother_.smooth_bounds( clamped, smoother.get_del() ) );
+
+  // Accumulate straight into the result.
+  NodalContactData contact_data;
+  contact_data.AI = { 0.0, 0.0 };
+  contact_data.g_tilde = { 0.0, 0.0 };
+
+  for ( size_t q = 0; q < quad.qp.size(); ++q ) {
+    const double xi = quad.qp[q];
+    const double weight = quad.w[q];
+
+    // Shape values of the two segment nodes at xi.
+    const double shape[2] = { 0.5 - xi, 0.5 + xi };
+
+    const double weighted_gap = gap( pair, mesh1, mesh2, xi );
+
+    for ( int node = 0; node < 2; ++node ) {
+      contact_data.g_tilde[node] += weight * shape[node] * weighted_gap * gap_scale;
+      contact_data.AI[node] += weight * shape[node] * area_scale;
+    }
+  }
+
+  return contact_data;
+}
+
+
+// Penalty pressures for the two nodes. With g = g_tilde / AI, a node gets p = p_.k * g when g < 0 and
+// p = 0 otherwise; a node whose AI is below 1e-12 always gets p = 0.
+// Returns { p for node 0, p for node 1 }.
+std::array<double, 2> ContactEvaluator::compute_pressures( const NodalContactData& ncd ) const
+{
+  std::array<double, 2> pressures;
+
+  for ( int node = 0; node < 2; ++node ) {
+    // Area-averaged gap of this node.
+    const double nodal_gap = ncd.g_tilde[node] / ncd.AI[node];
+
+    // KKT conditions: pressure only for a negative gap.
+    double p = 0.0;
+    if ( nodal_gap < 0.0 ) {
+      p = p_.k * nodal_gap;
+    }
+
+    // A vanishing nodal area overrides whatever the division above produced.
+    if ( ncd.AI[node] < 1e-12 ) {
+      p = 0.0;
+    }
+
+    pressures[node] = p;
+  }
+  return pressures;
+}
+
+// Contact energy of one pair: the sum over its two nodes of pressure * g_tilde, with the pressures taken
+// from compute_pressures and g_tilde from compute_nodal_contact_data.
+// Returns the scalar energy.
+double ContactEvaluator::compute_contact_energy( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                                 const MeshData::Viewer& mesh2 ) const
+{
+  const NodalContactData ncd = compute_nodal_contact_data( pair, mesh1, mesh2 );
+  const std::array<double, 2> p = compute_pressures( ncd );
+
+  // Node 0 term plus node 1 term.
+  return p[0] * ncd.g_tilde[0] + p[1] * ncd.g_tilde[1];
+}
+
+void ContactEvaluator::gtilde_and_area( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                        const MeshData::Viewer& mesh2, double gtilde[2], double area[2] ) const
+{  // copies the nodal g_tilde and AI values of one pair into the caller's two-entry arrays
+  const NodalContactData ncd = compute_nodal_contact_data( pair, mesh1, mesh2 );
+  for ( int node = 0; node < 2; ++node ) {
+    gtilde[node] = ncd.g_tilde[node];
+    area[node] = ncd.AI[node];
+  }
+}
+
+// Gradients of the two nodal g_tilde values with respect to the eight pair coordinates
+// x = { A0x, A0y, A1x, A1y, B0x, B0y, B1x, B1y }; dgt1_dx and dgt2_dx each receive eight entries.
+//
+//  * p_.enzyme_quadrature == true:  the quadrature is rebuilt from x inside the differentiated function
+//    (grad_kernel_enzyme).
+//  * p_.enzyme_quadrature == false: the quadrature comes from construct_gparams and is passed to Enzyme as
+//    a constant (grad_kernel).
+//
+// Cleanup relative to the previous version: the two segment normals were computed but never read, and the
+// results went through zero-filled staging arrays before being copied out. Both are gone; the kernels write
+// all eight entries straight into the caller's buffers.
+void ContactEvaluator::grad_gtilde( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                    const MeshData::Viewer& mesh2, double dgt1_dx[8], double dgt2_dx[8] ) const
+{
+  double A0[2], A1[2], B0[2], B1[2];
+  endpoints( mesh1, pair.m_element_id1, A0, A1 );
+  endpoints( mesh2, pair.m_element_id2, B0, B1 );
+
+  // Flatten the four end points into the coordinate vector the kernels differentiate against.
+  const double x[8] = { A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1] };
+
+  if ( p_.enzyme_quadrature ) {
+    // Quadrature is part of the differentiated function.
+    grad_kernel_enzyme<KernelOutput::GTILDE1>( x, dgt1_dx );
+    grad_kernel_enzyme<KernelOutput::GTILDE2>( x, dgt2_dx );
+    return;
+  }
+
+  // Fixed quadrature: gp is used immediately, within this call.
+  const Gparams gp = construct_gparams( pair, mesh1, mesh2 );
+
+  grad_kernel<KernelOutput::GTILDE1>( x, &gp, dgt1_dx );
+  grad_kernel<KernelOutput::GTILDE2>( x, &gp, dgt2_dx );
+}
+
+
+// Gradients of the two nodal areas AI with respect to the eight pair coordinates
+// x = { A0x, A0y, A1x, A1y, B0x, B0y, B1x, B1y }; dA1_dx and dA2_dx each receive eight entries.
+// The two segment normals that used to be computed here were never read, so they are no longer evaluated.
+void ContactEvaluator::grad_trib_area( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                       const MeshData::Viewer& mesh2, double dA1_dx[8], double dA2_dx[8] ) const
+{
+  double A0[2], A1[2], B0[2], B1[2];
+  endpoints( mesh1, pair.m_element_id1, A0, A1 );
+  endpoints( mesh2, pair.m_element_id2, B0, B1 );
+
+  const double x[8] = { A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1] };
+
+  if ( p_.enzyme_quadrature ) {
+    // Quadrature is rebuilt from x inside the differentiated function.
+    grad_kernel_enzyme<KernelOutput::A1>( x, dA1_dx );
+    grad_kernel_enzyme<KernelOutput::A2>( x, dA2_dx );
+    return;
+  }
+
+  // Fixed quadrature from construct_gparams, passed to Enzyme as a constant.
+  const Gparams gp = construct_gparams( pair, mesh1, mesh2 );
+  grad_kernel<KernelOutput::A1>( x, &gp, dA1_dx );
+  grad_kernel<KernelOutput::A2>( x, &gp, dA2_dx );
+}
+
+
+// Nodal force vector of one pair (eight entries, in the coordinate order used by grad_gtilde):
+//   f[i] = sum_j ( 2 * p_j * d(g_tilde_j)/dx_i  -  p_j * g_j * d(A_j)/dx_i ),   g_j = g_tilde_j / A_j,
+// where p_j comes from compute_pressures and g_j is replaced by 0 when A_j < 1e-12.
+std::array<double, 8> ContactEvaluator::compute_contact_forces( const InterfacePair& pair,
+                                                                const MeshData::Viewer& mesh1,
+                                                                const MeshData::Viewer& mesh2 ) const
+{
+  // Row j holds the gradient belonging to node j.
+  double grad_gt[2][8] = { { 0.0 } };
+  double grad_area[2][8] = { { 0.0 } };
+  grad_gtilde( pair, mesh1, mesh2, grad_gt[0], grad_gt[1] );
+  grad_trib_area( pair, mesh1, mesh2, grad_area[0], grad_area[1] );
+
+  const NodalContactData ncd = compute_nodal_contact_data( pair, mesh1, mesh2 );
+  const std::array<double, 2> pressures = compute_pressures( ncd );
+
+  // The nodal gaps do not depend on the coordinate index, so they are formed once.
+  double nodal_gap[2];
+  for ( int j = 0; j < 2; ++j ) {
+    nodal_gap[j] = ncd.g_tilde[j] / ncd.AI[j];
+    if ( ncd.AI[j] < 1e-12 ) {
+      nodal_gap[j] = 0.0;
+    }
+  }
+
+  std::array<double, 8> f = { 0.0 };
+
+  for ( int i = 0; i < 8; ++i ) {
+    for ( int j = 0; j < 2; ++j ) {
+      f[i] += ( 2 * pressures[j] * grad_gt[j][i] - pressures[j] * nodal_gap[j] * grad_area[j][i] );
+    }
+  }
+
+  return f;
+}
+
+
+// Second derivatives of the two nodal g_tilde values with respect to the eight pair coordinates
+// x = { A0x, A0y, A1x, A1y, B0x, B0y, B1x, B1y }.
+//
+// H1 and H2 each receive 64 entries; entry (row, col) is stored at index row * 8 + col.
+//
+//  * p_.enzyme_quadrature == true:  d2_kernel, which differentiates through the quadrature.
+//  * p_.enzyme_quadrature == false: d2_kernel_quad with the fixed quadrature from construct_gparams.
+//
+// Cleanup relative to the previous version: the two segment normals were computed but never read, and the
+// results went through two zero-filled 64-entry staging arrays before being copied out. Both are gone; the
+// kernels assign all 64 entries directly in the caller's buffers.
+void ContactEvaluator::d2_g2tilde( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                   const MeshData::Viewer& mesh2, double H1[64], double H2[64] ) const
+{
+  double A0[2], A1[2], B0[2], B1[2];
+  endpoints( mesh1, pair.m_element_id1, A0, A1 );
+  endpoints( mesh2, pair.m_element_id2, B0, B1 );
+
+  // Flatten the four end points into the coordinate vector the kernels differentiate against.
+  const double x[8] = { A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1] };
+
+  if ( p_.enzyme_quadrature ) {
+    // Quadrature is part of the differentiated function.
+    d2_kernel<KernelOutput::GTILDE1>( x, H1 );
+    d2_kernel<KernelOutput::GTILDE2>( x, H2 );
+    return;
+  }
+
+  // Fixed quadrature: gp is used immediately, within this call.
+  const Gparams gp = construct_gparams( pair, mesh1, mesh2 );
+
+  d2_kernel_quad<KernelOutput::GTILDE1>( x, &gp, H1 );
+  d2_kernel_quad<KernelOutput::GTILDE2>( x, &gp, H2 );
+}
+
+
+// Second derivatives of the two nodal areas AI with respect to the eight pair coordinates
+// x = { A0x, A0y, A1x, A1y, B0x, B0y, B1x, B1y }.
+//
+// d2A1 and d2A2 each receive 64 entries; entry (row, col) is stored at index row * 8 + col.
+//
+//  * p_.enzyme_quadrature == true:  d2_kernel, which differentiates through the quadrature.
+//  * p_.enzyme_quadrature == false: d2_kernel_quad with the fixed quadrature from construct_gparams.
+//
+// Cleanup relative to the previous version: the two segment normals were computed but never read, and the
+// results went through two zero-filled 64-entry staging arrays before being copied out. Both are gone; the
+// kernels assign all 64 entries directly in the caller's buffers.
+void ContactEvaluator::compute_d2A_d2u( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                        const MeshData::Viewer& mesh2, double d2A1[64], double d2A2[64] ) const
+{
+  double A0[2], A1[2], B0[2], B1[2];
+  endpoints( mesh1, pair.m_element_id1, A0, A1 );
+  endpoints( mesh2, pair.m_element_id2, B0, B1 );
+
+  // Flatten the four end points into the coordinate vector the kernels differentiate against.
+  const double x[8] = { A0[0], A0[1], A1[0], A1[1], B0[0], B0[1], B1[0], B1[1] };
+
+  if ( p_.enzyme_quadrature ) {
+    // Quadrature is part of the differentiated function.
+    d2_kernel<KernelOutput::A1>( x, d2A1 );
+    d2_kernel<KernelOutput::A2>( x, d2A2 );
+    return;
+  }
+
+  // Fixed quadrature: gp is used immediately, within this call.
+  const Gparams gp = construct_gparams( pair, mesh1, mesh2 );
+
+  d2_kernel_quad<KernelOutput::A1>( x, &gp, d2A1 );
+  d2_kernel_quad<KernelOutput::A2>( x, &gp, d2A2 );
+}
+
+
+// Tangent stiffness (8x8) of one pair: second derivative of sum_i k * g_tilde_i^2 / A_i with respect to the
+// eight pair coordinates. With g_i = g_tilde_i / A_i, dg / dA the gradients and d2g / d2A the second
+// derivatives of g_tilde_i / A_i, node i adds
+//   k * [ (2 / A_i) dg dg^T - (2 g_i / A_i) ( dg dA^T + dA dg^T ) + (2 g_i^2 / A_i) dA dA^T
+//         + 2 g_i d2g - g_i^2 d2A ].
+//
+// Two fixes relative to the previous version:
+//  * A node with A_i < 1e-12 used to reset K_mat[k][j] to 0 inside the node loop. For i == 1 that also threw
+//    away what node 0 had already added. Such a node is now skipped, mirroring compute_contact_forces, where
+//    its pressure and gap are forced to 0 so that it contributes nothing.
+//  * compute_pressures returns p_i = 0 unless g_i < 0, so an inactive node adds no force; its stiffness
+//    contribution must vanish as well. Inactive nodes are therefore skipped too.
+std::array<std::array<double, 8>, 8> ContactEvaluator::compute_stiffness_matrix( const InterfacePair& pair,
+                                                                                 const MeshData::Viewer& mesh1,
+                                                                                 const MeshData::Viewer& mesh2 ) const
+{
+  const NodalContactData ncd = compute_nodal_contact_data( pair, mesh1, mesh2 );
+
+  double dg_tilde1[8], dg_tilde2[8], dAI1[8], dAI2[8];
+  grad_gtilde( pair, mesh1, mesh2, dg_tilde1, dg_tilde2 );
+  grad_trib_area( pair, mesh1, mesh2, dAI1, dAI2 );
+
+  double d2_gtilde1[64], d2_gtilde2[64], d2_dA1[64], d2_dA2[64];
+  d2_g2tilde( pair, mesh1, mesh2, d2_gtilde1, d2_gtilde2 );
+  compute_d2A_d2u( pair, mesh1, mesh2, d2_dA1, d2_dA2 );
+
+  // Per-node views of the derivative arrays.
+  const std::array<const double*, 2> dg_t = { dg_tilde1, dg_tilde2 };
+  const std::array<const double*, 2> dA = { dAI1, dAI2 };
+  const std::array<const double*, 2> ddg_t = { d2_gtilde1, d2_gtilde2 };
+  const std::array<const double*, 2> ddA = { d2_dA1, d2_dA2 };
+
+  std::array<std::array<double, 8>, 8> K_mat = { { { 0.0 } } };
+
+  for ( int i = 0; i < 2; ++i ) {
+    const double area = ncd.AI[i];
+    if ( area < 1e-12 ) continue;  // degenerate nodal area: no contribution
+
+    const double g = ncd.g_tilde[i] / area;
+    if ( !( g < 0.0 ) ) continue;  // inactive node: same test as compute_pressures
+
+    for ( int k = 0; k < 8; ++k ) {
+      for ( int j = 0; j < 8; ++j ) {
+        // term 1:
+        K_mat[k][j] += p_.k * ( 2.0 / area ) * dg_t[i][k] * dg_t[i][j];
+        // terms 2 and 3:
+        K_mat[k][j] -= p_.k * ( 2.0 * g / area ) * ( dg_t[i][k] * dA[i][j] + dA[i][k] * dg_t[i][j] );
+        // term 4:
+        K_mat[k][j] += p_.k * ( 2.0 * g * g / area ) * dA[i][k] * dA[i][j];
+        // term 5:
+        K_mat[k][j] += p_.k * 2.0 * g * ddg_t[i][k * 8 + j];
+        // term 6:
+        K_mat[k][j] -= p_.k * g * g * ddA[i][k * 8 + j];
+      }
+    }
+  }
+
+  return K_mat;
+}
+
+#endif  // TRIBOL_USE_ENZYME
+
+}  // namespace tribol
diff --git a/src/tribol/physics/new_method.hpp b/src/tribol/physics/new_method.hpp
new file mode 100644
index 00000000..125bf0da
--- /dev/null
+++ b/src/tribol/physics/new_method.hpp
@@ -0,0 +1,156 @@
+#pragma once
+#include <vector>
+#include <array>
+
+#include "tribol/config.hpp"
+
+#include "tribol/mesh/InterfacePairs.hpp"
+#include "tribol/mesh/MeshData.hpp"
+
+namespace tribol {
+
+#ifdef TRIBOL_USE_ENZYME
+
+struct Node {  // mesh node: two coordinates plus an integer id
+  double x, y;
+  int id;
+};
+
+struct Element {  // element record: its own id and two node ids
+  int id;
+  std::array<int, 2> node_ids;
+};
+
+struct Mesh {  // plain container of nodes and elements
+  std::vector<Node> nodes;
+  std::vector<Element> elements;
+  // Unchecked access to nodes[i], as const and as mutable reference.
+  const Node& node( int i ) const { return nodes[i]; }
+  Node& node( int i ) { return nodes[i]; }
+};
+
+struct QuadPoints {  // three-point quadrature rule, as filled by ContactEvaluator::compute_quadrature
+  std::array<double, 3> qp;  // point coordinates
+  std::array<double, 3> w;  // weights
+};
+
+struct ContactParams {             // settings copied into ContactSmoothing / ContactEvaluator; members now default to 0 / false
+  double del = 0.0;                // smoothing width given to bounds_from_projections / smooth_bounds (0 selects the identity map)
+  double k = 0.0;                  // penalty factor: compute_pressures returns k * gap for a negative gap
+  int N = 0;                       // NOTE(review): not read by any code visible here - confirm its purpose
+  bool enzyme_quadrature = false;  // true: differentiate through the quadrature; false: fixed quadrature from construct_gparams
+};
+
+struct NodalContactData {  // per-pair result of ContactEvaluator::compute_nodal_contact_data, one entry per segment node
+  std::array<double, 2> AI;  // integrated nodal areas
+  std::array<double, 2> g_tilde;  // integrated weighted gaps
+};
+
+struct FDResult {  // NOTE(review): not used by any code visible here
+  std::array<double, 2> dgt;  // presumably finite-difference derivatives of the two g_tilde values - confirm
+};
+
+struct FiniteDiffResult {  // return type of validate_g_tilde / validate_hessian; field meanings below are read off the names - confirm
+  std::vector<double> fd_gradient_g1;  // presumably finite-difference gradient of g_tilde 1
+  std::vector<double> fd_gradient_g2;  // presumably finite-difference gradient of g_tilde 2
+  std::vector<double> analytical_gradient_g1;  // presumably the differentiated counterpart for g_tilde 1
+  std::vector<double> analytical_gradient_g2;  // presumably the differentiated counterpart for g_tilde 2
+  std::vector<int> node_ids;
+  double g_tilde1_baseline;  // presumably the unperturbed g_tilde values
+  double g_tilde2_baseline;
+};
+
+struct Gparams {               // quadrature data passed to the differentiated kernels; members now default to 0 / nullptr
+  int N = 0;                   // number of quadrature points
+  const double* qp = nullptr;  // point coordinates (not owned)
+  const double* w = nullptr;   // weights (not owned)
+  const double* x2 = nullptr;  // 2 * N values filled by construct_gparams (not owned); null on the Enzyme-quadrature path
+};
+
+class ContactSmoothing {  // smoothing of the integration bounds; stores its own copy of the parameters
+ public:
+  explicit ContactSmoothing( const ContactParams& p ) : p_( p ) {}  // Constructor
+  // Smoothing width del from the stored parameters.
+  double get_del() const { return p_.del; }
+  // Orders the two projections and limits them to [-0.5 - del, 0.5 + del].
+  static std::array<double, 2> bounds_from_projections( const std::array<double, 2>& proj, double del );
+  // Maps each bound through the smoothing function controlled by del.
+  static std::array<double, 2> smooth_bounds( const std::array<double, 2>& bounds, double del );
+
+ private: 
+  ContactParams p_;
+};
+
+class ContactEvaluator {
+ public:
+  explicit ContactEvaluator( const ContactParams& p )
+      : p_( p ), smoother_( p ) {}  // constructor - copies params into the object
+  // Energy of one pair: sum over its two nodes of pressure * g_tilde.
+  double compute_contact_energy( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                 const MeshData::Viewer& mesh2 ) const;
+  // Three-point rule mapped onto [xi_bounds[0], xi_bounds[1]].
+  static QuadPoints compute_quadrature( const std::array<double, 2>& xi_bounds );
+  // Copies the nodal g_tilde and AI values of one pair into two-entry arrays.
+  void gtilde_and_area( const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
+                        double gtilde[2], double area[2] ) const;
+  // Gradients of the two g_tilde values w.r.t. the eight pair coordinates.
+  void grad_gtilde( const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
+                    double dgt1_dx[8], double dgt2_dx[8] ) const;
+  // Gradients of the two nodal areas w.r.t. the eight pair coordinates.
+  void grad_trib_area( const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
+                       double dA1_dx[8], double dA2_dx[8] ) const;
+  // Second derivatives of the two g_tilde values; the two outputs hold 64 entries each (named H1/H2 in the definition).
+  void d2_g2tilde( const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
+                   double dgt1_dx[64], double dgt2_dx[64] ) const;
+  // Second derivatives of the two nodal areas; the two outputs hold 64 entries each (named d2A1/d2A2 in the definition).
+  void compute_d2A_d2u( const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
+                        double dgt1_dx[64], double dgt2_dx[64] ) const;
+  // NOTE(review): definition not visible here; presumably returns the two g_tilde values - confirm.
+  std::pair<double, double> eval_gtilde( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                         const MeshData::Viewer& mesh2 ) const;
+  // NOTE(review): definition not visible here; presumably a finite-difference check of the g_tilde gradients - confirm.
+  FiniteDiffResult validate_g_tilde( const InterfacePair& pair, MeshData& mesh1, MeshData& mesh2,
+                                     double epsilon = 1e-7 ) const;
+  // NOTE(review): definition not visible here; presumably eval_gtilde with a caller-supplied quadrature - confirm.
+  std::pair<double, double> eval_gtilde_fixed_qp( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                                  const MeshData::Viewer& mesh2, const QuadPoints& qp_fixed ) const;
+  // NOTE(review): definition not visible here; presumably a finite-difference check of the second derivatives - confirm.
+  FiniteDiffResult validate_hessian( const InterfacePair& pair, MeshData& mesh1, MeshData& mesh2,
+                                     double epsilon = 1e-7 ) const;
+
+ private:
+  ContactParams p_;
+  ContactSmoothing smoother_;
+  // Packs the fixed quadrature of one pair into a Gparams (used when p_.enzyme_quadrature is false).
+  Gparams construct_gparams( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                           const MeshData::Viewer& mesh2 ) const;
+
+  // Eight-entry force vector assembled from pressures, nodal gaps and the two gradient sets.
+  std::array<double, 8> compute_contact_forces( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                                const MeshData::Viewer& mesh2 ) const;
+  // 8x8 matrix assembled from first and second derivatives of g_tilde and the nodal areas.
+  std::array<std::array<double, 8>, 8> compute_stiffness_matrix( const InterfacePair& pair,
+                                                                 const MeshData::Viewer& mesh1,
+                                                                 const MeshData::Viewer& mesh2 ) const;
+  // The two values produced by get_projections for the pair's segments.
+    std::array<double, 2> projections( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                     const MeshData::Viewer& mesh2 ) const;
+
+
+  // NOTE(review): definition not visible here; presumably grad_gtilde with a caller-supplied quadrature - confirm.
+  void grad_gtilde_with_qp( const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
+                            const QuadPoints& qp_fixed, double dgt1_dx[8], double dgt2_dx[8] ) const;
+
+  // Weighted normal gap at coordinate xiA of the mesh1 segment.
+  double gap( const InterfacePair& pair, const MeshData::Viewer& mesh1, const MeshData::Viewer& mesh2,
+              double xiA ) const;
+  // Integrated nodal areas and weighted gaps of one pair.
+  NodalContactData compute_nodal_contact_data( const InterfacePair& pair, const MeshData::Viewer& mesh1,
+                                               const MeshData::Viewer& mesh2 ) const;
+  // p = p_.k * (g_tilde / AI) for a negative nodal gap, otherwise 0; also 0 when AI < 1e-12.
+  std::array<double, 2> compute_pressures( const NodalContactData& ncd ) const;
+};
+
+#endif  // TRIBOL_USE_ENZYME
+
+}  // namespace tribol
diff --git a/strain_energy.cpp b/strain_energy.cpp
new file mode 100644
index 00000000..3e903679
--- /dev/null
+++ b/strain_energy.cpp
@@ -0,0 +1,743 @@
+#include <iostream>
+#include <cstdlib> 
+#include <cstdio>
+#include "gtest/gtest.h"
+#include <array>
+#include "axom/core.hpp"
+#include "axom/slic/interface/slic.hpp"
+#include <cmath>
+#include <limits>
+extern void* enzyme_dup;
+extern void* enzyme_const;
+
+
+template <typename return_type, typename... Args>
+return_type __enzyme_fwddiff( Args... );
+
+template <typename return_type, typename... Args>
+return_type __enzyme_autodiff( Args... );
+
+
+// C = F_T * F for row-major 3x3 matrices (note the order: the second argument is the left factor).
+void multiply3x3(const double F[9], const double F_T[9], double C[9]) {
+    for (int idx = 0; idx < 9; ++idx) {
+        const int row = idx / 3;
+        const int col = idx % 3;
+        C[idx] = 0.0;
+        for (int k = 0; k < 3; ++k) C[idx] += F_T[row * 3 + k] * F[k * 3 + col];
+    }
+}
+
+
+// E = 0.5 * (F^T F - I) for a row-major 3x3 matrix F.
+void calc_E_from_F(const double F[9], double E[9]) {
+    // Build the transpose explicitly; multiply3x3 takes it as its second argument.
+    double Ft[9];
+    for (int r = 0; r < 3; ++r) {
+        for (int c = 0; c < 3; ++c) {
+            Ft[r * 3 + c] = F[c * 3 + r];
+        }
+    }
+
+    double FtF[9] = {0.0};
+    multiply3x3(F, Ft, FtF);
+
+    for (int i = 0; i < 9; ++i) {
+        const double identity = (i % 4 == 0) ? 1.0 : 0.0;  // entries 0, 4, 8 are the diagonal
+        E[i] = 0.5 * (FtF[i] - identity);
+    }
+}
+
+// Despite its name, this forms the right Cauchy-Green deformation tensor C = 2 E + I (row-major 3x3),
+// not a stress.
+void calc_cauchy_stress_tensor(const double E[9], double C[9]){
+    const double identity[9] = {1, 0, 0, 0, 1, 0, 0, 0, 1};
+    for (int i = 0; i < 9; ++i) {
+        C[i] = 2 * E[i] + identity[i];
+    }
+}
+
+
+// Trace of a row-major 3x3 matrix: sum of entries 0, 4 and 8.
+double calc_trace(const double C[9]) {
+    double sum = C[0];
+    sum += C[4];
+    return sum + C[8];
+}
+
+
+
+double calc_determinant(const double C[9]) {  // returns sqrt(det C), not det C itself
+    const double m0 = C[4] * C[8] - C[5] * C[7], m1 = C[3] * C[8] - C[5] * C[6];
+    const double m2 = C[3] * C[7] - C[4] * C[6];
+    return std::sqrt(C[0] * m0 - C[1] * m1 + C[2] * m2);
+}
+
+
+// Inverse of a row-major 3x3 matrix via the adjugate. Returns false without writing Finv when
+// |det F| is below machine epsilon; otherwise writes the inverse to Finv and returns true.
+bool invert3x3(const double F[9], double Finv[9])
+{
+    // The cofactors of the first column also give the determinant (expansion along the first row).
+    const double c00 = F[4]*F[8] - F[5]*F[7];
+    const double c01 = F[3]*F[8] - F[5]*F[6];
+    const double c02 = F[3]*F[7] - F[4]*F[6];
+
+    const double det = F[0]*c00 - F[1]*c01 + F[2]*c02;
+
+    if (std::abs(det) < std::numeric_limits<double>::epsilon()) {
+        return false; // Singular matrix
+    }
+
+    const double scale = 1.0 / det;
+
+    // Signed adjugate entries in row-major order.
+    const double adj[9] = { c00, -(F[1]*F[8] - F[2]*F[7]),  (F[1]*F[5] - F[2]*F[4]),
+                           -c01,  (F[0]*F[8] - F[2]*F[6]), -(F[0]*F[5] - F[2]*F[3]),
+                            c02, -(F[0]*F[7] - F[1]*F[6]),  (F[0]*F[4] - F[1]*F[3]) };
+
+    for (int i = 0; i < 9; ++i) {
+        Finv[i] = adj[i] * scale;
+    }
+
+    return true;
+}
+
+
+// W(E) = mu/2 * (tr C - 3) - mu * ln J + lambda/2 * (ln J)^2, with C = 2E + I from calc_cauchy_stress_tensor
+// and J = sqrt(det C) from calc_determinant. The result is written to *W.
+void strain_energy(double* E, double mu, double lambda, double* W) {
+    double C[9] = {0.0};
+    calc_cauchy_stress_tensor(E, C);
+    const double log_J = log(calc_determinant(C));
+    *W = mu/2.0 * (calc_trace(C) - 3.0) - mu * log_J + lambda/2.0 * pow(log_J, 2.0);
+}
+
+//calc stress using enzyme fwddiff: dW_dE[i] = dW/dE[i], one __enzyme_fwddiff call per entry of E
+void stress(double* E, double mu, double lambda, double* dW_dE) {
+    double W = 0.0;
+    for(int i = 0; i < 9; ++i) {
+        double dE[9] = {0.0};
+        dE[i] = 1.0;  // unit tangent along E[i]
+        double dmu = 0.0;  // zero tangents: mu and lambda are not varied
+        double dlambda = 0.0;
+        double dw = 0.0;  // tangent slot paired with W; copied to dW_dE[i] below
+        __enzyme_fwddiff<void>( (void*) strain_energy, E, dE, mu, dmu, lambda, dlambda, &W, &dw);
+        dW_dE[i] = dw;
+    }
+
+}
+
+//calc stress using enzyme autodiff: a single __enzyme_autodiff call fills all nine entries
+void stress_reverse(double* E, double mu, double lambda, double* dW_dE) {
+    double dE[9] = {0.0};  // shadow of E: receives dW/dE
+    double W = 0.0;
+    double dW = 1.0;  // shadow of W, seeded with 1
+    __enzyme_autodiff<void>( strain_energy, enzyme_dup, E, dE, enzyme_const, mu, enzyme_const, lambda, enzyme_dup, &W, &dW); 
+    // Copy the accumulated shadow to the caller's buffer.
+    for (int i = 0; i < 9; ++i) {
+        dW_dE[i] = dE[i];
+    }
+}
+
+
+// Central-difference approximation of dW/dE: dW_dE[i] = (W(E + h e_i) - W(E - h e_i)) / (2 h).
+void stress_FD(double* E, double mu, double lambda, double* dW_dE, double h = 1e-7) {
+    for (int i = 0; i < 9; ++i) {
+        // Fresh copies of E for every component, perturbed in entry i only.
+        double forward[9];
+        double backward[9];
+        for (int j = 0; j < 9; ++j) {
+            forward[j] = (j == i) ? E[i] + h : E[j];
+            backward[j] = (j == i) ? E[i] - h : E[j];
+        }
+
+        double w_forward;
+        double w_backward;
+        strain_energy(forward, mu, lambda, &w_forward);
+        strain_energy(backward, mu, lambda, &w_backward);
+
+        dW_dE[i] = (w_forward - w_backward) / (2 * h);
+    }
+
+}
+
+
+// Closed-form derivative S = lambda * ln(J) * C^-1 + mu * (I - C^-1), with C = 2E + I and J = sqrt(det C).
+// invert3x3 reports a singular C by returning false without writing its output; that result used to be
+// ignored, so S was then built from an uninitialized Cinv. S is now filled with NaN in that case.
+void hand_code_deriv(double* E, double mu, double lambda, double* S) {
+    double C[9] = {0.0};
+    calc_cauchy_stress_tensor(E, C);
+
+    double Cinv[9] = {0.0};
+    if (!invert3x3(C, Cinv)) {
+        for (int i = 0; i < 9; ++i) {
+            S[i] = std::numeric_limits<double>::quiet_NaN();
+        }
+        return;
+    }
+
+    const double lambda_log_J = lambda * std::log(calc_determinant(C));
+    for (int i = 0; i < 9; ++i) {
+        const double identity = (i % 4 == 0) ? 1.0 : 0.0;  // entries 0, 4, 8 are the diagonal
+        S[i] = lambda_log_J * Cinv[i] + mu * (identity - Cinv[i]);
+    }
+}
+
+void second_deriv_fwd_fwd(double* E, double mu, double lambda, double* d2W_d2E) {  // 81 second derivatives: __enzyme_fwddiff applied to stress()
+    double dW[9] = {0.0};  // primal output slot of stress()
+    double d2w[9] = {0.0};  // tangent of that output for the current direction
+    // One call per direction i; call i fills d2W_d2E[9 * i + 0..8].
+    for(int i  = 0; i < 9; ++i) {
+        double d2E[9] = {0.0};
+        d2E[i] = 1.0;  // unit tangent along E[i]
+        double d2mu = 0.0;
+        double d2lambda = 0.0;
+        __enzyme_fwddiff<void> ( (void*) stress, E, d2E, mu, d2mu, lambda, d2lambda, &dW, &d2w  );
+        for(int j = 0; j < 9; ++j) {
+            d2W_d2E[9 * i + j] = d2w[j];
+        }   
+    }
+}
+
+void second_deriv_rev_fwd(double* E, double mu, double lambda, double* d2W_d2E) {  // 81 second derivatives: __enzyme_fwddiff applied to stress_reverse()
+        double dW[9] = {0.0};  // primal output slot of stress_reverse()
+    double d2w[9] = {0.0};  // tangent of that output for the current direction
+    // One call per direction i; call i fills d2W_d2E[9 * i + 0..8].
+    for(int i  = 0; i < 9; ++i) {
+        double d2E[9] = {0.0};
+        d2E[i] = 1.0;  // unit tangent along E[i]
+        double d2mu = 0.0;
+        double d2lambda = 0.0;
+        __enzyme_fwddiff<void> ( (void*) stress_reverse, E, d2E, mu, d2mu, lambda, d2lambda, &dW, &d2w  );
+        for(int j = 0; j < 9; ++j) {
+            d2W_d2E[9 * i + j] = d2w[j];
+        }   
+    }
+}
+
+void second_deriv_rev_rev(double* E, double mu, double lambda, double* d2W_d2E) {  // 81 second derivatives: __enzyme_autodiff applied to stress_reverse()
+    for (int i = 0; i < 9; ++i) {
+    double d2E[81] = {0.0};  // shadow of E; only entries 0..8 are read below
+    double W[9] = {0.0};  // output slot of stress_reverse()
+    double d2W[9] = {0.0};
+    d2W[i] = 1.0;  // seed output entry i
+    __enzyme_autodiff<void>( stress_reverse, enzyme_dup, E, d2E, enzyme_const, mu, enzyme_const, lambda, enzyme_dup, &W, &d2W);
+        for(int j = 0; j < 9; ++j) {
+            d2W_d2E[9 * j + i] = d2E[j];  // stored at [9 * j + i]; second_deriv_fwd_rev stores at [9 * i + j]
+        }
+    }
+}
+
+void second_deriv_fwd_rev(double* E, double mu, double lambda, double* d2W_d2E) {  // 81 second derivatives: __enzyme_autodiff applied to stress()
+    for (int i = 0; i < 9; ++i) {
+    double d2E[81] = {0.0};  // shadow of E; only entries 0..8 are read below
+    double W[9] = {0.0};  // output slot of stress()
+    double d2W[9] = {0.0};
+    d2W[i] = 1.0;  // seed output entry i
+    __enzyme_autodiff<void>( stress, enzyme_dup, E, d2E, enzyme_const, mu, enzyme_const, lambda, enzyme_dup, &W, &d2W);
+        for(int j = 0; j < 9; ++j) {
+            d2W_d2E[9 * i + j] = d2E[j];  // call i fills d2W_d2E[9 * i + 0..8]
+        }
+    }
+}
+
+
+// Central differences of stress(): d2W_d2E[9 * i + j] = (stress_j(E + h e_i) - stress_j(E - h e_i)) / (2 h).
+void second_deriv_fwd_FD(double* E, double mu, double lambda, double* d2W_d2E, double h = 1e-7){
+    for (int i = 0; i < 9; ++i) {
+        double forward[9];
+        double backward[9];
+        for (int j = 0; j < 9; ++j) {
+            forward[j] = (j == i) ? E[i] + h : E[j];
+            backward[j] = (j == i) ? E[i] - h : E[j];
+        }
+
+        double grad_forward[9] = {0.0};
+        double grad_backward[9] = {0.0};
+        stress(forward, mu, lambda, grad_forward);
+        stress(backward, mu, lambda, grad_backward);
+        for (int j = 0; j < 9; ++j) {
+            d2W_d2E[9 * i + j] = (grad_forward[j] - grad_backward[j]) / (2 * h);
+        }
+    }
+}
+
+void second_deriv_hand_fwd(double *E, double mu, double lambda, double* d2W_d2E) {  // 81 second derivatives: __enzyme_fwddiff applied to hand_code_deriv()
+    double dW[9] = {0.0};  // primal output slot of hand_code_deriv()
+    double d2w[9] = {0.0};  // tangent of that output for the current direction
+
+    // One call per direction i; call i fills d2W_d2E[9 * i + 0..8].
+    for(int i  = 0; i < 9; ++i) {
+        double d2E[9] = {0.0};
+        d2E[i] = 1.0;  // unit tangent along E[i]
+        double d2mu = 0.0;
+        double d2lambda = 0.0;
+        __enzyme_fwddiff<void> ( (void*) hand_code_deriv, E, d2E, mu, d2mu, lambda, d2lambda, &dW, &d2w  );
+        for(int j = 0; j < 9; ++j) {
+            d2W_d2E[9 * i + j] = d2w[j];
+        }   
+    }
+}
+
+// Central differences of hand_code_deriv(): d2W_d2E[9 * i + j] = (S_j(E + h e_i) - S_j(E - h e_i)) / (2 h).
+void second_deriv_hand_FD(double* E, double mu, double lambda, double* d2W_d2E, double h = 1e-7) {
+    for (int i = 0; i < 9; ++i) {
+        double forward[9];
+        double backward[9];
+        for (int j = 0; j < 9; ++j) {
+            forward[j] = (j == i) ? E[i] + h : E[j];
+            backward[j] = (j == i) ? E[i] - h : E[j];
+        }
+
+        double s_backward[9] = {0.0};
+        double s_forward[9] = {0.0};
+        hand_code_deriv(forward, mu, lambda, s_forward);
+        hand_code_deriv(backward, mu, lambda, s_backward);
+        for (int j = 0; j < 9; ++j) {
+            d2W_d2E[9 * i + j] = (s_forward[j] - s_backward[j]) / (2 * h);
+        }
+    }
+}
+
+
+
+
+
+// Times N calls of stress() and prints the elapsed milliseconds. One untimed call is made first.
+void run_fwd_mode(double* E, double mu, double lambda, double* dw_df, int N) {
+    stress(E, mu, lambda, dw_df);
+
+    double checksum[9] = {0.0};  // running sum of the outputs
+    axom::utilities::Timer stopwatch{ false };
+    stopwatch.start();
+    for (int rep = 0; rep < N; ++rep) {
+        stress(E, mu, lambda, dw_df);
+        for (int j = 0; j < 9; ++j) checksum[j] += dw_df[j];
+    }
+    stopwatch.stop();
+    std::cout << axom::fmt::format( "Time to calc fwd_diff: {0:f}ms", stopwatch.elapsedTimeInMilliSec() ) << std::endl;
+}
+
+// Times N calls of stress_reverse() and prints the elapsed milliseconds. One untimed call is made first.
+void run_bkwd_mode(double* E, double mu, double lambda, double* dw_dE, int N) {
+    stress_reverse(E, mu, lambda, dw_dE);
+
+    double checksum[9] = {0.0};  // running sum of the outputs
+
+    axom::utilities::Timer stopwatch{ false };
+    stopwatch.start();
+    for (int rep = 0; rep < N; ++rep) {
+        stress_reverse(E, mu, lambda, dw_dE);
+        for (int j = 0; j < 9; ++j) checksum[j] += dw_dE[j];
+    }
+    stopwatch.stop();
+
+    std::cout << axom::fmt::format( "Time to calc backward_diff: {0:f}ms", stopwatch.elapsedTimeInMilliSec() ) << std::endl;
+}
+
+void run_hand_derivative(double* E, double mu, double lambda, double* S, int N) {
+    // One untimed call to hand_code_deriv, then N timed calls whose outputs are summed locally.
+    axom::utilities::Timer stopwatch{ false };
+    hand_code_deriv(E, mu, lambda, S);
+    double running_sum[9] = {0.0};
+    stopwatch.start();
+    for(int rep = 0; rep < N; ++rep) {
+        hand_code_deriv(E, mu, lambda, S);
+        for(int k = 0; k < 9; ++k) { running_sum[k] += S[k]; }
+    }
+    stopwatch.stop();
+    const double elapsed_ms = stopwatch.elapsedTimeInMilliSec();
+    std::cout << axom::fmt::format( "Time to calc hand_diff: {0:f}ms", elapsed_ms ) << std::endl;
+}
+
+void run_fwd_fwd(double* E, double mu, double lambda, double* S, int N) {
+    // Times N back-to-back calls of second_deriv_fwd_fwd and prints the total in milliseconds.
+    axom::utilities::Timer stopwatch{ false };
+    stopwatch.start();
+    for(int rep = 0; rep < N; ++rep) { second_deriv_fwd_fwd(E, mu, lambda, S); }
+    stopwatch.stop();
+    const double elapsed_ms = stopwatch.elapsedTimeInMilliSec();
+    std::cout << axom::fmt::format( "Time to calc fwd_fwd_diff: {0:f}ms", elapsed_ms ) << std::endl;
+}
+
+void run_rev_fwd(double* E, double mu, double lambda, double* S, int N) {
+    // Times N back-to-back calls of second_deriv_rev_fwd and prints the total in milliseconds.
+    axom::utilities::Timer stopwatch{ false };
+    stopwatch.start();
+    for(int rep = 0; rep < N; ++rep) { second_deriv_rev_fwd(E, mu, lambda, S); }
+    stopwatch.stop();
+    const double elapsed_ms = stopwatch.elapsedTimeInMilliSec();
+    std::cout << axom::fmt::format( "Time to calc rev_fwd_diff: {0:f}ms", elapsed_ms ) << std::endl;
+}
+
+void run_fwd_rev(double* E, double mu, double lambda, double* S, int N) {
+    // Times N back-to-back calls of second_deriv_fwd_rev and prints the total in milliseconds.
+    axom::utilities::Timer stopwatch{ false };
+    stopwatch.start();
+    for(int rep = 0; rep < N; ++rep) { second_deriv_fwd_rev(E, mu, lambda, S); }
+    stopwatch.stop();
+    const double elapsed_ms = stopwatch.elapsedTimeInMilliSec();
+    std::cout << axom::fmt::format( "Time to calc fwd_rev_diff: {0:f}ms", elapsed_ms ) << std::endl;
+}
+
+void run_rev_rev(double* E, double mu, double lambda, double* S, int N) {
+    // Times N back-to-back calls of second_deriv_rev_rev and prints the total in milliseconds.
+    axom::utilities::Timer stopwatch{ false };
+    stopwatch.start();
+    for(int rep = 0; rep < N; ++rep) { second_deriv_rev_rev(E, mu, lambda, S); }
+    stopwatch.stop();
+    const double elapsed_ms = stopwatch.elapsedTimeInMilliSec();
+    std::cout << axom::fmt::format( "Time to calc rev_rev_diff: {0:f}ms", elapsed_ms ) << std::endl;
+}
+
+void run_fwd_FD(double* E, double mu, double lambda, double* S, int N, double h) {
+    // Times N back-to-back calls of second_deriv_fwd_FD (step h) and prints the total in ms.
+    axom::utilities::Timer stopwatch{ false };
+    stopwatch.start();
+    for(int rep = 0; rep < N; ++rep) { second_deriv_fwd_FD(E, mu, lambda, S, h); }
+    stopwatch.stop();
+    const double elapsed_ms = stopwatch.elapsedTimeInMilliSec();
+    std::cout << axom::fmt::format( "Time to calc fwd_FD_diff: {0:f}ms", elapsed_ms ) << std::endl;
+}
+
+void run_hand_fwd(double* E, double mu, double lambda, double* S, int N) {
+    // Times N back-to-back calls of second_deriv_hand_fwd and prints the total in milliseconds.
+    axom::utilities::Timer stopwatch{ false };
+    stopwatch.start();
+    for(int rep = 0; rep < N; ++rep) { second_deriv_hand_fwd(E, mu, lambda, S); }
+    stopwatch.stop();
+    const double elapsed_ms = stopwatch.elapsedTimeInMilliSec();
+    std::cout << axom::fmt::format( "Time to calc hand_fwd_diff: {0:f}ms", elapsed_ms ) << std::endl;
+}
+
+
+// int main() {
+//     double E[9] = {0.1, 0.05, 0.02, 0.05, 0.2, 0.01, 0.02, 0.01, 0.15};
+//     double mu = 1.0;
+//     double lambda = 1.0;
+//     int N = 0;
+//     std::cout << "Enter Number: ";
+//     std::cin >> N;
+//     double dw_dE[9] = {0.0};
+//     double d2W_d2E[81] = {0.0};
+//     double h = 1e-7;
+
+//     // run_fwd_mode(E, mu, lambda, dw_dE, N);
+//     // run_bkwd_mode(E, mu, lambda, dw_dE, N);
+//     // run_hand_derivative(E, mu, lambda, dw_dE, N);
+//     // run_fwd_fwd(E, mu, lambda, d2W_d2E, N);
+//     // run_rev_fwd(E, mu, lambda, d2W_d2E, N);
+//     // run_fwd_rev(E, mu, lambda, d2W_d2E, N);
+//     // run_rev_rev(E, mu, lambda, d2W_d2E, N);
+//     // run_fwd_FD(E, mu, lambda, d2W_d2E, N, h);
+//     run_hand_fwd(E, mu, lambda, d2W_d2E, N);
+
+    
+
+//     // second_deriv_hand_fwd(E, mu, lambda, d2W_d2E);
+//     // std::cout << " { ";
+//     // for(int i = 0; i < 81; ++i) {
+//     //     std::cout << d2W_d2E[i] << ", ";
+//     // }
+//     // std::cout << " }" << std::endl;
+
+//     // return 0;
+
+// }
+
+// TEST(StrainEnergyTest, StressFiniteDifferenceVsAutodiff) {
+//     double F[9] = {1.0, 0.5, 0.0, 0.0, 1.2, 0.1, 0.0, 0.0, 1.0};
+//     double E[9] = {0.0};
+//     double mu = 1.0;
+//     double lambda = 1.0;
+//     double dW_dF_fd[9];
+//     double dW_dF_ad[9];
+//     double J = calc_determinant(F);
+//     calc_green_lagrange(F, E);
+//     std::cout << "Calling stress_FD..." << std::endl;
+//     stress_FD(E, J, mu, lambda, dW_dF_fd);
+//     std::cout << "Finite difference stress computed:" << std::endl;
+//     for (int i = 0; i < 9; ++i) {
+//         std::cout << "dW_dF_fd[" << i << "] = " << dW_dF_fd[i] << std::endl;
+//     }
+
+//     std::cout << "Calling stress..." << std::endl;
+//     stress(E, J, mu, lambda, dW_dF_ad);
+//     std::cout << "Autodiff stress computed:" << std::endl;
+//     for (int i = 0; i < 9; ++i) {
+//         std::cout << "dW_dF_ad[" << i << "] = " << dW_dF_ad[i] << std::endl;
+//     }
+
+//     for (int i = 0; i < 9; ++i) {
+//         std::cout << "Comparing index " << i << ": FD = " << dW_dF_fd[i]
+//                   << ", AD = " << dW_dF_ad[i] << std::endl;
+//         EXPECT_NEAR(dW_dF_fd[i], dW_dF_ad[i], 1e-6) << "Mismatch at index " << i;
+//     }
+// }
+
+// TEST(StrainEnergyTest, StressFiniteDifferenceVsReverseAutodiff) {
+//     double E[9] = {1.0, 0.5, 0.0, 0.0, 1.2, 0.1, 0.0, 0.0, 1.0};
+//     double mu = 1.0;
+//     double lambda = 1.0;
+//     double dW_dF_fd[9];
+//     double dW_dF_rev[9];
+//     std::cout << "E: ";
+// for (int i = 0; i < 9; ++i) std::cout << E[i] << " ";
+// std::cout << std::endl;
+//     std::cout << "Calling stress_FD..." << std::endl;
+//     stress_FD(E, mu, lambda, dW_dF_fd);
+//     std::cout << "Finite difference stress computed:" << std::endl;
+//     for (int i = 0; i < 9; ++i) {
+//         std::cout << "dW_dF_fd[" << i << "] = " << dW_dF_fd[i] << std::endl;
+//     }
+
+//     std::cout << "Calling stress_reverse..." << std::endl;
+//     stress_reverse(E, mu, lambda, dW_dF_rev);
+//     std::cout << "Reverse-mode autodiff stress computed:" << std::endl;
+//     for (int i = 0; i < 9; ++i) {
+//         std::cout << "dW_dF_rev[" << i << "] = " << dW_dF_rev[i] << std::endl;
+//     }
+
+//     for (int i = 0; i < 9; ++i) {
+//         std::cout << "Comparing index " << i << ": FD = " << dW_dF_fd[i]
+//                   << ", Reverse AD = " << dW_dF_rev[i] << std::endl;
+//         EXPECT_NEAR(dW_dF_fd[i], dW_dF_rev[i], 1e-6) << "Mismatch at index " << i;
+//     }
+// }
+
+// TEST(StrainEnergyTest, StressFiniteDifferenceVsReverseAutodiff) {
+//     double E[9] = {0.1, 0.05, 0.02, 0.05, 0.2, 0.01, 0.02, 0.01, 0.15};
+//     double mu = 1.0;
+//     double lambda = 1.0;
+//     double dW_dF_fd[9];
+//     double dW_dF_hand[9];
+//     std::cout << "E: ";
+// for (int i = 0; i < 9; ++i) std::cout << E[i] << " ";
+// std::cout << std::endl;
+//     std::cout << "Calling stress_FD..." << std::endl;
+//     stress_FD(E, mu, lambda, dW_dF_fd);
+//     std::cout << "Finite difference stress computed:" << std::endl;
+//     for (int i = 0; i < 9; ++i) {
+//         std::cout << "dW_dF_fd[" << i << "] = " << dW_dF_fd[i] << std::endl;
+//     }
+
+//     std::cout << "Calling hand_code_deriv..." << std::endl;
+//     hand_code_deriv(E, mu, lambda, dW_dF_hand);
+//     std::cout << "Hand coded stress computed:" << std::endl;
+//     for (int i = 0; i < 9; ++i) {
+//         std::cout << "dW_dF_hand[" << i << "] = " << dW_dF_hand[i] << std::endl;
+//     }
+
+//     for (int i = 0; i < 9; ++i) {
+//         std::cout << "Comparing index " << i << ": FD = " << dW_dF_fd[i]
+//                   << ", Hand derivative = " << dW_dF_hand[i] << std::endl;
+//         EXPECT_NEAR(dW_dF_fd[i], dW_dF_hand[i], 1e-6) << "Mismatch at index " << i;
+//     }
+// }
+
+// TEST(StrainEnergyTest, StressFiniteDifferenceVsReverseAutodiff) {
+//     double E[9] = {0.1, 0.05, 0.02, 0.05, 0.2, 0.01, 0.02, 0.01, 0.15};
+//     double mu = 1.0;
+//     double lambda = 1.0;
+//     double dW_dF_fwd_fd[81];
+//     double dW_dF_fwd_fwd[81];
+//     std::cout << "E: ";
+// for (int i = 0; i < 81; ++i) std::cout << E[i] << " ";
+// std::cout << std::endl;
+//     std::cout << "Calling stress_FD..." << std::endl;
+//     second_deriv_fwd_FD(E, mu, lambda, dW_dF_fwd_fd);
+//     std::cout << "Finite difference stress computed:" << std::endl;
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "dW_dF_fd[" << i << "] = " << dW_dF_fwd_fd[i] << std::endl;
+//     }
+
+//     std::cout << "Calling hand_code_deriv..." << std::endl;
+//     second_deriv_fwd_fwd(E, mu, lambda, dW_dF_fwd_fwd);
+//     std::cout << "Hand coded stress computed:" << std::endl;
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "dW_dF_hand[" << i << "] = " << dW_dF_fwd_fwd[i] << std::endl;
+//     }
+
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "Comparing index " << i << ": FD = " << dW_dF_fwd_fd[i]
+//                   << ", Hand derivative = " << dW_dF_fwd_fwd[i] << std::endl;
+//         EXPECT_NEAR(dW_dF_fwd_fd[i], dW_dF_fwd_fwd[i], 1e-6) << "Mismatch at index " << i;
+//     }
+// }
+
+// TEST(StrainEnergyTest, StressFiniteDifferenceVsReverseAutodiff) {
+//     double E[9] = {0.1, 0.05, 0.02, 0.05, 0.2, 0.01, 0.02, 0.01, 0.15};
+//     double mu = 1.0;
+//     double lambda = 1.0;
+//     double dW_dF_fwd_fd[81];
+//     double dW_dF_fwd_rev[81];
+
+//     std::cout << "E: ";
+//     for (int i = 0; i < 9; ++i) std::cout << E[i] << " ";
+//     std::cout << std::endl;
+
+//     std::cout << "Calling stress_FD..." << std::endl;
+//     second_deriv_fwd_FD(E, mu, lambda, dW_dF_fwd_fd);
+//     std::cout << "Finite difference stress computed:" << std::endl;
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "dW_dF_fd[" << i << "] = " << dW_dF_fwd_fd[i] << std::endl;
+//     }
+
+//     std::cout << "Calling hand_code_deriv (fwd_rev)..." << std::endl;
+//     second_deriv_fwd_rev(E, mu, lambda, dW_dF_fwd_rev);
+//     std::cout << "Hand coded stress computed (fwd_rev):" << std::endl;
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "dW_dF_hand[" << i << "] = " << dW_dF_fwd_rev[i] << std::endl;
+//     }
+
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "Comparing index " << i << ": FD = " << dW_dF_fwd_fd[i]
+//                   << ", Hand derivative = " << dW_dF_fwd_rev[i] << std::endl;
+//         EXPECT_NEAR(dW_dF_fwd_fd[i], dW_dF_fwd_rev[i], 1e-6) << "Mismatch at index " << i;
+//     }
+// }
+
+// TEST(StrainEnergyTest, StressFiniteDifferenceVsReverseAutodiff) {
+//     double E[9] = {0.1, 0.05, 0.02, 0.05, 0.2, 0.01, 0.02, 0.01, 0.15};
+//     double mu = 1.0;
+//     double lambda = 1.0;
+//     double dW_dF_fwd_fd[81];
+//     double dW_dF_rev_rev[81];
+
+//     std::cout << "E: ";
+//     for (int i = 0; i < 9; ++i) std::cout << E[i] << " ";
+//     std::cout << std::endl;
+
+//     std::cout << "Calling stress_FD..." << std::endl;
+//     second_deriv_fwd_FD(E, mu, lambda, dW_dF_fwd_fd);
+//     std::cout << "Finite difference stress computed:" << std::endl;
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "dW_dF_fd[" << i << "] = " << dW_dF_fwd_fd[i] << std::endl;
+//     }
+
+//     std::cout << "Calling hand_code_deriv (rev_rev)..." << std::endl;
+//     second_deriv_rev_rev(E, mu, lambda, dW_dF_rev_rev);
+//     std::cout << "Hand coded stress computed (rev_rev):" << std::endl;
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "dW_dF_hand[" << i << "] = " << dW_dF_rev_rev[i] << std::endl;
+//     }
+
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "Comparing index " << i << ": FD = " << dW_dF_fwd_fd[i]
+//                   << ", Hand derivative = " << dW_dF_rev_rev[i] << std::endl;
+//         EXPECT_NEAR(dW_dF_fwd_fd[i], dW_dF_rev_rev[i], 1e-6) << "Mismatch at index " << i;
+//     }
+// }
+
+// TEST(StrainEnergyTest, StressFiniteDifferenceVsFwdRevAutodiff) {
+//     double E[9] = {0.1, 0.05, 0.02, 0.05, 0.2, 0.01, 0.02, 0.01, 0.15};
+//     double mu = 1.0;
+//     double lambda = 1.0;
+//     double dW_dF_fwd_fd[81];
+//     double dW_dF_fwd_rev[81];
+
+//     std::cout << "E: ";
+//     for (int i = 0; i < 9; ++i) std::cout << E[i] << " ";
+//     std::cout << std::endl;
+
+//     std::cout << "Calling stress_FD..." << std::endl;
+//     second_deriv_fwd_FD(E, mu, lambda, dW_dF_fwd_fd);
+//     std::cout << "Finite difference stress computed:" << std::endl;
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "dW_dF_fd[" << i << "] = " << dW_dF_fwd_fd[i] << std::endl;
+//     }
+
+//     std::cout << "Calling hand_code_deriv (fwd_rev)..." << std::endl;
+//     second_deriv_fwd_rev(E, mu, lambda, dW_dF_fwd_rev);
+//     std::cout << "Hand coded stress computed (fwd_rev):" << std::endl;
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "dW_dF_hand[" << i << "] = " << dW_dF_fwd_rev[i] << std::endl;
+//     }
+
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "Comparing index " << i << ": FD = " << dW_dF_fwd_fd[i]
+//                   << ", Hand derivative = " << dW_dF_fwd_rev[i] << std::endl;
+//         EXPECT_NEAR(dW_dF_fwd_fd[i], dW_dF_fwd_rev[i], 1e-6) << "Mismatch at index " << i;
+//     }
+// }
+
+
+// TEST(StrainEnergyTest, StressFiniteDifferenceVsFwdRevAutodiff) {
+//     double E[9] = {0.12, -0.03, 0.01, -0.03, 0.08, 0.02, 0.01, 0.02, 0.11};
+//     double mu = 1.0;
+//     double lambda = 1.0;
+//     double dW_dF_fwd_fd[81];
+//     double dW_dF_fwd_rev[81];
+
+//     std::cout << "E: ";
+//     for (int i = 0; i < 9; ++i) std::cout << E[i] << " ";
+//     std::cout << std::endl;
+
+//     std::cout << "Calling stress_FD..." << std::endl;
+//     second_deriv_fwd_FD(E, mu, lambda, dW_dF_fwd_fd);
+//     std::cout << "Finite difference stress computed:" << std::endl;
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "dW_dF_fd[" << i << "] = " << dW_dF_fwd_fd[i] << std::endl;
+//     }
+
+//     std::cout << "Calling hand_code_deriv (fwd_rev)..." << std::endl;
+//     second_deriv_hand_fwd(E, mu, lambda, dW_dF_fwd_rev);
+//     std::cout << "Hand coded stress computed (fwd_rev):" << std::endl;
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "dW_dF_hand[" << i << "] = " << dW_dF_fwd_rev[i] << std::endl;
+//     }
+
+//     for (int i = 0; i < 81; ++i) {
+//         std::cout << "Comparing index " << i << ": FD = " << dW_dF_fwd_fd[i]
+//                   << ", Hand derivative = " << dW_dF_fwd_rev[i] << std::endl;
+//         EXPECT_NEAR(dW_dF_fwd_fd[i], dW_dF_fwd_rev[i], 1e-6) << "Mismatch at index " << i;
+//     }
+// }
+
+TEST(StrainEnergyTest, StressFiniteDifferenceVsFwdRevAutodiff) {
+    // Checks second_deriv_hand_fwd against central differences of hand_code_deriv, entry by entry.
+    double F[9] = {1.01, -0.03, 0.01, -0.03, 1.05, 0.02, 0.01, 0.02, 1.0};
+    double E[9] = {0};
+    double mu = 1.0;
+    double lambda = 1.0;
+    double S[9] = {0.0};  // real storage: hand_code_deriv writes 9 values here and they are read below
+    calc_E_from_F(F, E);
+    hand_code_deriv(E, mu, lambda, S);
+    std::cout << "{ ";
+    for (int i = 0; i < 9; ++i) {
+        std::cout << ", " << S[i];
+    }
+    std::cout << " }" << std::endl;
+    std::cout << "E: { ";
+    for (int i = 0; i < 9; ++i) {
+        std::cout << ", " << E[i];
+    }
+    std::cout << "}" << std::endl;
+
+    double dW_dF_hand_fd[81] = {0.0};
+    double dW_dF_hand_fwd[81] = {0.0};
+
+    std::cout << "E: ";
+    for (int i = 0; i < 9; ++i) std::cout << E[i] << " ";
+    std::cout << std::endl;
+    // Reference table: finite differences of hand_code_deriv with the default step.
+    std::cout << "Calling stress_FD..." << std::endl;
+    second_deriv_hand_FD(E, mu, lambda, dW_dF_hand_fd);
+    std::cout << "Finite difference stress computed:" << std::endl;
+    for (int i = 0; i < 81; ++i) {
+        std::cout << "dW_dF_hand_fd[" << i << "] = " << dW_dF_hand_fd[i] << std::endl;
+    }
+    // Candidate table: the same 81 entries as produced by second_deriv_hand_fwd.
+    std::cout << "Calling hand_code_deriv (fwd_rev)..." << std::endl;
+    second_deriv_hand_fwd(E, mu, lambda, dW_dF_hand_fwd);
+    std::cout << "Hand coded stress computed (fwd_rev):" << std::endl;
+    for (int i = 0; i < 81; ++i) {
+        std::cout << "dW_dF_hand_fwd[" << i << "] = " << dW_dF_hand_fwd[i] << std::endl;
+    }
+    // The assertion pits the two tables against each other; comparing a table with itself always passes.
+    for (int i = 0; i < 81; ++i) {
+        std::cout << "Comparing index " << i << ": FD = " << dW_dF_hand_fd[i] << ", Hand derivative = " << dW_dF_hand_fwd[i] << std::endl;
+        EXPECT_NEAR(dW_dF_hand_fd[i], dW_dF_hand_fwd[i], 1e-6) << "Mismatch at index " << i;
+    }
+}
\ No newline at end of file