From 8818025a72e550f1c425828c08ae2780e32b0551 Mon Sep 17 00:00:00 2001 From: Azamat Mametjanov Date: Wed, 8 Apr 2026 07:40:07 +0000 Subject: [PATCH 1/6] Add state validation checks for NaNs and OOBounds Initial validateOceanState function to check for NaNs and bounds - LayerThickness: 1e-10 to 1000 - KineticEnergyCell: 0 to 10 - Temperature: -10 to 50 - Salinity: -2 to 60 --- components/omega/src/ocn/StateValidation.cpp | 219 +++++++++++++++++ components/omega/src/ocn/StateValidation.h | 43 ++++ components/omega/test/CMakeLists.txt | 11 + .../omega/test/ocn/StateValidationTest.cpp | 221 ++++++++++++++++++ 4 files changed, 494 insertions(+) create mode 100644 components/omega/src/ocn/StateValidation.cpp create mode 100644 components/omega/src/ocn/StateValidation.h create mode 100644 components/omega/test/ocn/StateValidationTest.cpp diff --git a/components/omega/src/ocn/StateValidation.cpp b/components/omega/src/ocn/StateValidation.cpp new file mode 100644 index 000000000000..a47832c1cc2d --- /dev/null +++ b/components/omega/src/ocn/StateValidation.cpp @@ -0,0 +1,219 @@ +//===-- ocn/StateValidation.cpp - ocean state validation --------*- C++ -*-===// +// +// Validates ocean state fields by checking for NaN values and +// out-of-bounds conditions. Any failure triggers a critical error log with +// backtrace and MPI_Abort on the local communicator. 
+// +//===----------------------------------------------------------------------===// + +#include "StateValidation.h" + +#include "AuxiliaryState.h" +#include "DataTypes.h" +#include "Error.h" +#include "Logging.h" +#include "MachEnv.h" +#include "OceanState.h" +#include "OmegaKokkos.h" +#include "Tracers.h" +#include "mpi.h" + +#include +#include +#include +#include + +namespace OMEGA { + +//------------------------------------------------------------------------------ +// Helper: abort on the local Omega communicator with a message and backtrace +static void abortWithMessage(const std::string &Msg) { + LOG_CRITICAL("{}", Msg); + cpptrace::generate_trace().print(); + MPI_Comm Comm = MachEnv::getDefault()->getComm(); + MPI_Abort(Comm, static_cast(ErrorCode::Critical)); +} + +//------------------------------------------------------------------------------ +// Helper: count NaN entries and out-of-range entries in a 2-D Real device +// array over the first NCells/NEdges rows and NVert columns. +// Returns {NaNCount, OutOfRangeCount}. +static std::pair checkArray2D(const Array2DReal &Arr, I4 NRows, + I4 NCols, Real MinVal, Real MaxVal, + bool CheckMin) { + I4 NaNCount = 0; + I4 OutOfRangeCount = 0; + + parallelReduce( + "CheckNaN", {NRows, NCols}, + KOKKOS_LAMBDA(int Row, int Col, int &Accum) { + Real Val = Arr(Row, Col); + if (Kokkos::isnan(Val)) { + ++Accum; + } + }, + NaNCount); + + parallelReduce( + "CheckBounds", {NRows, NCols}, + KOKKOS_LAMBDA(int Row, int Col, int &Accum) { + Real Val = Arr(Row, Col); + if (!Kokkos::isnan(Val)) { + if (Val > MaxVal) { + ++Accum; + } else if (CheckMin && Val < MinVal) { + ++Accum; + } + } + }, + OutOfRangeCount); + + return {NaNCount, OutOfRangeCount}; +} + +//------------------------------------------------------------------------------ +// Helper: count NaN and out-of-range entries for a single tracer (row = cell, +// col = vert) extracted from the 3-D tracer array at the given tracer index. 
+static std::pair checkTracerArray(const Array3DReal &Tracers3D, + I4 TracerIdx, I4 NCells, I4 NVert, + Real MinVal, Real MaxVal) { + I4 NaNCount = 0; + I4 OutOfRangeCount = 0; + + parallelReduce( + "CheckTracerNaN", {NCells, NVert}, + KOKKOS_LAMBDA(int Cell, int K, int &Accum) { + Real Val = Tracers3D(TracerIdx, Cell, K); + if (Kokkos::isnan(Val)) { + ++Accum; + } + }, + NaNCount); + + parallelReduce( + "CheckTracerBounds", {NCells, NVert}, + KOKKOS_LAMBDA(int Cell, int K, int &Accum) { + Real Val = Tracers3D(TracerIdx, Cell, K); + if (!Kokkos::isnan(Val)) { + if (Val < MinVal || Val > MaxVal) { + ++Accum; + } + } + }, + OutOfRangeCount); + + return {NaNCount, OutOfRangeCount}; +} + +//------------------------------------------------------------------------------ +/// Validate ocean state fields for NaN and out-of-bounds conditions. +/// Aborts via MPI_Abort on failure. +void validateOceanState(const OceanState *State, const AuxiliaryState *AuxState, + I4 TimeLevel) { + + bool AnyFailure = false; + + // ------------------------------------------------------------------------- + // LayerThickness: valid range [1e-10, 1000] + // ------------------------------------------------------------------------- + { + Array2DReal LayerThick = State->getLayerThickness(TimeLevel); + auto [NaNs, OOB] = + checkArray2D(LayerThick, State->NCellsOwned, State->NVertLayers, + static_cast(1e-10), static_cast(1000.0), + /*CheckMin=*/true); + + if (NaNs > 0) { + LOG_CRITICAL( + "StateValidation: LayerThickness contains {} NaN value(s)", NaNs); + AnyFailure = true; + } + if (OOB > 0) { + LOG_CRITICAL("StateValidation: LayerThickness has {} value(s) outside " + "valid range [1e-10, 1000]", + OOB); + AnyFailure = true; + } + } + + // ------------------------------------------------------------------------- + // KineticEnergyCell: valid range [0, 10] + // ------------------------------------------------------------------------- + { + const Array2DReal &KE = 
AuxState->KineticAux.KineticEnergyCell; + auto [NaNs, OOB] = + checkArray2D(KE, State->NCellsOwned, State->NVertLayers, + static_cast(0.0), static_cast(10.0), + /*CheckMin=*/true); + + if (NaNs > 0) { + LOG_CRITICAL( + "StateValidation: KineticEnergyCell contains {} NaN value(s)", + NaNs); + AnyFailure = true; + } + if (OOB > 0) { + LOG_CRITICAL( + "StateValidation: KineticEnergyCell has {} value(s) outside " + "valid range [0, 10]", + OOB); + AnyFailure = true; + } + } + + // ------------------------------------------------------------------------- + // Temperature tracer: valid range [-10, 50] + // ------------------------------------------------------------------------- + if (Tracers::IndxTemp != Tracers::IndxInvalid) { + Array3DReal AllTracers = Tracers::getAll(TimeLevel); + auto [NaNs, OOB] = checkTracerArray( + AllTracers, Tracers::IndxTemp, State->NCellsOwned, State->NVertLayers, + static_cast(-10.0), static_cast(50.0)); + + if (NaNs > 0) { + LOG_CRITICAL("StateValidation: Temperature contains {} NaN value(s)", + NaNs); + AnyFailure = true; + } + if (OOB > 0) { + LOG_CRITICAL("StateValidation: Temperature has {} value(s) outside " + "valid range [-10, 50]", + OOB); + AnyFailure = true; + } + } + + // ------------------------------------------------------------------------- + // Salinity tracer: valid range [-2, 60] + // ------------------------------------------------------------------------- + if (Tracers::IndxSalt != Tracers::IndxInvalid) { + Array3DReal AllTracers = Tracers::getAll(TimeLevel); + auto [NaNs, OOB] = checkTracerArray( + AllTracers, Tracers::IndxSalt, State->NCellsOwned, State->NVertLayers, + static_cast(-2.0), static_cast(60.0)); + + if (NaNs > 0) { + LOG_CRITICAL("StateValidation: Salinity contains {} NaN value(s)", + NaNs); + AnyFailure = true; + } + if (OOB > 0) { + LOG_CRITICAL("StateValidation: Salinity has {} value(s) outside " + "valid range [-2, 60]", + OOB); + AnyFailure = true; + } + } + + // 
------------------------------------------------------------------------- + // Abort if any check failed + // ------------------------------------------------------------------------- + if (AnyFailure) { + abortWithMessage("StateValidation: Ocean state validation failed. " + "See critical messages above for details."); + } +} + +} // namespace OMEGA + +//===----------------------------------------------------------------------===// diff --git a/components/omega/src/ocn/StateValidation.h b/components/omega/src/ocn/StateValidation.h new file mode 100644 index 000000000000..2395aaa76286 --- /dev/null +++ b/components/omega/src/ocn/StateValidation.h @@ -0,0 +1,43 @@ +#ifndef OMEGA_STATEVALIDATION_H +#define OMEGA_STATEVALIDATION_H +//===-- ocn/StateValidation.h - ocean state validation ----------*- C++ -*-===// +// +/// \file +/// \brief Declares the validateOceanState function for ocean state validation +/// +/// Provides a function that validates the ocean prognostic state and selected +/// auxiliary/tracer fields by checking for NaN values and out-of-bounds +/// conditions. If any check fails the function logs a critical error with a +/// backtrace and aborts via MPI_Abort on the local MPI communicator. +// +//===----------------------------------------------------------------------===// + +#include "AuxiliaryState.h" +#include "OceanState.h" +#include "Tracers.h" + +namespace OMEGA { + +/// Check ocean state fields for NaN values and out-of-bounds conditions. +/// +/// The following fields are validated: +/// - LayerThickness : [1e-10, 1000] (from OceanState) +/// - KineticEnergyCell : [0, 10] +/// (from AuxiliaryState::KineticAux) +/// - Temperature tracer : [-10, 50] (from Tracers) +/// - Salinity tracer : [-2, 60] (from Tracers) +/// +/// If any check fails a critical error is logged with an informative message +/// and a stack backtrace, and the run is aborted via MPI_Abort on the +/// communicator obtained from the default MachEnv. 
+/// +/// \param[in] State Ocean state to validate +/// \param[in] AuxState Auxiliary state containing KineticEnergyCell +/// \param[in] TimeLevel Time level index to validate (typically 0 = current) +void validateOceanState(const OceanState *State, const AuxiliaryState *AuxState, + I4 TimeLevel); + +} // namespace OMEGA + +//===----------------------------------------------------------------------===// +#endif // defined OMEGA_STATEVALIDATION_H diff --git a/components/omega/test/CMakeLists.txt b/components/omega/test/CMakeLists.txt index 301a04ede49c..4d3830103691 100644 --- a/components/omega/test/CMakeLists.txt +++ b/components/omega/test/CMakeLists.txt @@ -516,3 +516,14 @@ add_omega_test( ocn/VertAdvTest.cpp "-n;1" ) + +########################## +# State Validation test +########################## + +add_omega_test( + STATE_VALIDATION_TEST + testStateValidation.exe + ocn/StateValidationTest.cpp + "-n;8" +) diff --git a/components/omega/test/ocn/StateValidationTest.cpp b/components/omega/test/ocn/StateValidationTest.cpp new file mode 100644 index 000000000000..3c1baf9221f8 --- /dev/null +++ b/components/omega/test/ocn/StateValidationTest.cpp @@ -0,0 +1,221 @@ +//===-- Test driver for OMEGA StateValidation -----------------------*- C++ +//-*-===/ +// +/// \file +/// \brief Test driver for ocean state validation +/// +/// Tests the validateOceanState function by verifying that it passes on valid +/// state data. Checks cover LayerThickness, KineticEnergyCell, Temperature, +/// and Salinity fields. 
+// +//===-----------------------------------------------------------------------===/ + +#include "StateValidation.h" +#include "AuxiliaryState.h" +#include "Config.h" +#include "DataTypes.h" +#include "Decomp.h" +#include "Dimension.h" +#include "Error.h" +#include "Field.h" +#include "Halo.h" +#include "HorzMesh.h" +#include "IO.h" +#include "IOStream.h" +#include "Logging.h" +#include "MachEnv.h" +#include "OceanState.h" +#include "OmegaKokkos.h" +#include "Pacer.h" +#include "TimeStepper.h" +#include "Tracers.h" +#include "VertAdv.h" +#include "VertCoord.h" +#include "mpi.h" + +#include + +using namespace OMEGA; + +//------------------------------------------------------------------------------ +// Initialize the Omega subsystems required for state validation testing + +int initStateValidationTest(const std::string &MeshFile) { + int Err = 0; + + MachEnv::init(MPI_COMM_WORLD); + MachEnv *DefEnv = MachEnv::getDefault(); + MPI_Comm DefComm = DefEnv->getComm(); + + initLogging(DefEnv); + LOG_INFO("------ StateValidation unit tests ------"); + + Config("Omega"); + Config::readAll("omega.yml"); + + TimeStepper::init1(); + + IO::init(DefComm); + Decomp::init(MeshFile); + + IOStream::init(); + + int HaloErr = Halo::init(); + if (HaloErr != 0) { + Err++; + LOG_ERROR("StateValidationTest: error initializing default halo"); + } + + HorzMesh::init(); + VertCoord::init(); + Tracers::init(); + + int StateErr = OceanState::init(); + if (StateErr != 0) { + Err++; + LOG_ERROR("StateValidationTest: error initializing default state"); + } + + VertAdv::init(); + + return Err; +} + +//------------------------------------------------------------------------------ +// Fill state and auxiliary/tracer arrays with known-valid values + +static int fillValidState() { + int Err = 0; + + auto *State = OceanState::getDefault(); + const int NCells = State->NCellsAll; + const int NVert = State->NVertLayers; + const int NEdges = State->NEdgesAll; + + // LayerThickness: fill with 100 m (valid 
range [1e-10, 1000]) + Array2DReal LayerThick = State->getLayerThickness(0); + parallelFor( + "FillLayerThick", {NCells, NVert}, + KOKKOS_LAMBDA(int ICell, int K) { LayerThick(ICell, K) = 100.0; }); + + // NormalVelocity: fill with 0 (not checked, but needed for AuxState) + Array2DReal NormalVel = State->getNormalVelocity(0); + parallelFor( + "FillNormalVel", {NEdges, NVert}, + KOKKOS_LAMBDA(int IEdge, int K) { NormalVel(IEdge, K) = 0.0; }); + + // Exchange halos so auxiliary state computations are consistent + State->exchangeHalo(0); + + // Tracers: fill Temperature with 10 C and Salinity with 35 g/kg + // Use deepCopy with individual tracer subviews + if (Tracers::getNumTracers() > 0) { + // Temperature = 10.0 (valid: -10 to 50) + if (Tracers::IndxTemp != Tracers::IndxInvalid) { + Array2DReal TempArr = Tracers::getByIndex(0, Tracers::IndxTemp); + deepCopy(TempArr, static_cast(10.0)); + } + + // Salinity = 35.0 (valid: -2 to 60) + if (Tracers::IndxSalt != Tracers::IndxInvalid) { + Array2DReal SaltArr = Tracers::getByIndex(0, Tracers::IndxSalt); + deepCopy(SaltArr, static_cast(35.0)); + } + } + + return Err; +} + +//------------------------------------------------------------------------------ +// Run state validation tests + +int testStateValidation() { + int Err = 0; + + // Initialize the auxiliary state (needed for KineticEnergyCell) + AuxiliaryState::init(); + auto *DefAuxState = AuxiliaryState::getDefault(); + + if (!DefAuxState) { + Err++; + LOG_ERROR("StateValidationTest: Default AuxiliaryState not found"); + return Err; + } + + auto *DefState = OceanState::getDefault(); + if (!DefState) { + Err++; + LOG_ERROR("StateValidationTest: Default OceanState not found"); + return Err; + } + + // Fill state arrays with valid values + Err += fillValidState(); + + // Compute auxiliary variables so KineticEnergyCell is populated + { + Array3DReal AllTracers = Tracers::getAll(0); + DefAuxState->computeAll(DefState, AllTracers, 0); + } + + // Test: validation should pass 
on valid state (no abort expected) + LOG_INFO("StateValidationTest: Testing validation on valid state"); + validateOceanState(DefState, DefAuxState, 0); + LOG_INFO("StateValidationTest: Valid state validation PASS"); + + AuxiliaryState::clear(); + return Err; +} + +//------------------------------------------------------------------------------ +// Finalize Omega objects + +void finalizeStateValidationTest() { + Tracers::clear(); + OceanState::clear(); + VertAdv::clear(); + VertCoord::clear(); + HorzMesh::clear(); + Field::clear(); + Dimension::clear(); + TimeStepper::clear(); + Halo::clear(); + Decomp::clear(); + MachEnv::removeAll(); +} + +//------------------------------------------------------------------------------ +// Main entry point + +int main(int argc, char *argv[]) { + int RetVal = 0; + + MPI_Init(&argc, &argv); + Kokkos::initialize(argc, argv); + Pacer::initialize(MPI_COMM_WORLD); + Pacer::setPrefix("Omega:"); + + { + int Err = initStateValidationTest("OmegaMesh.nc"); + if (Err != 0) { + LOG_CRITICAL("StateValidationTest: Error during initialization"); + } else { + RetVal += testStateValidation(); + } + finalizeStateValidationTest(); + } + + if (RetVal == 0) + LOG_INFO("------ StateValidation unit tests successful ------"); + + Pacer::finalize(); + Kokkos::finalize(); + MPI_Finalize(); + + if (RetVal >= 256) + RetVal = 255; + + return RetVal; + +} // end of main +//===-----------------------------------------------------------------------===/ From ccacffa8309eeba5af6e039c1229c289cfc8c2cb Mon Sep 17 00:00:00 2001 From: Azamat Mametjanov Date: Mon, 13 Apr 2026 14:40:10 +0000 Subject: [PATCH 2/6] Add initial design doc for state validation --- .../omega/doc/design/StateValidation.md | 249 ++++++++++++++++++ 1 file changed, 249 insertions(+) create mode 100644 components/omega/doc/design/StateValidation.md diff --git a/components/omega/doc/design/StateValidation.md b/components/omega/doc/design/StateValidation.md new file mode 100644 index 
000000000000..e5540e5e82ba --- /dev/null +++ b/components/omega/doc/design/StateValidation.md @@ -0,0 +1,249 @@ + + +(omega-design-state-validation)= + +# Ocean State Validation + +## 1 Overview + +The Ocean State Validation module provides a mechanism for checking the +physical plausibility of the ocean prognostic state and selected auxiliary +variables at runtime. After each timestep (or at user-defined intervals) the +model can call `validateOceanState` to scan every owned cell and vertical layer +for NaN (Not-a-Number) values and values that lie outside a pre-defined +physically meaningful range. Any detected anomaly is reported through the +OMEGA logging infrastructure as a critical error together with a full stack +backtrace; the run is then terminated via `MPI_Abort` so that corrupted output +is not silently written to disk. + +## 2 Requirements + +### 2.1 Requirement: Check for NaN values + +The validation function must detect NaN values in the ocean state fields. +NaNs indicate a numerical instability or a programming error, and their +presence in the prognostic state makes continued time-stepping meaningless. +Every element of each validated array must be tested. + +### 2.2 Requirement: Check for out-of-bounds values + +In addition to NaN detection, the validation function must check that every +element of each validated field lies within a physically plausible range. +Values outside these ranges indicate catastrophic model failure and should +halt the simulation before corrupted output is written to disk. + +### 2.3 Requirement: Validate LayerThickness + +`LayerThickness` must be validated for each owned cell over all vertical +layers. The valid range is $[10^{-10},\, 1000]$ m. +Negative or near-zero layer thicknesses indicate numerical collapse of the +column and must be caught immediately. 
+ +### 2.4 Requirement: Validate KineticEnergyCell + +`KineticEnergyCell` from the kinematic auxiliary state must be validated for +each owned cell over all vertical layers. The valid range is $[0,\, 10]$ +m$^2$ s$^{-2}$. Negative kinetic energies are unphysical, and values +exceeding 10 m$^2$ s$^{-2}$ correspond to current speeds above +$\sim 4.5$ m s$^{-1}$, which are unrealistic for the open ocean. + +### 2.5 Requirement: Validate Temperature tracer + +Ocean Conservative Temperature must be validated for each owned cell over +all vertical layers. The valid range is $[-10,\, 50]$ °C. +This broad range accommodates all realistic oceanographic regimes including +polar and hydrothermal vent environments. + +### 2.6 Requirement: Validate Salinity tracer + +Ocean Absolute Salinity must be validated for each owned cell over all +vertical layers. The valid range is $[-2,\, 60]$ g kg$^{-1}$. +Values below $-2$ g kg$^{-1}$ are unphysical and values above 60 g kg$^{-1}$ +are outside the valid domain of the TEOS-10 equation of state. + +### 2.7 Requirement: GPU/CPU portability + +All validation kernels must execute on both CPU and GPU hardware using the +Kokkos parallel programming model and therefore must be expressed as Kokkos +parallel reductions. + +### 2.8 Requirement: Informative error reporting + +On detection of any failure the module must log a critical-level message that +identifies the field name, the nature of the problem (NaN or out-of-bounds), +and the number of offending elements. After all fields are checked the +module must additionally print a stack backtrace to assist with debugging, +then abort the run via `MPI_Abort`. + +### 2.9 Requirement: Graceful handling of absent tracers + +If the Temperature or Salinity tracer is not present in the tracer registry +(e.g. in configurations that do not activate active tracers) the +corresponding check must be skipped silently rather than causing an error. 
+ +### 2.10 Desired: Configurable valid ranges + +In the future it may be desirable to allow the user to override the default +valid ranges through the OMEGA configuration system (e.g. for idealised +process studies that intentionally use non-oceanic parameter values). + +### 2.11 Desired: Configurable validation frequency + +In the future it may be desirable to allow the user to control whether +validation is performed every timestep, every N timesteps, or only at +specific points in the run (e.g. after restart reads). + +## 3 Algorithmic Formulation + +No complex numerical algorithms are required. Each field is checked with two +independent `parallelReduce` passes over the domain: + +1. **NaN pass** – counts elements for which `Kokkos::isnan(val)` is `true`. +2. **Bounds pass** – counts elements that are finite yet lie outside + $[\text{MinVal},\, \text{MaxVal}]$. + +A separate NaN pass is required because comparing a NaN with `<` or `>` is +well defined but always `false`, so a bounds test alone would never flag it. + +For a 2-D field $f_{i,k}$ with $i \in [0,\, N_\text{cells})$ and +$k \in [0,\, N_\text{vert})$ the two counts are: + +$$ +N_\text{NaN} = \sum_{i,k} \mathbf{1}[\,\text{isnan}(f_{i,k})\,] +$$ + +$$ +N_\text{OOB} = \sum_{i,k} \mathbf{1}[\,\lnot\,\text{isnan}(f_{i,k}) + \land (f_{i,k} < f_\text{min} \lor f_{i,k} > f_\text{max})\,] +$$ + +For a 3-D tracer array $T_{n,i,k}$ the same expressions are applied at a +fixed tracer index $n$. + +The validation traverses only the `NCellsOwned` cells (excluding halo cells) +to avoid double-counting in the parallel decomposition. + +## 4 Design + +The module is implemented as a free function (`validateOceanState`) plus three +file-local helper functions. It does not introduce a class or persistent state. 
+ +### 4.1 Data types and parameters + +#### 4.1.1 Parameters + +The valid ranges for each field are compile-time constants embedded in the +implementation: + +| Field | MinVal | MaxVal | +|----------------------|-----------|--------| +| `LayerThickness` | 1×10⁻¹⁰ | 1000 | +| `KineticEnergyCell` | 0 | 10 | +| `Temperature` | −10 | 50 | +| `Salinity` | −2 | 60 | + +#### 4.1.2 Class/structs/data types + +No new classes or data types are introduced. The module uses the existing +`OceanState`, `AuxiliaryState`, and `Tracers` types from the OMEGA ocean +component. + +### 4.2 Methods + +#### 4.2.1 `validateOceanState` (public) + +The sole public interface of the module: + +```c++ +void validateOceanState(const OceanState *State, + const AuxiliaryState *AuxState, + I4 TimeLevel); +``` + +Validates all fields described in Section 2. Logs critical errors for each +failed check and aborts the run if any check fails. `TimeLevel` specifies +the time-level index within the state arrays to validate (0 = current +timestep). + +#### 4.2.2 `checkArray2D` (file-local helper) + +```c++ +static std::pair checkArray2D(const Array2DReal &Arr, + I4 NRows, I4 NCols, + Real MinVal, Real MaxVal, + bool CheckMin); +``` + +Performs the NaN and bounds counts for a 2-D device array over the first +`NRows` rows and `NCols` columns. When `CheckMin` is `false` only the upper +bound is enforced (not needed for any current field but useful for future +extension). Returns `{NaNCount, OutOfRangeCount}`. + +#### 4.2.3 `checkTracerArray` (file-local helper) + +```c++ +static std::pair checkTracerArray(const Array3DReal &Tracers3D, + I4 TracerIdx, + I4 NCells, I4 NVert, + Real MinVal, Real MaxVal); +``` + +Performs the NaN and bounds counts for a single tracer slice (identified by +`TracerIdx`) of the 3-D tracer array. Returns `{NaNCount, OutOfRangeCount}`. 
+ +#### 4.2.4 `abortWithMessage` (file-local helper) + +```c++ +static void abortWithMessage(const std::string &Msg); +``` + +Logs `Msg` at critical severity, prints a stack backtrace using `cpptrace`, +then calls `MPI_Abort` on the default OMEGA communicator with error code +`ErrorCode::Critical`. + +## 5 Verification and Testing + +### 5.1 Test: Valid state passes without abort + +A unit test constructs a minimal OMEGA environment (MachEnv, Decomp, +HorzMesh, VertCoord, Tracers, OceanState, AuxiliaryState) using the standard +test mesh `OmegaMesh.nc`. All state arrays are filled with physically +plausible values: + +- `LayerThickness` = 100 m (valid range [1×10⁻¹⁰, 1000]) +- `NormalVelocity` = 0 m s⁻¹ (not directly validated but required for + `KineticEnergyCell` to be zero) +- `Temperature` = 10 °C (valid range [−10, 50]) +- `Salinity` = 35 g kg⁻¹ (valid range [−2, 60]) + +`AuxiliaryState::computeAll` is called to populate `KineticEnergyCell` +before `validateOceanState` is invoked. + +The test passes if `validateOceanState` returns without calling `MPI_Abort`. + +Tests requirements: 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.9. + +### 5.2 Test: NaN in LayerThickness triggers abort + +A future test should set a subset of `LayerThickness` entries to `NaN` and +verify that `validateOceanState` detects and logs the error and calls +`MPI_Abort`. Because `MPI_Abort` terminates the process this test would +need to be run in a separate executable or with a death-test framework. + +Tests requirements: 2.1, 2.3, 2.8. + +### 5.3 Test: Out-of-bounds value in Temperature triggers abort + +A future test should set a subset of `Temperature` entries to a value +outside [−10, 50] (e.g. 999 °C) and verify that `validateOceanState` +detects and logs the error and calls `MPI_Abort`. + +Tests requirements: 2.2, 2.5, 2.8. 
+ +### 5.4 Test: Missing tracer is skipped gracefully + +A future test should invoke `validateOceanState` in a configuration where +neither Temperature nor Salinity tracers are registered and verify that the +function completes without error. + +Tests requirement: 2.9. From 27d4fe0ebf20daf1381aaf5a73db009219b54271 Mon Sep 17 00:00:00 2001 From: Azamat Mametjanov Date: Fri, 17 Apr 2026 22:23:26 +0000 Subject: [PATCH 3/6] Add design/StateValidation.md to doc/index.md toctree --- components/omega/doc/index.md | 1 + 1 file changed, 1 insertion(+) diff --git a/components/omega/doc/index.md b/components/omega/doc/index.md index 6712bb3e7f8d..8d1b38501df8 100644 --- a/components/omega/doc/index.md +++ b/components/omega/doc/index.md @@ -125,6 +125,7 @@ design/IO design/IOStreams design/Reductions design/State +design/StateValidation design/SubmesoscaleEddies design/Tendency design/Tendencies From cbbecf7ebfa0f3855bd260267acc8b4038ba3934 Mon Sep 17 00:00:00 2001 From: Azamat Mametjanov Date: Fri, 17 Apr 2026 23:12:41 +0000 Subject: [PATCH 4/6] Fix checks for invalid tracer indices --- components/omega/src/ocn/StateValidation.cpp | 4 ++-- components/omega/test/ocn/StateValidationTest.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/components/omega/src/ocn/StateValidation.cpp b/components/omega/src/ocn/StateValidation.cpp index a47832c1cc2d..9dce6eba89df 100644 --- a/components/omega/src/ocn/StateValidation.cpp +++ b/components/omega/src/ocn/StateValidation.cpp @@ -164,7 +164,7 @@ void validateOceanState(const OceanState *State, const AuxiliaryState *AuxState, // ------------------------------------------------------------------------- // Temperature tracer: valid range [-10, 50] // ------------------------------------------------------------------------- - if (Tracers::IndxTemp != Tracers::IndxInvalid) { + if (Tracers::IndxTemp != -1) { Array3DReal AllTracers = Tracers::getAll(TimeLevel); auto [NaNs, OOB] = checkTracerArray( AllTracers, 
Tracers::IndxTemp, State->NCellsOwned, State->NVertLayers, @@ -186,7 +186,7 @@ void validateOceanState(const OceanState *State, const AuxiliaryState *AuxState, // ------------------------------------------------------------------------- // Salinity tracer: valid range [-2, 60] // ------------------------------------------------------------------------- - if (Tracers::IndxSalt != Tracers::IndxInvalid) { + if (Tracers::IndxSalt != -1) { Array3DReal AllTracers = Tracers::getAll(TimeLevel); auto [NaNs, OOB] = checkTracerArray( AllTracers, Tracers::IndxSalt, State->NCellsOwned, State->NVertLayers, diff --git a/components/omega/test/ocn/StateValidationTest.cpp b/components/omega/test/ocn/StateValidationTest.cpp index 3c1baf9221f8..8639db22d464 100644 --- a/components/omega/test/ocn/StateValidationTest.cpp +++ b/components/omega/test/ocn/StateValidationTest.cpp @@ -111,13 +111,13 @@ static int fillValidState() { // Use deepCopy with individual tracer subviews if (Tracers::getNumTracers() > 0) { // Temperature = 10.0 (valid: -10 to 50) - if (Tracers::IndxTemp != Tracers::IndxInvalid) { + if (Tracers::IndxTemp != -1) { Array2DReal TempArr = Tracers::getByIndex(0, Tracers::IndxTemp); deepCopy(TempArr, static_cast(10.0)); } // Salinity = 35.0 (valid: -2 to 60) - if (Tracers::IndxSalt != Tracers::IndxInvalid) { + if (Tracers::IndxSalt != -1) { Array2DReal SaltArr = Tracers::getByIndex(0, Tracers::IndxSalt); deepCopy(SaltArr, static_cast(35.0)); } From 00312061a74dbbd43812386bf2b32111753a1bce Mon Sep 17 00:00:00 2001 From: Azamat Mametjanov Date: Mon, 27 Apr 2026 21:58:13 +0000 Subject: [PATCH 5/6] Limit checks to active cells --- .../omega/doc/design/StateValidation.md | 37 +++++++++++------- components/omega/src/ocn/StateValidation.cpp | 38 ++++++++++++++----- components/omega/src/ocn/StateValidation.h | 6 ++- .../omega/test/ocn/StateValidationTest.cpp | 2 +- 4 files changed, 59 insertions(+), 24 deletions(-) diff --git a/components/omega/doc/design/StateValidation.md 
b/components/omega/doc/design/StateValidation.md index e5540e5e82ba..0a431e748820 100644 --- a/components/omega/doc/design/StateValidation.md +++ b/components/omega/doc/design/StateValidation.md @@ -96,7 +96,9 @@ specific points in the run (e.g. after restart reads). ## 3 Algorithmic Formulation No complex numerical algorithms are required. Each field is checked with two -independent `parallelReduce` passes over the domain: +independent `parallelReduce` passes over the domain, restricted to active +cells where `CellMask(i, k) > 0` (inactive cells, such as land cells, are +skipped): 1. **NaN pass** – counts elements for which `Kokkos::isnan(val)` is `true`. 2. **Bounds pass** – counts elements that are finite yet lie outside @@ -109,14 +111,17 @@ For a 2-D field $f_{i,k}$ with $i \in [0,\, N_\text{cells})$ and $k \in [0,\, N_\text{vert})$ the two counts are: $$ -N_\text{NaN} = \sum_{i,k} \mathbf{1}[\,\text{isnan}(f_{i,k})\,] +N_\text{NaN} = \sum_{i,k} M_{i,k}\,\mathbf{1}[\,\text{isnan}(f_{i,k})\,] $$ $$ -N_\text{OOB} = \sum_{i,k} \mathbf{1}[\,\lnot\,\text{isnan}(f_{i,k}) +N_\text{OOB} = \sum_{i,k} M_{i,k}\,\mathbf{1}[\,\lnot\,\text{isnan}(f_{i,k}) \land (f_{i,k} < f_\text{min} \lor f_{i,k} > f_\text{max})\,] $$ +where $M_{i,k}$ is `CellMask(i, k)` (1 for active cell-layer, 0 for +inactive). + For a 3-D tracer array $T_{n,i,k}$ the same expressions are applied at a fixed tracer index $n$. @@ -145,8 +150,8 @@ implementation: #### 4.1.2 Class/structs/data types No new classes or data types are introduced. The module uses the existing -`OceanState`, `AuxiliaryState`, and `Tracers` types from the OMEGA ocean -component. +`OceanState`, `AuxiliaryState`, `VertCoord`, and `Tracers` types from the +OMEGA ocean component. ### 4.2 Methods @@ -157,13 +162,15 @@ The sole public interface of the module: ```c++ void validateOceanState(const OceanState *State, const AuxiliaryState *AuxState, + const VertCoord *VCoord, I4 TimeLevel); ``` -Validates all fields described in Section 2. 
Logs critical errors for each -failed check and aborts the run if any check fails. `TimeLevel` specifies -the time-level index within the state arrays to validate (0 = current -timestep). +Validates all fields described in Section 2, skipping inactive cells +(`CellMask == 0`). Logs critical errors for each failed check and aborts the +run if any check fails. `VCoord` supplies the `CellMask` array. `TimeLevel` +specifies the time-level index within the state arrays to validate (0 = +current timestep). #### 4.2.2 `checkArray2D` (file-local helper) @@ -171,11 +178,13 @@ timestep). static std::pair checkArray2D(const Array2DReal &Arr, I4 NRows, I4 NCols, Real MinVal, Real MaxVal, - bool CheckMin); + bool CheckMin, + const Array2DReal &CellMask); ``` Performs the NaN and bounds counts for a 2-D device array over the first -`NRows` rows and `NCols` columns. When `CheckMin` is `false` only the upper +`NRows` rows and `NCols` columns, restricted to active cells +(`CellMask(Row, Col) > 0`). When `CheckMin` is `false` only the upper bound is enforced (not needed for any current field but useful for future extension). Returns `{NaNCount, OutOfRangeCount}`. @@ -185,11 +194,13 @@ extension). Returns `{NaNCount, OutOfRangeCount}`. static std::pair checkTracerArray(const Array3DReal &Tracers3D, I4 TracerIdx, I4 NCells, I4 NVert, - Real MinVal, Real MaxVal); + Real MinVal, Real MaxVal, + const Array2DReal &CellMask); ``` Performs the NaN and bounds counts for a single tracer slice (identified by -`TracerIdx`) of the 3-D tracer array. Returns `{NaNCount, OutOfRangeCount}`. +`TracerIdx`) of the 3-D tracer array, restricted to active cells +(`CellMask(Cell, K) > 0`). Returns `{NaNCount, OutOfRangeCount}`. 
#### 4.2.4 `abortWithMessage` (file-local helper) diff --git a/components/omega/src/ocn/StateValidation.cpp b/components/omega/src/ocn/StateValidation.cpp index 9dce6eba89df..0fd40212f30c 100644 --- a/components/omega/src/ocn/StateValidation.cpp +++ b/components/omega/src/ocn/StateValidation.cpp @@ -16,6 +16,7 @@ #include "OceanState.h" #include "OmegaKokkos.h" #include "Tracers.h" +#include "VertCoord.h" #include "mpi.h" #include @@ -36,17 +37,22 @@ static void abortWithMessage(const std::string &Msg) { //------------------------------------------------------------------------------ // Helper: count NaN entries and out-of-range entries in a 2-D Real device -// array over the first NCells/NEdges rows and NVert columns. +// array over the first NCells/NEdges rows and NVert columns, restricted to +// active cells (CellMask > 0). // Returns {NaNCount, OutOfRangeCount}. static std::pair<I4, I4> checkArray2D(const Array2DReal &Arr, I4 NRows, I4 NCols, Real MinVal, Real MaxVal, - bool CheckMin) { + bool CheckMin, + const Array2DReal &CellMask) { I4 NaNCount = 0; I4 OutOfRangeCount = 0; parallelReduce( "CheckNaN", {NRows, NCols}, KOKKOS_LAMBDA(int Row, int Col, int &Accum) { + if (CellMask(Row, Col) == 0) { + return; + } Real Val = Arr(Row, Col); if (Kokkos::isnan(Val)) { ++Accum; @@ -57,6 +63,9 @@ static std::pair<I4, I4> checkArray2D(const Array2DReal &Arr, I4 NRows, parallelReduce( "CheckBounds", {NRows, NCols}, KOKKOS_LAMBDA(int Row, int Col, int &Accum) { + if (CellMask(Row, Col) == 0) { + return; + } Real Val = Arr(Row, Col); if (!Kokkos::isnan(Val)) { if (Val > MaxVal) { @@ -73,16 +82,21 @@ static std::pair<I4, I4> checkArray2D(const Array2DReal &Arr, I4 NRows, //------------------------------------------------------------------------------ // Helper: count NaN and out-of-range entries for a single tracer (row = cell, -// col = vert) extracted from the 3-D tracer array at the given tracer index.
+// col = vert) extracted from the 3-D tracer array at the given tracer index, +// restricted to active cells (CellMask > 0). static std::pair<I4, I4> checkTracerArray(const Array3DReal &Tracers3D, I4 TracerIdx, I4 NCells, I4 NVert, - Real MinVal, Real MaxVal) { + Real MinVal, Real MaxVal, + const Array2DReal &CellMask) { I4 NaNCount = 0; I4 OutOfRangeCount = 0; parallelReduce( "CheckTracerNaN", {NCells, NVert}, KOKKOS_LAMBDA(int Cell, int K, int &Accum) { + if (CellMask(Cell, K) == 0) { + return; + } Real Val = Tracers3D(TracerIdx, Cell, K); if (Kokkos::isnan(Val)) { ++Accum; @@ -93,6 +107,9 @@ static std::pair<I4, I4> checkTracerArray(const Array3DReal &Tracers3D, parallelReduce( "CheckTracerBounds", {NCells, NVert}, KOKKOS_LAMBDA(int Cell, int K, int &Accum) { + if (CellMask(Cell, K) == 0) { + return; + } Real Val = Tracers3D(TracerIdx, Cell, K); if (!Kokkos::isnan(Val)) { if (Val < MinVal || Val > MaxVal) { @@ -107,12 +124,15 @@ static std::pair<I4, I4> checkTracerArray(const Array3DReal &Tracers3D, //------------------------------------------------------------------------------ /// Validate ocean state fields for NaN and out-of-bounds conditions. +/// Only active cells (where CellMask > 0) are checked. /// Aborts via MPI_Abort on failure.
void validateOceanState(const OceanState *State, const AuxiliaryState *AuxState, - I4 TimeLevel) { + const VertCoord *VCoord, I4 TimeLevel) { bool AnyFailure = false; + const Array2DReal &CellMask = VCoord->CellMask; + // ------------------------------------------------------------------------- // LayerThickness: valid range [1e-10, 1000] // ------------------------------------------------------------------------- @@ -121,7 +141,7 @@ void validateOceanState(const OceanState *State, const AuxiliaryState *AuxState, auto [NaNs, OOB] = checkArray2D(LayerThick, State->NCellsOwned, State->NVertLayers, static_cast<Real>(1e-10), static_cast<Real>(1000.0), - /*CheckMin=*/true); + /*CheckMin=*/true, CellMask); if (NaNs > 0) { LOG_CRITICAL( @@ -144,7 +164,7 @@ void validateOceanState(const OceanState *State, const AuxiliaryState *AuxState, auto [NaNs, OOB] = checkArray2D(KE, State->NCellsOwned, State->NVertLayers, static_cast<Real>(0.0), static_cast<Real>(10.0), - /*CheckMin=*/true); + /*CheckMin=*/true, CellMask); if (NaNs > 0) { LOG_CRITICAL( @@ -168,7 +188,7 @@ void validateOceanState(const OceanState *State, const AuxiliaryState *AuxState, Array3DReal AllTracers = Tracers::getAll(TimeLevel); auto [NaNs, OOB] = checkTracerArray( AllTracers, Tracers::IndxTemp, State->NCellsOwned, State->NVertLayers, - static_cast<Real>(-10.0), static_cast<Real>(50.0)); + static_cast<Real>(-10.0), static_cast<Real>(50.0), CellMask); if (NaNs > 0) { LOG_CRITICAL("StateValidation: Temperature contains {} NaN value(s)", @@ -190,7 +210,7 @@ void validateOceanState(const OceanState *State, const AuxiliaryState *AuxState, Array3DReal AllTracers = Tracers::getAll(TimeLevel); auto [NaNs, OOB] = checkTracerArray( AllTracers, Tracers::IndxSalt, State->NCellsOwned, State->NVertLayers, - static_cast<Real>(-2.0), static_cast<Real>(60.0)); + static_cast<Real>(-2.0), static_cast<Real>(60.0), CellMask); if (NaNs > 0) { LOG_CRITICAL("StateValidation: Salinity contains {} NaN value(s)", diff --git a/components/omega/src/ocn/StateValidation.h
b/components/omega/src/ocn/StateValidation.h index 2395aaa76286..30b543423c25 100644 --- a/components/omega/src/ocn/StateValidation.h +++ b/components/omega/src/ocn/StateValidation.h @@ -15,11 +15,14 @@ #include "AuxiliaryState.h" #include "OceanState.h" #include "Tracers.h" +#include "VertCoord.h" namespace OMEGA { /// Check ocean state fields for NaN values and out-of-bounds conditions. /// +/// Only active ocean cells (where CellMask > 0) are checked. +/// /// The following fields are validated: /// - LayerThickness : [1e-10, 1000] (from OceanState) /// - KineticEnergyCell : [0, 10] @@ -33,9 +36,10 @@ namespace OMEGA { /// /// \param[in] State Ocean state to validate /// \param[in] AuxState Auxiliary state containing KineticEnergyCell +/// \param[in] VCoord Vertical coordinate containing the CellMask /// \param[in] TimeLevel Time level index to validate (typically 0 = current) void validateOceanState(const OceanState *State, const AuxiliaryState *AuxState, - I4 TimeLevel); + const VertCoord *VCoord, I4 TimeLevel); } // namespace OMEGA diff --git a/components/omega/test/ocn/StateValidationTest.cpp b/components/omega/test/ocn/StateValidationTest.cpp index 8639db22d464..4125c85c3c53 100644 --- a/components/omega/test/ocn/StateValidationTest.cpp +++ b/components/omega/test/ocn/StateValidationTest.cpp @@ -160,7 +160,7 @@ int testStateValidation() { // Test: validation should pass on valid state (no abort expected) LOG_INFO("StateValidationTest: Testing validation on valid state"); - validateOceanState(DefState, DefAuxState, 0); + validateOceanState(DefState, DefAuxState, VertCoord::getDefault(), 0); LOG_INFO("StateValidationTest: Valid state validation PASS"); AuxiliaryState::clear(); From 18e934b23dea5c5742453164d14251c7e4f58049 Mon Sep 17 00:00:00 2001 From: Azamat Mametjanov Date: Tue, 28 Apr 2026 02:24:33 +0000 Subject: [PATCH 6/6] Expand ctest with injection of NaN and OOB values without aborting --- .../omega/doc/design/StateValidation.md | 81 ++++--- 
components/omega/src/ocn/StateValidation.cpp | 41 ++-- components/omega/src/ocn/StateValidation.h | 33 ++- .../omega/test/ocn/StateValidationTest.cpp | 214 +++++++++++++++++- 4 files changed, 302 insertions(+), 67 deletions(-) diff --git a/components/omega/doc/design/StateValidation.md b/components/omega/doc/design/StateValidation.md index 0a431e748820..01bcf131a905 100644 --- a/components/omega/doc/design/StateValidation.md +++ b/components/omega/doc/design/StateValidation.md @@ -155,9 +155,25 @@ OMEGA ocean component. ### 4.2 Methods -#### 4.2.1 `validateOceanState` (public) +#### 4.2.1 `checkOceanState` (public) -The sole public interface of the module: +Performs all field checks and returns the total count of errors found: + +```c++ +I4 checkOceanState(const OceanState *State, + const AuxiliaryState *AuxState, + const VertCoord *VCoord, + I4 TimeLevel); +``` + +Checks all fields described in Section 2, skipping inactive cells +(`CellMask == 0`). Logs critical messages for each type of error. Returns +the total number of errors as an `I4`; returns 0 if all checks pass. Does +**not** abort. Suitable for calling from tests. + +#### 4.2.2 `validateOceanState` (public) + +Production entry-point that aborts on failure: ```c++ void validateOceanState(const OceanState *State, @@ -166,13 +182,10 @@ void validateOceanState(const OceanState *State, I4 TimeLevel); ``` -Validates all fields described in Section 2, skipping inactive cells -(`CellMask == 0`). Logs critical errors for each failed check and aborts the -run if any check fails. `VCoord` supplies the `CellMask` array. `TimeLevel` -specifies the time-level index within the state arrays to validate (0 = -current timestep). +Calls `checkOceanState` and aborts via `MPI_Abort` if the return value is +greater than zero. 
-#### 4.2.2 `checkArray2D` (file-local helper) +#### 4.2.3 `checkArray2D` (file-local helper) ```c++ static std::pair<I4, I4> checkArray2D(const Array2DReal &Arr, @@ -188,7 +201,7 @@ Performs the NaN and bounds counts for a 2-D device array over the first bound is enforced (not needed for any current field but useful for future extension). Returns `{NaNCount, OutOfRangeCount}`. -#### 4.2.3 `checkTracerArray` (file-local helper) +#### 4.2.4 `checkTracerArray` (file-local helper) ```c++ static std::pair<I4, I4> checkTracerArray(const Array3DReal &Tracers3D, @@ -202,7 +215,7 @@ Performs the NaN and bounds counts for a single tracer slice (identified by `TracerIdx`) of the 3-D tracer array, restricted to active cells (`CellMask(Cell, K) > 0`). Returns `{NaNCount, OutOfRangeCount}`. -#### 4.2.4 `abortWithMessage` (file-local helper) +#### 4.2.5 `abortWithMessage` (file-local helper) ```c++ static void abortWithMessage(const std::string &Msg); @@ -234,27 +247,27 @@ The test passes if `validateOceanState` returns without calling `MPI_Abort`. Tests requirements: 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.9. -### 5.2 Test: NaN in LayerThickness triggers abort - -A future test should set a subset of `LayerThickness` entries to `NaN` and -verify that `validateOceanState` detects and logs the error and calls -`MPI_Abort`. Because `MPI_Abort` terminates the process this test would -need to be run in a separate executable or with a death-test framework. - -Tests requirement: 2.1, 2.3, 2.8. - -### 5.3 Test: Out-of-bounds value in Temperature triggers abort - -A future test should set a subset of `Temperature` entries to a value -outside [−10, 50] (e.g. 999 °C) and verify that `validateOceanState` -detects and logs the error. - -Tests requirement: 2.2, 2.5, 2.8. - -### 5.4 Test: Missing tracer is skipped gracefully - -A future test should invoke `validateOceanState` in a configuration where -neither Temperature nor Salinity tracers are registered and verify that the -function completes without error.
- -Tests requirement: 2.9. +### 5.2 Negative tests: invalid values are detected + +The public `checkOceanState` function is used for negative tests so that +errors can be detected without triggering `MPI_Abort`. Each sub-test: +1. Resets the state to valid values via `restoreValidState`. +2. Injects a single type of invalid value (NaN or OOB) into one field using + a `parallelFor` kernel that overwrites all owned cell-layer entries. +3. Calls `checkOceanState` and verifies a non-zero error count is returned. + +The following sub-tests are implemented: + +| Sub-test | Injected value | Field | +|-------------------------------|------------------------|--------------------| +| `testNaNLayerThickness` | NaN | LayerThickness | +| `testOOBHighLayerThickness` | 2000 m (> max 1000 m) | LayerThickness | +| `testOOBLowLayerThickness` | −1 m (< min 1×10⁻¹⁰) | LayerThickness | +| `testNaNKineticEnergy` | NaN | KineticEnergyCell | +| `testOOBKineticEnergy` | 9999 J kg⁻¹ (> max 10) | KineticEnergyCell | +| `testNaNTemperature` | NaN | Temperature | +| `testOOBTemperature` | 9999 °C (> max 50) | Temperature | +| `testNaNSalinity` | NaN | Salinity | +| `testOOBSalinity` | 9999 g kg⁻¹ (> max 60) | Salinity | + +Tests requirements: 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8. diff --git a/components/omega/src/ocn/StateValidation.cpp b/components/omega/src/ocn/StateValidation.cpp index 0fd40212f30c..84e6f382d9f1 100644 --- a/components/omega/src/ocn/StateValidation.cpp +++ b/components/omega/src/ocn/StateValidation.cpp @@ -123,13 +123,13 @@ static std::pair checkTracerArray(const Array3DReal &Tracers3D, } //------------------------------------------------------------------------------ -/// Validate ocean state fields for NaN and out-of-bounds conditions. +/// Check ocean state fields for NaN and out-of-bounds conditions. /// Only active cells (where CellMask > 0) are checked. -/// Aborts via MPI_Abort on failure. 
-void validateOceanState(const OceanState *State, const AuxiliaryState *AuxState, - const VertCoord *VCoord, I4 TimeLevel) { +/// Returns the total count of errors found; does not abort. +I4 checkOceanState(const OceanState *State, const AuxiliaryState *AuxState, + const VertCoord *VCoord, I4 TimeLevel) { - bool AnyFailure = false; + I4 TotalErrors = 0; const Array2DReal &CellMask = VCoord->CellMask; @@ -146,13 +146,13 @@ void validateOceanState(const OceanState *State, const AuxiliaryState *AuxState, if (NaNs > 0) { LOG_CRITICAL( "StateValidation: LayerThickness contains {} NaN value(s)", NaNs); - AnyFailure = true; + TotalErrors += NaNs; } if (OOB > 0) { LOG_CRITICAL("StateValidation: LayerThickness has {} value(s) outside " "valid range [1e-10, 1000]", OOB); - AnyFailure = true; + TotalErrors += OOB; } } @@ -170,14 +170,14 @@ void validateOceanState(const OceanState *State, const AuxiliaryState *AuxState, LOG_CRITICAL( "StateValidation: KineticEnergyCell contains {} NaN value(s)", NaNs); - AnyFailure = true; + TotalErrors += NaNs; } if (OOB > 0) { LOG_CRITICAL( "StateValidation: KineticEnergyCell has {} value(s) outside " "valid range [0, 10]", OOB); - AnyFailure = true; + TotalErrors += OOB; } } @@ -193,13 +193,13 @@ void validateOceanState(const OceanState *State, const AuxiliaryState *AuxState, if (NaNs > 0) { LOG_CRITICAL("StateValidation: Temperature contains {} NaN value(s)", NaNs); - AnyFailure = true; + TotalErrors += NaNs; } if (OOB > 0) { LOG_CRITICAL("StateValidation: Temperature has {} value(s) outside " "valid range [-10, 50]", OOB); - AnyFailure = true; + TotalErrors += OOB; } } @@ -215,20 +215,27 @@ void validateOceanState(const OceanState *State, const AuxiliaryState *AuxState, if (NaNs > 0) { LOG_CRITICAL("StateValidation: Salinity contains {} NaN value(s)", NaNs); - AnyFailure = true; + TotalErrors += NaNs; } if (OOB > 0) { LOG_CRITICAL("StateValidation: Salinity has {} value(s) outside " "valid range [-2, 60]", OOB); - AnyFailure = true; + 
TotalErrors += OOB; } } - // ------------------------------------------------------------------------- - // Abort if any check failed - // ------------------------------------------------------------------------- - if (AnyFailure) { + return TotalErrors; +} + +//------------------------------------------------------------------------------ +/// Validate ocean state fields for NaN and out-of-bounds conditions. +/// Only active cells (where CellMask > 0) are checked. +/// Aborts via MPI_Abort on failure. +void validateOceanState(const OceanState *State, const AuxiliaryState *AuxState, + const VertCoord *VCoord, I4 TimeLevel) { + + if (checkOceanState(State, AuxState, VCoord, TimeLevel) > 0) { abortWithMessage("StateValidation: Ocean state validation failed. " "See critical messages above for details."); } diff --git a/components/omega/src/ocn/StateValidation.h b/components/omega/src/ocn/StateValidation.h index 30b543423c25..bf49ba74f63d 100644 --- a/components/omega/src/ocn/StateValidation.h +++ b/components/omega/src/ocn/StateValidation.h @@ -3,12 +3,13 @@ //===-- ocn/StateValidation.h - ocean state validation ----------*- C++ -*-===// // /// \file -/// \brief Declares the validateOceanState function for ocean state validation +/// \brief Declares state validation functions for ocean state validation /// -/// Provides a function that validates the ocean prognostic state and selected -/// auxiliary/tracer fields by checking for NaN values and out-of-bounds -/// conditions. If any check fails the function logs a critical error with a -/// backtrace and aborts via MPI_Abort on the local MPI communicator. +/// Provides two functions: +/// - checkOceanState: checks for NaN and out-of-bounds conditions and +/// returns the total error count without aborting. Suitable for testing. +/// - validateOceanState: calls checkOceanState and aborts via MPI_Abort if +/// any errors are found. 
// //===----------------------------------------------------------------------===// @@ -19,8 +20,30 @@ namespace OMEGA { +/// Check ocean state fields for NaN values and out-of-bounds conditions +/// without aborting, returning the total count of errors found. +/// +/// Only active ocean cells (where CellMask > 0) are checked. Critical log +/// messages are emitted for each type of error found. +/// +/// The following fields are validated: +/// - LayerThickness : [1e-10, 1000] (from OceanState) +/// - KineticEnergyCell : [0, 10] +/// (from AuxiliaryState::KineticAux) +/// - Temperature tracer : [-10, 50] (from Tracers) +/// - Salinity tracer : [-2, 60] (from Tracers) +/// +/// \param[in] State Ocean state to validate +/// \param[in] AuxState Auxiliary state containing KineticEnergyCell +/// \param[in] VCoord Vertical coordinate containing the CellMask +/// \param[in] TimeLevel Time level index to validate (typically 0 = current) +/// \return I4 total count of errors found across all checked fields (0 = valid) +I4 checkOceanState(const OceanState *State, const AuxiliaryState *AuxState, + const VertCoord *VCoord, I4 TimeLevel); + /// Check ocean state fields for NaN values and out-of-bounds conditions. /// +/// Calls checkOceanState and aborts via MPI_Abort if any errors are found. /// Only active ocean cells (where CellMask > 0) are checked. /// /// The following fields are validated: diff --git a/components/omega/test/ocn/StateValidationTest.cpp b/components/omega/test/ocn/StateValidationTest.cpp index 4125c85c3c53..cbff1cc956c0 100644 --- a/components/omega/test/ocn/StateValidationTest.cpp +++ b/components/omega/test/ocn/StateValidationTest.cpp @@ -4,9 +4,10 @@ /// \file /// \brief Test driver for ocean state validation /// -/// Tests the validateOceanState function by verifying that it passes on valid -/// state data. Checks cover LayerThickness, KineticEnergyCell, Temperature, -/// and Salinity fields. 
+/// Tests both the positive path (valid state passes) and the negative paths +/// (NaN and out-of-bounds values in each checked field are detected) of +/// validateOceanState / checkOceanState. Checked fields are: +/// - LayerThickness, KineticEnergyCell, Temperature, Salinity. // //===-----------------------------------------------------------------------===/ @@ -34,6 +35,7 @@ #include "mpi.h" #include +#include <limits> using namespace OMEGA; @@ -127,7 +129,180 @@ static int fillValidState() { } //------------------------------------------------------------------------------ -// Run state validation tests +// Restore the default state to valid values and recompute the auxiliary state + +static void restoreValidState(OceanState *State, AuxiliaryState *AuxState) { + fillValidState(); + Array3DReal AllTracers = Tracers::getAll(0); + AuxState->computeAll(State, AllTracers, 0); +} + +//------------------------------------------------------------------------------ +// Positive test: validate a clean, valid state — expects 0 errors + +static int testValidState(OceanState *State, AuxiliaryState *AuxState, + VertCoord *VCoord) { + LOG_INFO("StateValidationTest: Testing validation on valid state"); + validateOceanState(State, AuxState, VCoord, 0); + LOG_INFO("StateValidationTest: Valid state validation PASS"); + return 0; +} + +//------------------------------------------------------------------------------ +// Negative tests: restore a valid state, inject an invalid value, then verify +// that checkOceanState returns > 0. +// +// Returns 0 on success (i.e. the error was caught), 1 otherwise.
+ +static int expectErrors(const char *TestName, OceanState *State, + AuxiliaryState *AuxState, VertCoord *VCoord) { + I4 Errs = checkOceanState(State, AuxState, VCoord, 0); + if (Errs == 0) { + LOG_ERROR("StateValidationTest: {} - expected errors but got none", + TestName); + return 1; + } + LOG_INFO("StateValidationTest: {} PASS (caught {} error(s))", TestName, + Errs); + return 0; +} + +// --- LayerThickness --- + +static int testNaNLayerThickness(OceanState *State, AuxiliaryState *AuxState, + VertCoord *VCoord) { + restoreValidState(State, AuxState); + const Real NaN = std::numeric_limits<Real>::quiet_NaN(); + Array2DReal LT = State->getLayerThickness(0); + const int NCells = State->NCellsAll; + const int NVert = State->NVertLayers; + parallelFor( + "InjectNaNLayerThick", {NCells, NVert}, + KOKKOS_LAMBDA(int I, int K) { LT(I, K) = NaN; }); + return expectErrors("NaN in LayerThickness", State, AuxState, VCoord); +} + +static int testOOBHighLayerThickness(OceanState *State, + AuxiliaryState *AuxState, + VertCoord *VCoord) { + restoreValidState(State, AuxState); + Array2DReal LT = State->getLayerThickness(0); + const int NCells = State->NCellsAll; + const int NVert = State->NVertLayers; + // 2000 m is above the valid max of 1000 m + parallelFor( + "InjectOOBHighLayerThick", {NCells, NVert}, + KOKKOS_LAMBDA(int I, int K) { LT(I, K) = 2000.0; }); + return expectErrors("OOB-high in LayerThickness", State, AuxState, VCoord); +} + +static int testOOBLowLayerThickness(OceanState *State, AuxiliaryState *AuxState, + VertCoord *VCoord) { + restoreValidState(State, AuxState); + Array2DReal LT = State->getLayerThickness(0); + const int NCells = State->NCellsAll; + const int NVert = State->NVertLayers; + // -1.0 m is below the valid min of 1e-10 m + parallelFor( + "InjectOOBLowLayerThick", {NCells, NVert}, + KOKKOS_LAMBDA(int I, int K) { LT(I, K) = -1.0; }); + return expectErrors("OOB-low in LayerThickness", State, AuxState, VCoord); +} + +// --- KineticEnergyCell --- + +static int
testNaNKineticEnergy(OceanState *State, AuxiliaryState *AuxState, + VertCoord *VCoord) { + restoreValidState(State, AuxState); + const Real NaN = std::numeric_limits<Real>::quiet_NaN(); + Array2DReal KE = AuxState->KineticAux.KineticEnergyCell; + const int NCells = State->NCellsAll; + const int NVert = State->NVertLayers; + parallelFor( + "InjectNaNKE", {NCells, NVert}, + KOKKOS_LAMBDA(int I, int K) { KE(I, K) = NaN; }); + return expectErrors("NaN in KineticEnergyCell", State, AuxState, VCoord); +} + +static int testOOBKineticEnergy(OceanState *State, AuxiliaryState *AuxState, + VertCoord *VCoord) { + restoreValidState(State, AuxState); + Array2DReal KE = AuxState->KineticAux.KineticEnergyCell; + const int NCells = State->NCellsAll; + const int NVert = State->NVertLayers; + // 9999 J/kg is above the valid max of 10 J/kg + parallelFor( + "InjectOOBKE", {NCells, NVert}, + KOKKOS_LAMBDA(int I, int K) { KE(I, K) = 9999.0; }); + return expectErrors("OOB in KineticEnergyCell", State, AuxState, VCoord); +} + +// --- Temperature tracer --- + +static int testNaNTemperature(OceanState *State, AuxiliaryState *AuxState, + VertCoord *VCoord) { + if (Tracers::IndxTemp == -1) + return 0; // tracer not active; skip + restoreValidState(State, AuxState); + const Real NaN = std::numeric_limits<Real>::quiet_NaN(); + Array2DReal TempArr = Tracers::getByIndex(0, Tracers::IndxTemp); + const int NCells = State->NCellsAll; + const int NVert = State->NVertLayers; + parallelFor( + "InjectNaNTemp", {NCells, NVert}, + KOKKOS_LAMBDA(int I, int K) { TempArr(I, K) = NaN; }); + return expectErrors("NaN in Temperature", State, AuxState, VCoord); +} + +static int testOOBTemperature(OceanState *State, AuxiliaryState *AuxState, + VertCoord *VCoord) { + if (Tracers::IndxTemp == -1) + return 0; // tracer not active; skip + restoreValidState(State, AuxState); + Array2DReal TempArr = Tracers::getByIndex(0, Tracers::IndxTemp); + const int NCells = State->NCellsAll; + const int NVert = State->NVertLayers; + // 9999 C
is above the valid max of 50 C + parallelFor( + "InjectOOBTemp", {NCells, NVert}, + KOKKOS_LAMBDA(int I, int K) { TempArr(I, K) = 9999.0; }); + return expectErrors("OOB in Temperature", State, AuxState, VCoord); +} + +// --- Salinity tracer --- + +static int testNaNSalinity(OceanState *State, AuxiliaryState *AuxState, + VertCoord *VCoord) { + if (Tracers::IndxSalt == -1) + return 0; // tracer not active; skip + restoreValidState(State, AuxState); + const Real NaN = std::numeric_limits<Real>::quiet_NaN(); + Array2DReal SaltArr = Tracers::getByIndex(0, Tracers::IndxSalt); + const int NCells = State->NCellsAll; + const int NVert = State->NVertLayers; + parallelFor( + "InjectNaNSalt", {NCells, NVert}, + KOKKOS_LAMBDA(int I, int K) { SaltArr(I, K) = NaN; }); + return expectErrors("NaN in Salinity", State, AuxState, VCoord); +} + +static int testOOBSalinity(OceanState *State, AuxiliaryState *AuxState, + VertCoord *VCoord) { + if (Tracers::IndxSalt == -1) + return 0; // tracer not active; skip + restoreValidState(State, AuxState); + Array2DReal SaltArr = Tracers::getByIndex(0, Tracers::IndxSalt); + const int NCells = State->NCellsAll; + const int NVert = State->NVertLayers; + // 9999 g/kg is above the valid max of 60 g/kg + parallelFor( + "InjectOOBSalt", {NCells, NVert}, + KOKKOS_LAMBDA(int I, int K) { SaltArr(I, K) = 9999.0; }); + return expectErrors("OOB in Salinity", State, AuxState, VCoord); +} + +//------------------------------------------------------------------------------ +// Run all state validation tests int testStateValidation() { int Err = 0; @@ -149,19 +324,36 @@ int testStateValidation() { return Err; } - // Fill state arrays with valid values - Err += fillValidState(); + VertCoord *VCoord = VertCoord::getDefault(); - // Compute auxiliary variables so KineticEnergyCell is populated + // Fill state arrays with valid values and compute auxiliary state + fillValidState(); { Array3DReal AllTracers = Tracers::getAll(0); DefAuxState->computeAll(DefState, AllTracers,
0); } - // Test: validation should pass on valid state (no abort expected) - LOG_INFO("StateValidationTest: Testing validation on valid state"); - validateOceanState(DefState, DefAuxState, VertCoord::getDefault(), 0); - LOG_INFO("StateValidationTest: Valid state validation PASS"); + // ---- Positive test ---- + Err += testValidState(DefState, DefAuxState, VCoord); + + // ---- Negative tests: each injects a bad value and verifies detection ---- + + // LayerThickness + Err += testNaNLayerThickness(DefState, DefAuxState, VCoord); + Err += testOOBHighLayerThickness(DefState, DefAuxState, VCoord); + Err += testOOBLowLayerThickness(DefState, DefAuxState, VCoord); + + // KineticEnergyCell + Err += testNaNKineticEnergy(DefState, DefAuxState, VCoord); + Err += testOOBKineticEnergy(DefState, DefAuxState, VCoord); + + // Temperature tracer + Err += testNaNTemperature(DefState, DefAuxState, VCoord); + Err += testOOBTemperature(DefState, DefAuxState, VCoord); + + // Salinity tracer + Err += testNaNSalinity(DefState, DefAuxState, VCoord); + Err += testOOBSalinity(DefState, DefAuxState, VCoord); AuxiliaryState::clear(); return Err;