From e559c465b63fb54b29348a2927140d9de543d67d Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 25 Aug 2025 14:53:34 +0000
Subject: [PATCH 01/98] Relax cuml constraints

---
 examples/cfd/external_aerodynamics/domino/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/cfd/external_aerodynamics/domino/requirements.txt b/examples/cfd/external_aerodynamics/domino/requirements.txt
index cafc1c7a4c..bb81466cdd 100644
--- a/examples/cfd/external_aerodynamics/domino/requirements.txt
+++ b/examples/cfd/external_aerodynamics/domino/requirements.txt
@@ -1,4 +1,4 @@
 torchinfo
 warp-lang
 tensorboard
-cuml-cu12>=25.6.0
+cuml

From e38ecdf0d464de928a033f4aec82b49756f87740 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 25 Aug 2025 14:54:58 +0000
Subject: [PATCH 02/98] Port sdf function to use only torch inputs.  No changes
 to tests yet.

---
 physicsnemo/utils/sdf.py | 155 +++++++++++++++++----------------------
 1 file changed, 66 insertions(+), 89 deletions(-)

diff --git a/physicsnemo/utils/sdf.py b/physicsnemo/utils/sdf.py
index a095074a96..446b7b5d54 100644
--- a/physicsnemo/utils/sdf.py
+++ b/physicsnemo/utils/sdf.py
@@ -14,8 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import cupy as cp
-import numpy as np
+import torch
 import warp as wp
 
 wp.config.quiet = True
@@ -28,7 +27,6 @@ def _bvh_query_distance(
     max_dist: wp.float32,
     sdf: wp.array(dtype=wp.float32),
     sdf_hit_point: wp.array(dtype=wp.vec3f),
-    sdf_hit_point_id: wp.array(dtype=wp.int32),
     use_sign_winding_number: bool = False,
 ):
     """
@@ -67,22 +65,15 @@ def _bvh_query_distance(
 
     sdf[tid] = res.sign * wp.abs(wp.length(points[tid] - p_closest))
     sdf_hit_point[tid] = p_closest
-    sdf_hit_point_id[tid] = res.face
-
-
-Array = np.ndarray | cp.ndarray
 
 
 def signed_distance_field(
-    mesh_vertices: Array,
-    mesh_indices: Array,
-    input_points: Array,
+    mesh_vertices: torch.Tensor,
+    mesh_indices: torch.Tensor,
+    input_points: torch.Tensor,
     max_dist: float = 1e8,
-    include_hit_points: bool = False,
-    include_hit_points_id: bool = False,
     use_sign_winding_number: bool = False,
-    return_cupy: bool | None = None,
-) -> Array | tuple[Array, ...]:
+):
     """
     Computes the signed distance field (SDF) for a given mesh and input points.
 
@@ -100,11 +91,7 @@ def signed_distance_field(
         max_dist (float, optional): Maximum distance within which
             to search for the closest point on the mesh. Default is 1e8.
         include_hit_points (bool, optional): Whether to include hit points in
-            the output. Here, "hit points" are the points on the mesh that are
-            closest to the input points, and hence, are defining the SDF.
-            Default is False.
-        include_hit_points_id (bool, optional): Whether to include hit point
-            IDs in the output. Default is False.
+            the output. Here,
         use_sign_winding_number (bool, optional): Whether to use sign winding
             number method for SDF. Default is False. If False, your mesh should
             be watertight to obtain correct results.
@@ -115,88 +102,78 @@ def signed_distance_field(
     Returns:
     -------
     Returns:
-        np.ndarray | cp.ndarray or tuple:
-            - If both `include_hit_points` and `include_hit_points_id` are False
-              (default), returns a 1D array of signed distances for each input
-              point.
-            - If `include_hit_points` is True, returns a tuple: (sdf,
-              hit_points), where `hit_points` contains the closest mesh point
-              for each input point.
-            - If `include_hit_points_id` is True, returns a tuple: (sdf,
-              hit_point_ids), where `hit_point_ids` contains the face index of
-              the closest mesh face for each input point.
-            - If both `include_hit_points` and `include_hit_points_id` are True,
-              returns a tuple: (sdf, hit_points, hit_point_ids).
-            - The returned array type (NumPy or CuPy) is determined by the
-            `return_cupy` argument, or inferred from the input arrays.
+        tuple[torch.Tensor, torch.Tensor] of:
+            - signed distance to the mesh, per input point
+            - hith point, per input point. "hit points" are the points on the
+              mesh that are closest to the input points, and hence, are
+              defining the SDF.
 
     Example:
     -------
     >>> mesh_vertices = [(0, 0, 0), (1, 0, 0), (0, 1, 0)]
-    >>> mesh_indices = np.array((0, 1, 2))
-    >>> input_points = [(0.5, 0.5, 0.5)]
+    >>> mesh_indices = torch.tensor((0, 1, 2))
+    >>> input_points = torch.tensor((0.5, 0.5, 0.5))
     >>> signed_distance_field(mesh_vertices, mesh_indices, input_points)
-    array([0.5], dtype=float32)
+    (tensor([0.5]), tensor([0.5, 0.5, 0.5]))
     """
-    if return_cupy is None:
-        return_cupy = any(
-            isinstance(arr, cp.ndarray)
-            for arr in (mesh_vertices, mesh_indices, input_points)
-        )
 
-    wp.init()
+    if input_points.shape[-1] != 3:
+        raise ValueError("Input points must be a tensor with last dimension of size 3")
 
-    if isinstance(mesh_vertices, cp.ndarray):
-        device = mesh_vertices.device
-        wp_device = f"cuda:{device.id}"
-    else:
-        wp_device = wp.get_device()
+    input_shape = input_points.shape
 
-    with wp.ScopedDevice(wp_device):
-        mesh = wp.Mesh(
-            points=wp.array(mesh_vertices, dtype=wp.vec3f, device=wp_device),
-            indices=wp.array(mesh_indices, dtype=wp.int32, device=wp_device),
-        )
+    # Flatten the input points:
+    input_points = input_points.reshape(-1, 3)
 
-        warp_input_points = wp.array(input_points, dtype=wp.vec3f, device=wp_device)
-
-        N = len(warp_input_points)
-
-        sdf = wp.empty(shape=(N,), dtype=wp.float32, device=wp_device)
-        sdf_hit_point = wp.empty(shape=(N,), dtype=wp.vec3f, device=wp_device)
-        sdf_hit_point_id = wp.empty(shape=(N,), dtype=wp.int32, device=wp_device)
-
-        wp.launch(
-            kernel=_bvh_query_distance,
-            dim=N,
-            inputs=[
-                mesh.id,
-                warp_input_points,
-                max_dist,
-                sdf,
-                sdf_hit_point,
-                sdf_hit_point_id,
-                use_sign_winding_number,
-            ],
-            device=wp_device,
-        )
+    N = len(input_points)
+
+    # Allocate output tensors with torch:
+    sdf = torch.zeros(N, dtype=torch.float32, device=input_points.device)
+    sdf_hit_point = torch.zeros(N, 3, dtype=torch.float32, device=input_points.device)
 
-        def convert(array: wp.array) -> np.ndarray | cp.ndarray:
-            """Converts a Warp array to CuPy/NumPy based on the `return_cupy` flag."""
-            if return_cupy:
-                return cp.asarray(array)
-            else:
-                return array.numpy()
+    wp.init()
+
+    # zero copy the vertices, indices, and input points to warp:
+    wp_vertices = wp.from_torch(mesh_vertices, dtype=wp.vec3)
+    wp_indices = wp.from_torch(mesh_indices, dtype=wp.int32)
+    wp_input_points = wp.from_torch(input_points, dtype=wp.vec3)
 
-        arrays_to_return: list[np.ndarray | cp.ndarray] = [convert(sdf)]
+    # Convert output points:
+    wp_sdf = wp.from_torch(sdf, dtype=wp.float32)
+    wp_sdf_hit_point = wp.from_torch(sdf_hit_point, dtype=wp.vec3f)
 
-        if include_hit_points:
-            arrays_to_return.append(convert(sdf_hit_point))
-        if include_hit_points_id:
-            arrays_to_return.append(convert(sdf_hit_point_id))
+    mesh = wp.Mesh(
+        points=wp_vertices,
+        indices=wp_indices,
+        support_winding_number=use_sign_winding_number,
+    )
 
-        return (
-            arrays_to_return[0]
-            if len(arrays_to_return) == 1
-            else tuple(arrays_to_return)
+    if input_points.device.type == "cuda":
+        wp_launch_stream = wp.stream_from_torch(
+            torch.cuda.current_stream(input_points.device)
         )
+        wp_launch_device = None  # We explicitly pass None if using the stream.
+    else:
+        wp_launch_stream = None
+        wp_launch_device = "cpu"  # CPUs have no streams
+
+    wp.launch(
+        kernel=_bvh_query_distance,
+        dim=N,
+        inputs=[
+            mesh.id,
+            wp_input_points,
+            max_dist,
+            wp_sdf,
+            wp_sdf_hit_point,
+            use_sign_winding_number,
+        ],
+        device=wp_launch_device,
+        stream=wp_launch_stream,
+    )
+
+    # Unflatten the output to be like the input:
+    sdf = sdf.reshape(input_shape[:-1] + (1,))
+    sdf_hit_point = sdf_hit_point.reshape(input_shape)
+
+    return sdf, sdf_hit_point

From dd7b3cfa14b53bac69d385b82eb9a70918a76e7a Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 25 Aug 2025 14:58:21 +0000
Subject: [PATCH 03/98] Porting some domino utils function to pure torch
 interface

---
 physicsnemo/utils/domino/utils.py          | 578 ++++++---------------
 physicsnemo/utils/domino/vtk_file_utils.py | 380 ++++++++++++++
 2 files changed, 542 insertions(+), 416 deletions(-)
 create mode 100644 physicsnemo/utils/domino/vtk_file_utils.py

diff --git a/physicsnemo/utils/domino/utils.py b/physicsnemo/utils/domino/utils.py
index dc4f3ac796..15437dca9e 100644
--- a/physicsnemo/utils/domino/utils.py
+++ b/physicsnemo/utils/domino/utils.py
@@ -26,15 +26,10 @@
 from typing import Any, Sequence
 
 import numpy as np
-import vtk
+import torch
 from scipy.spatial import KDTree
-from vtk import vtkDataSetTriangleFilter
-from vtk.util import numpy_support
-
-from physicsnemo.utils.profiling import profile
 
 # Type alias for arrays that can be either NumPy or CuPy
-
 try:
     import cupy as cp
 
@@ -69,7 +64,9 @@ def array_type(array: ArrayType) -> "type[np] | type[cp]":
         return np
 
 
-def calculate_center_of_mass(centers: ArrayType, sizes: ArrayType) -> ArrayType:
+def calculate_center_of_mass(
+    centers: torch.Tensor, sizes: torch.Tensor
+) -> torch.Tensor:
     """Calculate the center of mass for a collection of elements.
 
     Computes the volume-weighted centroid of mesh elements, commonly used
@@ -88,24 +85,25 @@ def calculate_center_of_mass(centers: ArrayType, sizes: ArrayType) -> ArrayType:
         ValueError: If centers and sizes have incompatible shapes.
 
     Examples:
-        >>> import numpy as np
-        >>> centers = np.array([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0], [2.0, 2.0, 2.0]])
-        >>> sizes = np.array([1.0, 2.0, 3.0])
+        >>> import torch
+        >>> centers = torch.tensor([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0], [2.0, 2.0, 2.0]])
+        >>> sizes = torch.tensor([1.0, 2.0, 3.0])
         >>> com = calculate_center_of_mass(centers, sizes)
-        >>> np.allclose(com, [[4.0/3.0, 4.0/3.0, 4.0/3.0]])
+        >>> torch.allclose(com, torch.tensor([[4.0/3.0, 4.0/3.0, 4.0/3.0]]))
         True
     """
-    xp = array_type(centers)
 
-    total_weighted_position = xp.einsum("i,ij->j", sizes, centers)
-    total_size = xp.sum(sizes)
+    total_weighted_position = torch.einsum("i,ij->j", sizes, centers)
+    total_size = torch.sum(sizes)
 
     return total_weighted_position[None, ...] / total_size
 
 
 def normalize(
-    field: ArrayType, max_val: ArrayType | None = None, min_val: ArrayType | None = None
-) -> ArrayType:
+    field: torch.Tensor,
+    max_val: torch.Tensor | None = None,
+    min_val: torch.Tensor | None = None,
+) -> torch.Tensor:
     """Normalize field values to the range [-1, 1].
 
     Applies min-max normalization to scale field values to a symmetric range
@@ -126,30 +124,29 @@ def normalize(
         ZeroDivisionError: If max_val equals min_val (zero range).
 
     Examples:
-        >>> import numpy as np
-        >>> field = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
+        >>> import torch
+        >>> field = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0])
         >>> normalized = normalize(field, 5.0, 1.0)
-        >>> np.allclose(normalized, [-1.0, -0.5, 0.0, 0.5, 1.0])
+        >>> torch.allclose(normalized, [-1.0, -0.5, 0.0, 0.5, 1.0])
         True
         >>> # Auto-compute min/max
         >>> normalized_auto = normalize(field)
-        >>> np.allclose(normalized_auto, [-1.0, -0.5, 0.0, 0.5, 1.0])
+        >>> torch.allclose(normalized_auto, [-1.0, -0.5, 0.0, 0.5, 1.0])
         True
     """
-    xp = array_type(field)
 
     if max_val is None:
-        max_val = xp.max(field, axis=0, keepdims=True)
+        max_val = field.max(axis=0, keepdim=True)
     if min_val is None:
-        min_val = xp.min(field, axis=0, keepdims=True)
+        min_val = field.min(axis=0, keepdim=True)
 
     field_range = max_val - min_val
     return 2.0 * (field - min_val) / field_range - 1.0
 
 
 def unnormalize(
-    normalized_field: ArrayType, max_val: ArrayType, min_val: ArrayType
-) -> ArrayType:
+    normalized_field: torch.Tensor, max_val: torch.Tensor, min_val: torch.Tensor
+) -> torch.Tensor:
     """Reverse the normalization process to recover original field values.
 
     Transforms normalized values from the range [-1, 1] back to their original
@@ -164,10 +161,10 @@ def unnormalize(
         Field values restored to their original physical range.
 
     Examples:
-        >>> import numpy as np
-        >>> normalized = np.array([-1.0, -0.5, 0.0, 0.5, 1.0])
+        >>> import torch
+        >>> normalized = torch.tensor([-1.0, -0.5, 0.0, 0.5, 1.0])
         >>> original = unnormalize(normalized, 5.0, 1.0)
-        >>> np.allclose(original, [1.0, 2.0, 3.0, 4.0, 5.0])
+        >>> torch.allclose(original, [1.0, 2.0, 3.0, 4.0, 5.0])
         True
     """
     field_range = max_val - min_val
@@ -175,8 +172,10 @@ def unnormalize(
 
 
 def standardize(
-    field: ArrayType, mean: ArrayType | None = None, std: ArrayType | None = None
-) -> ArrayType:
+    field: torch.Tensor,
+    mean: torch.Tensor | None = None,
+    std: torch.Tensor | None = None,
+) -> torch.Tensor:
     """Standardize field values to have zero mean and unit variance.
 
     Applies z-score normalization to center the data around zero with
@@ -195,31 +194,30 @@ def standardize(
         ZeroDivisionError: If std contains zeros.
 
     Examples:
-        >>> import numpy as np
-        >>> field = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
-        >>> standardized = standardize(field, 3.0, np.sqrt(2.5))
-        >>> np.allclose(standardized, [-1.265, -0.632, 0.0, 0.632, 1.265], atol=1e-3)
+        >>> import torch
+        >>> field = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0])
+        >>> standardized = standardize(field, 3.0, torch.sqrt(2.5))
+        >>> torch.allclose(standardized, [-1.265, -0.632, 0.0, 0.632, 1.265], atol=1e-3)
         True
         >>> # Auto-compute mean/std
         >>> standardized_auto = standardize(field)
-        >>> np.allclose(np.mean(standardized_auto), 0.0)
+        >>> torch.allclose(torch.mean(standardized_auto), 0.0)
         True
-        >>> np.allclose(np.std(standardized_auto, ddof=0), 1.0)
+        >>> torch.allclose(torch.std(standardized_auto, ddof=0), 1.0)
         True
     """
-    xp = array_type(field)
 
     if mean is None:
-        mean = xp.mean(field, axis=0, keepdims=True)
+        mean = field.mean(axis=0, keepdim=True)
     if std is None:
-        std = xp.std(field, axis=0, keepdims=True)
+        std = field.std(axis=0, keepdim=True)
 
     return (field - mean) / std
 
 
 def unstandardize(
-    standardized_field: ArrayType, mean: ArrayType, std: ArrayType
-) -> ArrayType:
+    standardized_field: torch.Tensor, mean: torch.Tensor, std: torch.Tensor
+) -> torch.Tensor:
     """Reverse the standardization process to recover original field values.
 
     Transforms standardized values (zero mean, unit variance) back to their
@@ -234,365 +232,15 @@ def unstandardize(
         Field values restored to their original distribution.
 
     Examples:
-        >>> import numpy as np
-        >>> standardized = np.array([-1.265, -0.632, 0.0, 0.632, 1.265])
-        >>> original = unstandardize(standardized, 3.0, np.sqrt(2.5))
-        >>> np.allclose(original, [1.0, 2.0, 3.0, 4.0, 5.0], atol=1e-3)
+        >>> import torch
+        >>> standardized = torch.tensor([-1.265, -0.632, 0.0, 0.632, 1.265])
+        >>> original = unstandardize(standardized, 3.0, torch.sqrt(2.5))
+        >>> torch.allclose(original, [1.0, 2.0, 3.0, 4.0, 5.0], atol=1e-3)
         True
     """
     return standardized_field * std + mean
 
 
-def write_to_vtp(polydata: "vtk.vtkPolyData", filename: str) -> None:
-    """Write VTK polydata to a VTP (VTK PolyData) file format.
-
-    VTP files are XML-based and store polygonal data including points, polygons,
-    and associated field data. This format is commonly used for surface meshes
-    in computational fluid dynamics visualization.
-
-    Args:
-        polydata: VTK polydata object containing mesh geometry and fields.
-        filename: Output filename with .vtp extension. Directory will be created
-            if it doesn't exist.
-
-    Raises:
-        RuntimeError: If writing fails due to file permissions or disk space.
-
-    """
-    # Ensure output directory exists
-    output_path = Path(filename)
-    output_path.parent.mkdir(parents=True, exist_ok=True)
-
-    writer = vtk.vtkXMLPolyDataWriter()
-    writer.SetFileName(str(output_path))
-    writer.SetInputData(polydata)
-
-    if not writer.Write():
-        raise RuntimeError(f"Failed to write polydata to {output_path}")
-
-
-def write_to_vtu(unstructured_grid: "vtk.vtkUnstructuredGrid", filename: str) -> None:
-    """Write VTK unstructured grid to a VTU (VTK Unstructured Grid) file format.
-
-    VTU files store 3D volumetric meshes with arbitrary cell types including
-    tetrahedra, hexahedra, and pyramids. This format is essential for storing
-    finite element analysis results.
-
-    Args:
-        unstructured_grid: VTK unstructured grid object containing volumetric mesh
-            geometry and field data.
-        filename: Output filename with .vtu extension. Directory will be created
-            if it doesn't exist.
-
-    Raises:
-        RuntimeError: If writing fails due to file permissions or disk space.
-
-    """
-    # Ensure output directory exists
-    output_path = Path(filename)
-    output_path.parent.mkdir(parents=True, exist_ok=True)
-
-    writer = vtk.vtkXMLUnstructuredGridWriter()
-    writer.SetFileName(str(output_path))
-    writer.SetInputData(unstructured_grid)
-
-    if not writer.Write():
-        raise RuntimeError(f"Failed to write unstructured grid to {output_path}")
-
-
-def extract_surface_triangles(tetrahedral_mesh: "vtk.vtkUnstructuredGrid") -> list[int]:
-    """Extract surface triangle indices from a tetrahedral mesh.
-
-    This function identifies the boundary faces of a 3D tetrahedral mesh and
-    returns the vertex indices that form triangular faces on the surface.
-    This is essential for visualization and boundary condition application.
-
-    Args:
-        tetrahedral_mesh: VTK unstructured grid containing tetrahedral elements.
-
-    Returns:
-        List of vertex indices forming surface triangles. Every three consecutive
-        indices define one triangle.
-
-    Raises:
-        NotImplementedError: If the surface contains non-triangular faces.
-
-    """
-    # Extract the surface using VTK filter
-    surface_filter = vtk.vtkDataSetSurfaceFilter()
-    surface_filter.SetInputData(tetrahedral_mesh)
-    surface_filter.Update()
-
-    # Wrap with PyVista for easier manipulation
-    import pyvista as pv
-
-    surface_mesh = pv.wrap(surface_filter.GetOutput())
-    triangle_indices = []
-
-    # Process faces - PyVista stores faces as [n_vertices, v1, v2, ..., vn]
-    faces = surface_mesh.faces.reshape((-1, 4))
-    for face in faces:
-        if face[0] == 3:  # Triangle (3 vertices)
-            triangle_indices.extend([face[1], face[2], face[3]])
-        else:
-            raise NotImplementedError(
-                f"Non-triangular face found with {face[0]} vertices"
-            )
-
-    return triangle_indices
-
-
-def convert_to_tet_mesh(polydata: "vtk.vtkPolyData") -> "vtk.vtkUnstructuredGrid":
-    """Convert surface polydata to a tetrahedral volumetric mesh.
-
-    This function performs tetrahedralization of a surface mesh, creating
-    a 3D volumetric mesh suitable for finite element analysis. The process
-    fills the interior of the surface with tetrahedral elements.
-
-    Args:
-        polydata: VTK polydata representing a closed surface mesh.
-
-    Returns:
-        VTK unstructured grid containing tetrahedral elements filling the
-        volume enclosed by the input surface.
-
-    Raises:
-        RuntimeError: If tetrahedralization fails (e.g., non-manifold surface).
-
-    """
-    tetrahedral_filter = vtkDataSetTriangleFilter()
-    tetrahedral_filter.SetInputData(polydata)
-    tetrahedral_filter.Update()
-
-    tetrahedral_mesh = tetrahedral_filter.GetOutput()
-    return tetrahedral_mesh
-
-
-def convert_point_data_to_cell_data(input_data: "vtk.vtkDataSet") -> "vtk.vtkDataSet":
-    """Convert point-based field data to cell-based field data.
-
-    This function transforms field variables defined at mesh vertices (nodes)
-    to values defined at cell centers. This conversion is often needed when
-    switching between different numerical methods or visualization requirements.
-
-    Args:
-        input_data: VTK dataset with point data to be converted.
-
-    Returns:
-        VTK dataset with the same geometry but field data moved from points to cells.
-        Values are typically averaged from the surrounding points.
-
-    """
-    point_to_cell_filter = vtk.vtkPointDataToCellData()
-    point_to_cell_filter.SetInputData(input_data)
-    point_to_cell_filter.Update()
-
-    return point_to_cell_filter.GetOutput()
-
-
-def get_node_to_elem(polydata: "vtk.vtkDataSet") -> "vtk.vtkDataSet":
-    """Convert point data to cell data for VTK dataset.
-
-    This function transforms field variables defined at mesh vertices to
-    values defined at cell centers using VTK's built-in conversion filter.
-
-    Args:
-        polydata: VTK dataset with point data to be converted.
-
-    Returns:
-        VTK dataset with field data moved from points to cells.
-
-    """
-    point_to_cell_filter = vtk.vtkPointDataToCellData()
-    point_to_cell_filter.SetInputData(polydata)
-    point_to_cell_filter.Update()
-    cell_data = point_to_cell_filter.GetOutput()
-    return cell_data
-
-
-def get_fields_from_cell(
-    cell_data: "vtk.vtkCellData", variable_names: list[str]
-) -> np.ndarray:
-    """Extract field variables from VTK cell data.
-
-    This function extracts multiple field variables from VTK cell data and
-    organizes them into a structured NumPy array. Each variable becomes a
-    column in the output array.
-
-    Args:
-        cell_data: VTK cell data object containing field variables.
-        variable_names: List of variable names to extract from the cell data.
-
-    Returns:
-        NumPy array of shape (n_cells, n_variables) containing the extracted
-        field data. Variables are ordered according to the input list.
-
-    Raises:
-        ValueError: If a requested variable name is not found in the cell data.
-
-    """
-    extracted_fields = []
-    for variable_name in variable_names:
-        variable_array = cell_data.GetArray(variable_name)
-        if variable_array is None:
-            raise ValueError(f"Variable '{variable_name}' not found in cell data")
-
-        num_tuples = variable_array.GetNumberOfTuples()
-        field_values = []
-        for tuple_idx in range(num_tuples):
-            variable_value = np.array(variable_array.GetTuple(tuple_idx))
-            field_values.append(variable_value)
-        field_values = np.asarray(field_values)
-        extracted_fields.append(field_values)
-
-    # Transpose to get shape (n_cells, n_variables)
-    extracted_fields = np.transpose(np.asarray(extracted_fields), (1, 0))
-    return extracted_fields
-
-
-def get_fields(
-    data_attributes: "vtk.vtkDataSetAttributes", variable_names: list[str]
-) -> list[np.ndarray]:
-    """Extract multiple field variables from VTK data attributes.
-
-    This function extracts field variables from VTK data attributes (either
-    point data or cell data) and returns them as a list of NumPy arrays.
-    It handles both point and cell data seamlessly.
-
-    Args:
-        data_attributes: VTK data attributes object (point data or cell data).
-        variable_names: List of variable names to extract.
-
-    Returns:
-        List of NumPy arrays, one for each requested variable. Each array
-        has shape (n_points/n_cells, n_components) where n_components
-        depends on the variable (1 for scalars, 3 for vectors, etc.).
-
-    Raises:
-        ValueError: If a requested variable is not found in the data attributes.
-
-    """
-    extracted_fields = []
-    for variable_name in variable_names:
-        try:
-            vtk_array = data_attributes.GetArray(variable_name)
-        except ValueError as e:
-            raise ValueError(
-                f"Failed to get array '{variable_name}' from the data attributes: {e}"
-            )
-
-        # Convert VTK array to NumPy array with proper shape
-        numpy_array = numpy_support.vtk_to_numpy(vtk_array).reshape(
-            vtk_array.GetNumberOfTuples(), vtk_array.GetNumberOfComponents()
-        )
-        extracted_fields.append(numpy_array)
-
-    return extracted_fields
-
-
-def get_vertices(polydata: "vtk.vtkPolyData") -> np.ndarray:
-    """Extract vertex coordinates from VTK polydata object.
-
-    This function converts VTK polydata to a NumPy array containing the 3D
-    coordinates of all vertices in the mesh.
-
-    Args:
-        polydata: VTK polydata object containing mesh geometry.
-
-    Returns:
-        NumPy array of shape (n_points, 3) containing [x, y, z] coordinates
-        for each vertex.
-
-    """
-    vtk_points = polydata.GetPoints()
-    vertices = numpy_support.vtk_to_numpy(vtk_points.GetData())
-    return vertices
-
-
-def get_volume_data(
-    polydata: "vtk.vtkPolyData", variable_names: list[str]
-) -> tuple[np.ndarray, list[np.ndarray]]:
-    """Extract vertices and field data from 3D volumetric mesh.
-
-    This function extracts both geometric information (vertex coordinates)
-    and field data from a 3D volumetric mesh. It's commonly used for
-    processing finite element analysis results.
-
-    Args:
-        polydata: VTK polydata representing a 3D volumetric mesh.
-        variable_names: List of field variable names to extract.
-
-    Returns:
-        Tuple containing:
-        - Vertex coordinates as NumPy array of shape (n_vertices, 3)
-        - List of field arrays, one per variable
-
-    """
-    vertices = get_vertices(polydata)
-    point_data = polydata.GetPointData()
-    fields = get_fields(point_data, variable_names)
-
-    return vertices, fields
-
-
-def get_surface_data(
-    polydata: "vtk.vtkPolyData", variable_names: list[str]
-) -> tuple[np.ndarray, list[np.ndarray], list[tuple[int, int]]]:
-    """Extract surface mesh data including vertices, fields, and edge connectivity.
-
-    This function extracts comprehensive surface mesh information including
-    vertex coordinates, field data at vertices, and edge connectivity information.
-    It's commonly used for processing CFD surface results and boundary conditions.
-
-    Args:
-        polydata: VTK polydata representing a surface mesh.
-        variable_names: List of field variable names to extract from the mesh.
-
-    Returns:
-        Tuple containing:
-        - Vertex coordinates as NumPy array of shape (n_vertices, 3)
-        - List of field arrays, one per variable
-        - List of edge tuples representing mesh connectivity
-
-    Raises:
-        ValueError: If a requested variable is not found or polygon data is missing.
-
-    """
-    points = polydata.GetPoints()
-    vertices = np.array([points.GetPoint(i) for i in range(points.GetNumberOfPoints())])
-
-    point_data = polydata.GetPointData()
-    fields = []
-    for array_name in variable_names:
-        try:
-            array = point_data.GetArray(array_name)
-        except ValueError:
-            raise ValueError(
-                f"Failed to get array {array_name} from the unstructured grid."
-            )
-        array_data = np.zeros(
-            (points.GetNumberOfPoints(), array.GetNumberOfComponents())
-        )
-        for j in range(points.GetNumberOfPoints()):
-            array.GetTuple(j, array_data[j])
-        fields.append(array_data)
-
-    polys = polydata.GetPolys()
-    if polys is None:
-        raise ValueError("Failed to get polygons from the polydata.")
-    polys.InitTraversal()
-    edges = []
-    id_list = vtk.vtkIdList()
-    for _ in range(polys.GetNumberOfCells()):
-        polys.GetNextCell(id_list)
-        num_ids = id_list.GetNumberOfIds()
-        edges = [
-            (id_list.GetId(j), id_list.GetId((j + 1) % num_ids)) for j in range(num_ids)
-        ]
-
-    return vertices, fields, edges
-
-
 def calculate_normal_positional_encoding(
     coordinates_a: ArrayType,
     coordinates_b: ArrayType | None = None,
@@ -769,21 +417,28 @@ def pad_inp(arr: ArrayType, n_points: int, pad_value: float = 0.0) -> ArrayType:
     return arr_padded
 
 
-@profile
 def shuffle_array(
-    arr: ArrayType,
+    points: torch.Tensor,
     n_points: int,
-) -> tuple[ArrayType, ArrayType]:
-    """Randomly sample points from array without replacement.
+    weights: torch.Tensor = None,
+):
+    """
+    Randomly sample points from array without replacement.
 
     This function performs random sampling from the input array, selecting
     n_points points without replacement. It's commonly used for creating training
     subsets and data augmentation in machine learning workflows.
 
+    Optionally, you can provide weights to use in the sampling.
+
+    Note: the implementation with torch.multinomial is constrained to 2^24 points.
+    If the input is larger than that, it will be split and sampled from each chunk.
+
     Args:
         arr: Input array to sample from, shape (n_points, ...).
         n_points: Number of points to sample. If greater than arr.shape[0],
             all points are returned.
+        weights: Optional weights for sampling. If None, uniform weights are used.
 
     Returns:
         Tuple containing:
@@ -791,9 +446,9 @@ def shuffle_array(
         - Indices of the selected points
 
     Examples:
-        >>> import numpy as np
-        >>> np.random.seed(42)  # For reproducible results
-        >>> data = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
+        >>> import torch
+        >>> torch.manual_seed(42)  # For reproducible results
+        >>> data = torch.tensor([[1, 2], [3, 4], [5, 6], [7, 8]])
         >>> subset, indices = shuffle_array(data, 2)
         >>> subset.shape
         (2, 2)
@@ -802,15 +457,109 @@ def shuffle_array(
         >>> len(np.unique(indices)) == 2  # No duplicates
         True
     """
-    xp = array_type(arr)
-    if n_points > arr.shape[0]:
-        # If asking too many points, truncate the ask but still shuffle.
-        n_points = arr.shape[0]
-    idx = xp.random.choice(arr.shape[0], size=n_points, replace=False)
-    return arr[idx], idx
 
+    N_input_points = points.shape[0]
+
+    if N_input_points < n_points:
+        return points, torch.arange(N_input_points)
+
+    # If there are no weights, use uniform weights:
+    if weights is None:
+        weights = torch.ones(N_input_points, device=points.device)
+
+    # Using torch multinomial for this.
+    # Multinomial can't work with more than 2^24 input points.
+
+    # So apply chunking and stich back together in that case.
+    # Assume each chunk gets a number proportional to it's size,
+    # (but make sure they add up to n_points!)
+
+    max_chunk_size = 2**24
+
+    N_chunks = (N_input_points // max_chunk_size) + 1
 
-def shuffle_array_without_sampling(arr: ArrayType) -> tuple[ArrayType, ArrayType]:
+    # Divide the weights into these chunks
+    chunk_weights = torch.chunk(weights, N_chunks)
+
+    # Determine how mant points to compute per chunk:
+    points_per_chunk = [
+        round(n_points * c.shape[0] / N_input_points) for c in chunk_weights
+    ]
+    print(f"points_per_chunk: {points_per_chunk}")
+
+    gap = n_points - sum(points_per_chunk)
+    print(f"gap: {gap}")
+
+    if gap > 0:
+        for g in range(gap):
+            points_per_chunk[g] += 1
+    elif gap < 0:
+        for g in range(gap):
+            points_per_chunk[g] -= 1
+
+    # Create a list of indexes per chunk:
+    idx_chunks = [
+        torch.multinomial(
+            w,
+            p,
+            replacement=False,
+        )
+        for w, p in zip(chunk_weights, points_per_chunk)
+    ]
+
+    # Stich the chunks back together:
+    idx = torch.cat(idx_chunks)
+
+    # Apply the selection:
+    points_selected = points[idx]
+
+    return points_selected, idx
+
+
+# @profile
+# def shuffle_array(
+#     arr: ArrayType,
+#     n_points: int,
+# ) -> tuple[ArrayType, ArrayType]:
+#     """Randomly sample points from array without replacement.
+
+#     This function performs random sampling from the input array, selecting
+#     n_points points without replacement. It's commonly used for creating training
+#     subsets and data augmentation in machine learning workflows.
+
+#     Args:
+#         arr: Input array to sample from, shape (n_points, ...).
+#         n_points: Number of points to sample. If greater than arr.shape[0],
+#             all points are returned.
+
+#     Returns:
+#         Tuple containing:
+#         - Sampled array subset
+#         - Indices of the selected points
+
+#     Examples:
+#         >>> import numpy as np
+#         >>> np.random.seed(42)  # For reproducible results
+#         >>> data = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
+#         >>> subset, indices = shuffle_array(data, 2)
+#         >>> subset.shape
+#         (2, 2)
+#         >>> indices.shape
+#         (2,)
+#         >>> len(np.unique(indices)) == 2  # No duplicates
+#         True
+#     """
+#     xp = array_type(arr)
+#     if n_points > arr.shape[0]:
+#         # If asking too many points, truncate the ask but still shuffle.
+#         n_points = arr.shape[0]
+#     idx = xp.random.choice(arr.shape[0], size=n_points, replace=False)
+#     return arr[idx], idx
+
+
+def shuffle_array_without_sampling(
+    arr: torch.Tensor,
+) -> tuple[torch.Tensor, torch.Tensor]:
     """Shuffle array order without changing the number of elements.
 
     This function reorders all elements in the array randomly while preserving
@@ -826,9 +575,9 @@ def shuffle_array_without_sampling(arr: ArrayType) -> tuple[ArrayType, ArrayType
         - Permutation indices used for shuffling
 
     Examples:
-        >>> import numpy as np
-        >>> np.random.seed(42)  # For reproducible results
-        >>> data = np.array([[1], [2], [3], [4]])
+        >>> import torch
+        >>> torch.manual_seed(42)  # For reproducible results
+        >>> data = torch.tensor([[1], [2], [3], [4]])
         >>> shuffled, indices = shuffle_array_without_sampling(data)
         >>> shuffled.shape
         (4, 1)
@@ -837,9 +586,7 @@ def shuffle_array_without_sampling(arr: ArrayType) -> tuple[ArrayType, ArrayType
         >>> set(indices) == set(range(4))  # All original indices present
         True
     """
-    xp = array_type(arr)
-    idx = xp.arange(arr.shape[0])
-    xp.random.shuffle(idx)
+    idx = torch.randperm(arr.shape[0])
     return arr[idx], idx
 
 
@@ -1004,7 +751,6 @@ def create_grid(
     zv = xp.expand_dims(zv, -1)
     grid = xp.concatenate((xv, yv, zv), axis=-1)
     grid = xp.transpose(grid, (1, 0, 2, 3))
-
     return grid
 
 
diff --git a/physicsnemo/utils/domino/vtk_file_utils.py b/physicsnemo/utils/domino/vtk_file_utils.py
new file mode 100644
index 0000000000..cdde402f8c
--- /dev/null
+++ b/physicsnemo/utils/domino/vtk_file_utils.py
@@ -0,0 +1,380 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Utilities for data processing and training with the DoMINO model architecture.
+
+This module provides essential utilities for computational fluid dynamics data processing,
+mesh manipulation, field normalization, and geometric computations. It supports both
+CPU (NumPy) and GPU (CuPy) operations with automatic fallbacks.
+"""
+
+from pathlib import Path
+
+import numpy as np
+import vtk
+from vtk import vtkDataSetTriangleFilter
+from vtk.util import numpy_support
+
+
+def write_to_vtp(polydata: "vtk.vtkPolyData", filename: str) -> None:
+    """Write VTK polydata to a VTP (VTK PolyData) file format.
+
+    VTP files are XML-based and store polygonal data including points, polygons,
+    and associated field data. This format is commonly used for surface meshes
+    in computational fluid dynamics visualization.
+
+    Args:
+        polydata: VTK polydata object containing mesh geometry and fields.
+        filename: Output filename with .vtp extension. Directory will be created
+            if it doesn't exist.
+
+    Raises:
+        RuntimeError: If writing fails due to file permissions or disk space.
+
+    """
+    # Ensure output directory exists
+    output_path = Path(filename)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    writer = vtk.vtkXMLPolyDataWriter()
+    writer.SetFileName(str(output_path))
+    writer.SetInputData(polydata)
+
+    if not writer.Write():
+        raise RuntimeError(f"Failed to write polydata to {output_path}")
+
+
+def write_to_vtu(unstructured_grid: "vtk.vtkUnstructuredGrid", filename: str) -> None:
+    """Write VTK unstructured grid to a VTU (VTK Unstructured Grid) file format.
+
+    VTU files store 3D volumetric meshes with arbitrary cell types including
+    tetrahedra, hexahedra, and pyramids. This format is essential for storing
+    finite element analysis results.
+
+    Args:
+        unstructured_grid: VTK unstructured grid object containing volumetric mesh
+            geometry and field data.
+        filename: Output filename with .vtu extension. Directory will be created
+            if it doesn't exist.
+
+    Raises:
+        RuntimeError: If writing fails due to file permissions or disk space.
+
+    """
+    # Ensure output directory exists
+    output_path = Path(filename)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    writer = vtk.vtkXMLUnstructuredGridWriter()
+    writer.SetFileName(str(output_path))
+    writer.SetInputData(unstructured_grid)
+
+    if not writer.Write():
+        raise RuntimeError(f"Failed to write unstructured grid to {output_path}")
+
+
+def extract_surface_triangles(tetrahedral_mesh: "vtk.vtkUnstructuredGrid") -> list[int]:
+    """Extract surface triangle indices from a tetrahedral mesh.
+
+    This function identifies the boundary faces of a 3D tetrahedral mesh and
+    returns the vertex indices that form triangular faces on the surface.
+    This is essential for visualization and boundary condition application.
+
+    Args:
+        tetrahedral_mesh: VTK unstructured grid containing tetrahedral elements.
+
+    Returns:
+        List of vertex indices forming surface triangles. Every three consecutive
+        indices define one triangle.
+
+    Raises:
+        NotImplementedError: If the surface contains non-triangular faces.
+
+    """
+    # Extract the surface using VTK filter
+    surface_filter = vtk.vtkDataSetSurfaceFilter()
+    surface_filter.SetInputData(tetrahedral_mesh)
+    surface_filter.Update()
+
+    # Wrap with PyVista for easier manipulation
+    import pyvista as pv
+
+    surface_mesh = pv.wrap(surface_filter.GetOutput())
+    triangle_indices = []
+
+    # Process faces - PyVista stores faces as [n_vertices, v1, v2, ..., vn]
+    faces = surface_mesh.faces.reshape((-1, 4))
+    for face in faces:
+        if face[0] == 3:  # Triangle (3 vertices)
+            triangle_indices.extend([face[1], face[2], face[3]])
+        else:
+            raise NotImplementedError(
+                f"Non-triangular face found with {face[0]} vertices"
+            )
+
+    return triangle_indices
+
+
+def convert_to_tet_mesh(polydata: "vtk.vtkPolyData") -> "vtk.vtkUnstructuredGrid":
+    """Convert surface polydata to a tetrahedral volumetric mesh.
+
+    This function performs tetrahedralization of a surface mesh, creating
+    a 3D volumetric mesh suitable for finite element analysis. The process
+    fills the interior of the surface with tetrahedral elements.
+
+    Args:
+        polydata: VTK polydata representing a closed surface mesh.
+
+    Returns:
+        VTK unstructured grid containing tetrahedral elements filling the
+        volume enclosed by the input surface.
+
+    Raises:
+        RuntimeError: If tetrahedralization fails (e.g., non-manifold surface).
+
+    """
+    tetrahedral_filter = vtkDataSetTriangleFilter()
+    tetrahedral_filter.SetInputData(polydata)
+    tetrahedral_filter.Update()
+
+    tetrahedral_mesh = tetrahedral_filter.GetOutput()
+    return tetrahedral_mesh
+
+
+def convert_point_data_to_cell_data(input_data: "vtk.vtkDataSet") -> "vtk.vtkDataSet":
+    """Convert point-based field data to cell-based field data.
+
+    This function transforms field variables defined at mesh vertices (nodes)
+    to values defined at cell centers. This conversion is often needed when
+    switching between different numerical methods or visualization requirements.
+
+    Args:
+        input_data: VTK dataset with point data to be converted.
+
+    Returns:
+        VTK dataset with the same geometry but field data moved from points to cells.
+        Values are typically averaged from the surrounding points.
+
+    """
+    point_to_cell_filter = vtk.vtkPointDataToCellData()
+    point_to_cell_filter.SetInputData(input_data)
+    point_to_cell_filter.Update()
+
+    return point_to_cell_filter.GetOutput()
+
+
+def get_node_to_elem(polydata: "vtk.vtkDataSet") -> "vtk.vtkDataSet":
+    """Convert point data to cell data for VTK dataset.
+
+    This function transforms field variables defined at mesh vertices to
+    values defined at cell centers using VTK's built-in conversion filter.
+
+    Args:
+        polydata: VTK dataset with point data to be converted.
+
+    Returns:
+        VTK dataset with field data moved from points to cells.
+
+    """
+    point_to_cell_filter = vtk.vtkPointDataToCellData()
+    point_to_cell_filter.SetInputData(polydata)
+    point_to_cell_filter.Update()
+    cell_data = point_to_cell_filter.GetOutput()
+    return cell_data
+
+
+def get_fields_from_cell(
+    cell_data: "vtk.vtkCellData", variable_names: list[str]
+) -> np.ndarray:
+    """Extract field variables from VTK cell data.
+
+    This function extracts multiple field variables from VTK cell data and
+    organizes them into a structured NumPy array. Each variable becomes a
+    column in the output array.
+
+    Args:
+        cell_data: VTK cell data object containing field variables.
+        variable_names: List of variable names to extract from the cell data.
+
+    Returns:
+        NumPy array of shape (n_cells, n_variables) containing the extracted
+        field data. Variables are ordered according to the input list.
+
+    Raises:
+        ValueError: If a requested variable name is not found in the cell data.
+
+    """
+    extracted_fields = []
+    for variable_name in variable_names:
+        variable_array = cell_data.GetArray(variable_name)
+        if variable_array is None:
+            raise ValueError(f"Variable '{variable_name}' not found in cell data")
+
+        num_tuples = variable_array.GetNumberOfTuples()
+        field_values = []
+        for tuple_idx in range(num_tuples):
+            variable_value = np.array(variable_array.GetTuple(tuple_idx))
+            field_values.append(variable_value)
+        field_values = np.asarray(field_values)
+        extracted_fields.append(field_values)
+
+    # Transpose to get shape (n_cells, n_variables)
+    extracted_fields = np.transpose(np.asarray(extracted_fields), (1, 0))
+    return extracted_fields
+
+
+def get_fields(
+    data_attributes: "vtk.vtkDataSetAttributes", variable_names: list[str]
+) -> list[np.ndarray]:
+    """Extract multiple field variables from VTK data attributes.
+
+    This function extracts field variables from VTK data attributes (either
+    point data or cell data) and returns them as a list of NumPy arrays.
+    It handles both point and cell data seamlessly.
+
+    Args:
+        data_attributes: VTK data attributes object (point data or cell data).
+        variable_names: List of variable names to extract.
+
+    Returns:
+        List of NumPy arrays, one for each requested variable. Each array
+        has shape (n_points/n_cells, n_components) where n_components
+        depends on the variable (1 for scalars, 3 for vectors, etc.).
+
+    Raises:
+        ValueError: If a requested variable is not found in the data attributes.
+
+    """
+    extracted_fields = []
+    for variable_name in variable_names:
+        try:
+            vtk_array = data_attributes.GetArray(variable_name)
+        except ValueError as e:
+            raise ValueError(
+                f"Failed to get array '{variable_name}' from the data attributes: {e}"
+            )
+
+        # Convert VTK array to NumPy array with proper shape
+        numpy_array = numpy_support.vtk_to_numpy(vtk_array).reshape(
+            vtk_array.GetNumberOfTuples(), vtk_array.GetNumberOfComponents()
+        )
+        extracted_fields.append(numpy_array)
+
+    return extracted_fields
+
+
+def get_vertices(polydata: "vtk.vtkPolyData") -> np.ndarray:
+    """Extract vertex coordinates from VTK polydata object.
+
+    This function converts VTK polydata to a NumPy array containing the 3D
+    coordinates of all vertices in the mesh.
+
+    Args:
+        polydata: VTK polydata object containing mesh geometry.
+
+    Returns:
+        NumPy array of shape (n_points, 3) containing [x, y, z] coordinates
+        for each vertex.
+
+    """
+    vtk_points = polydata.GetPoints()
+    vertices = numpy_support.vtk_to_numpy(vtk_points.GetData())
+    return vertices
+
+
+def get_volume_data(
+    polydata: "vtk.vtkPolyData", variable_names: list[str]
+) -> tuple[np.ndarray, list[np.ndarray]]:
+    """Extract vertices and field data from 3D volumetric mesh.
+
+    This function extracts both geometric information (vertex coordinates)
+    and field data from a 3D volumetric mesh. It's commonly used for
+    processing finite element analysis results.
+
+    Args:
+        polydata: VTK polydata representing a 3D volumetric mesh.
+        variable_names: List of field variable names to extract.
+
+    Returns:
+        Tuple containing:
+        - Vertex coordinates as NumPy array of shape (n_vertices, 3)
+        - List of field arrays, one per variable
+
+    """
+    vertices = get_vertices(polydata)
+    point_data = polydata.GetPointData()
+    fields = get_fields(point_data, variable_names)
+
+    return vertices, fields
+
+
+def get_surface_data(
+    polydata: "vtk.vtkPolyData", variable_names: list[str]
+) -> tuple[np.ndarray, list[np.ndarray], list[tuple[int, int]]]:
+    """Extract surface mesh data including vertices, fields, and edge connectivity.
+
+    This function extracts comprehensive surface mesh information including
+    vertex coordinates, field data at vertices, and edge connectivity information.
+    It's commonly used for processing CFD surface results and boundary conditions.
+
+    Args:
+        polydata: VTK polydata representing a surface mesh.
+        variable_names: List of field variable names to extract from the mesh.
+
+    Returns:
+        Tuple containing:
+        - Vertex coordinates as NumPy array of shape (n_vertices, 3)
+        - List of field arrays, one per variable
+        - List of edge tuples representing mesh connectivity
+
+    Raises:
+        ValueError: If a requested variable is not found or polygon data is missing.
+
+    """
+    points = polydata.GetPoints()
+    vertices = np.array([points.GetPoint(i) for i in range(points.GetNumberOfPoints())])
+
+    point_data = polydata.GetPointData()
+    fields = []
+    for array_name in variable_names:
+        try:
+            array = point_data.GetArray(array_name)
+        except ValueError:
+            raise ValueError(
+                f"Failed to get array {array_name} from the unstructured grid."
+            )
+        array_data = np.zeros(
+            (points.GetNumberOfPoints(), array.GetNumberOfComponents())
+        )
+        for j in range(points.GetNumberOfPoints()):
+            array.GetTuple(j, array_data[j])
+        fields.append(array_data)
+
+    polys = polydata.GetPolys()
+    if polys is None:
+        raise ValueError("Failed to get polygons from the polydata.")
+    polys.InitTraversal()
+    edges = []
+    id_list = vtk.vtkIdList()
+    for _ in range(polys.GetNumberOfCells()):
+        polys.GetNextCell(id_list)
+        num_ids = id_list.GetNumberOfIds()
+        edges = [
+            (id_list.GetId(j), id_list.GetId((j + 1) % num_ids)) for j in range(num_ids)
+        ]
+
+    return vertices, fields, edges

From 8590afd3de9117268e56fcc1928a193fddf2b2ef Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 25 Aug 2025 16:18:27 +0000
Subject: [PATCH 04/98] Add new dataset to read DrivaerML like data in various
 formats.

Separate the dataloading from the data processing in DoMINO datapipe.
---
 .../domino/src/benchmark_dataloader.py        |  250 ++++
 physicsnemo/datapipes/cae/domino_datapipe2.py | 1174 +++++++++++++++++
 .../datapipes/cae/drivaer_ml_datapipe.py      |  888 +++++++++++++
 physicsnemo/utils/domino/utils.py             |   51 +-
 4 files changed, 2337 insertions(+), 26 deletions(-)
 create mode 100644 examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
 create mode 100644 physicsnemo/datapipes/cae/domino_datapipe2.py
 create mode 100644 physicsnemo/datapipes/cae/drivaer_ml_datapipe.py

diff --git a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
new file mode 100644
index 0000000000..95b39cedd3
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
@@ -0,0 +1,250 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This code defines a distributed pipeline for training the DoMINO model on
+CFD datasets. It includes the computation of scaling factors, instantiating
+the DoMINO model and datapipe, automatically loading the most recent checkpoint,
+training the model in parallel using DistributedDataParallel across multiple
+GPUs, calculating the loss and updating model parameters using mixed precision.
+This is a common recipe that enables training of combined models for surface and
+volume as well either of them separately. Validation is also conducted every epoch,
+where predictions are compared against ground truth values. The code logs training
+and validation metrics to TensorBoard. The train tab in config.yaml can be used to
+specify batch size, number of epochs and other training parameters.
+"""
+
+import time
+import os
+import re
+import torch
+import torchinfo
+
+from typing import Literal, Any
+
+import apex
+import numpy as np
+import hydra
+from hydra.utils import to_absolute_path
+from omegaconf import DictConfig, OmegaConf
+import torch.distributed as dist
+from torch.cuda.amp import GradScaler, autocast
+from torch.nn.parallel import DistributedDataParallel
+from torch.utils.data import DataLoader
+from torch.utils.data.distributed import DistributedSampler
+from torch.utils.tensorboard import SummaryWriter
+from nvtx import annotate as nvtx_annotate
+import torch.cuda.nvtx as nvtx
+
+
+from physicsnemo.distributed import DistributedManager
+from physicsnemo.launch.utils import load_checkpoint, save_checkpoint
+from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper
+
+from physicsnemo.datapipes.cae.domino_datapipe import (
+    DoMINODataPipe,
+    compute_scaling_factors,
+    create_domino_dataset,
+)
+from physicsnemo.models.domino.model import DoMINO
+from physicsnemo.utils.domino.utils import *
+
+# This is included for GPU memory tracking:
+from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo
+import time
+
+# Initialize NVML
+nvmlInit()
+
+
+from physicsnemo.utils.profiling import profile, Profiler
+
+
+@profile
+def train_epoch(
+    dataloader,
+    sampler,
+    logger,
+    gpu_handle,
+    epoch_index,
+    device,
+):
+    dist = DistributedManager()
+
+    indices = list(iter(sampler))
+    print(f"indices: {indices}")
+    # If you tell the dataloader the indices in advance, it will preload
+    # and pre-preprocess data
+    dataloader.set_indices(indices)
+
+    gpu_start_info = nvmlDeviceGetMemoryInfo(gpu_handle)
+    start_time = time.perf_counter()
+    for i_batch, sample_batched in enumerate(dataloader):
+        # sampled_batched = dict_to_device(sample_batched, device)
+
+        # for key in sampled_batched.keys():
+        #     print(f"{key}: {sampled_batched[key].shape}")
+
+        # Gather data and report
+        elapsed_time = time.perf_counter() - start_time
+        start_time = time.perf_counter()
+        gpu_end_info = nvmlDeviceGetMemoryInfo(gpu_handle)
+        gpu_memory_used = gpu_end_info.used / (1024**3)
+        gpu_memory_delta = (gpu_end_info.used - gpu_start_info.used) / (1024**3)
+
+        logging_string = f"Device {device}, batch processed: {i_batch + 1}\n"
+        logging_string += f"  GPU memory used: {gpu_memory_used:.3f} Gb\n"
+        logging_string += f"  GPU memory delta: {gpu_memory_delta:.3f} Gb\n"
+        logging_string += f"  Time taken: {elapsed_time:.2f} seconds\n"
+        logger.info(logging_string)
+        gpu_start_info = nvmlDeviceGetMemoryInfo(gpu_handle)
+
+    return
+
+
+def get_or_compute_scaling_factors(
+    cfg: DictConfig,
+) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+    """Get or compute scaling factors for volume and surface fields normalization.
+
+    This function either loads pre-computed scaling factors from disk or computes them
+    if they don't exist. The scaling factors are used for normalizing volume and surface
+    fields data based on the specified normalization method in the config.
+
+    Args:
+        cfg (DictConfig): Configuration object containing:
+            - project.name: Project name for saving/loading scaling factors
+            - model.normalization: Type of normalization ("min_max_scaling" or "mean_std_scaling")
+            - data.input_dir: Input directory path
+            - data_processor.use_cache: Whether to use cached data
+
+    Returns:
+        tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: A tuple containing:
+            - vol_factors: Scaling factors for volume fields (max/min or mean/std)
+            - surf_factors: Scaling factors for surface fields (max/min or mean/std)
+            Each factor is a numpy array containing the respective scaling values.
+
+    Raises:
+        ValueError: If an invalid normalization type is specified in the config.
+    """
+    # Compute or load the scaling factors:
+    vol_save_path = os.path.join(
+        "outputs", cfg.project.name, "volume_scaling_factors.npy"
+    )
+    surf_save_path = os.path.join(
+        "outputs", cfg.project.name, "surface_scaling_factors.npy"
+    )
+
+    if not os.path.exists(vol_save_path) or not os.path.exists(surf_save_path):
+        # Save the scaling factors if needed:
+        mean, std, min_val, max_val = compute_scaling_factors(
+            cfg=cfg,
+            input_path=cfg.data.input_dir,
+            use_cache=cfg.data_processor.use_cache,
+        )
+
+        v_mean = mean["volume_fields"].cpu().numpy()
+        v_std = std["volume_fields"].cpu().numpy()
+        v_min = min_val["volume_fields"].cpu().numpy()
+        v_max = max_val["volume_fields"].cpu().numpy()
+
+        s_mean = mean["surface_fields"].cpu().numpy()
+        s_std = std["surface_fields"].cpu().numpy()
+        s_min = min_val["surface_fields"].cpu().numpy()
+        s_max = max_val["surface_fields"].cpu().numpy()
+
+        np.save(vol_save_path, [v_mean, v_std, v_min, v_max])
+        np.save(surf_save_path, [s_mean, s_std, s_min, s_max])
+    else:
+        v_mean, v_std, v_min, v_max = np.load(vol_save_path)
+        s_mean, s_std, s_min, s_max = np.load(surf_save_path)
+
+    if cfg.model.normalization == "min_max_scaling":
+        vol_factors = [v_max, v_min]
+    elif cfg.model.normalization == "mean_std_scaling":
+        vol_factors = [v_mean, v_std]
+    else:
+        raise ValueError(f"Invalid normalization type: {cfg.model.normalization}")
+
+    if cfg.model.normalization == "min_max_scaling":
+        surf_factors = [s_max, s_min]
+    elif cfg.model.normalization == "mean_std_scaling":
+        surf_factors = [s_mean, s_std]
+    else:
+        raise ValueError(f"Invalid normalization type: {cfg.model.normalization}")
+
+    return vol_factors, surf_factors
+
+
+@hydra.main(version_base="1.3", config_path="conf", config_name="config")
+def main(cfg: DictConfig) -> None:
+    # initialize distributed manager
+    DistributedManager.initialize()
+    dist = DistributedManager()
+
+    # Initialize NVML
+    nvmlInit()
+
+    gpu_handle = nvmlDeviceGetHandleByIndex(dist.device.index)
+
+    model_type = cfg.model.model_type
+
+    logger = PythonLogger("Train")
+    logger = RankZeroLoggingWrapper(logger, dist)
+
+    logger.info(f"Config summary:\n{OmegaConf.to_yaml(cfg, sort_keys=True)}")
+
+    vol_factors, surf_factors = get_or_compute_scaling_factors(cfg)
+
+    train_dataset = create_domino_dataset(
+        cfg,
+        phase="train",
+        volume_variable_names="volume_fields",
+        surface_variable_names="surface_fields",
+        vol_factors=vol_factors,
+        surf_factors=surf_factors,
+    )
+    train_sampler = DistributedSampler(
+        train_dataset, num_replicas=dist.world_size, rank=dist.rank
+    )
+
+    # train_dataloader = DataLoader(
+    #     train_dataset,
+    #     sampler=train_sampler,
+    #     **cfg.train.dataloader,
+    # )
+
+    for epoch in range(0, cfg.train.epochs):
+        start_time = time.perf_counter()
+        logger.info(f"Device {dist.device}, epoch {epoch}:")
+
+        epoch_start_time = time.perf_counter()
+        train_epoch(
+            dataloader=train_dataset,
+            sampler=train_sampler,
+            logger=logger,
+            gpu_handle=gpu_handle,
+            epoch_index=epoch,
+            device=dist.device,
+        )
+        epoch_end_time = time.perf_counter()
+        logger.info(
+            f"Device {dist.device}, Epoch {epoch} took {epoch_end_time - epoch_start_time:.3f} seconds"
+        )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py
new file mode 100644
index 0000000000..310493e3cb
--- /dev/null
+++ b/physicsnemo/datapipes/cae/domino_datapipe2.py
@@ -0,0 +1,1174 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This code provides the datapipe for reading the processed npy files,
+generating multi-res grids, calculating signed distance fields,
+positional encodings, sampling random points in the volume and on surface,
+normalizing fields and returning the output tensors as a dictionary.
+
+This datapipe also non-dimensionalizes the fields, so the order in which the variables should
+be fixed: velocity, pressure, turbulent viscosity for volume variables and
+pressure, wall-shear-stress for surface variables. The different parameters such as
+variable names, domain resolution, sampling size etc. are configurable in config.yaml.
+"""
+
+from concurrent.futures import ThreadPoolExecutor
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Literal, Optional, Protocol, Sequence, Union
+
+import numpy as np
+import torch
+import torch.cuda.nvtx as nvtx
+from omegaconf import DictConfig
+from torch import Tensor
+from torch.utils.data import Dataset
+
+from physicsnemo.datapipes.cae.drivaer_ml_datapipe import (
+    DrivaerMLDataset,
+    compute_mean_std_min_max,
+)
+from physicsnemo.distributed import DistributedManager
+from physicsnemo.utils.domino.utils import (
+    ArrayType,
+    area_weighted_shuffle_array,
+    calculate_center_of_mass,
+    calculate_normal_positional_encoding,
+    create_grid,
+    get_filenames,
+    normalize,
+    pad,
+    shuffle_array,
+    standardize,
+)
+from physicsnemo.utils.neighbors import knn
+from physicsnemo.utils.profiling import profile
+from physicsnemo.utils.sdf import signed_distance_field
+
+
+class BoundingBox(Protocol):
+    """
+    Type definition for the required format of bounding box dimensions.
+    """
+
+    min: ArrayType
+    max: ArrayType
+
+
+@dataclass
+class DoMINODataConfig:
+    """Configuration for DoMINO dataset processing pipeline.
+
+    Attributes:
+        data_path: Path to the dataset to load.
+        phase: Which phase of data to load ("train", "val", or "test").
+        surface_variables: (Surface specific) Names of surface variables.
+        surface_points_sample: (Surface specific) Number of surface points to sample per batch.
+        num_surface_neighbors: (Surface specific) Number of surface neighbors to consider for nearest neighbors approach.
+        resample_surfaces: (Surface specific) Whether to resample the surface before kdtree/knn. Not available if caching.
+        resampling_points: (Surface specific) Number of points to resample the surface to.
+        surface_sampling_algorithm: (Surface specific) Algorithm to use for surface sampling ("area_weighted" or "random").
+        surface_factors: (Surface specific) Non-dimensionalization factors for surface variables.
+            If set, and scaling_type is:
+            - min_max_scaling -> rescale surface_fields to the min/max set here
+            - mean_std_scaling -> rescale surface_fields to the mean and std set here.
+        bounding_box_dims_surf: (Surface specific) Dimensions of bounding box. Must be an object with min/max
+            attributes that are arraylike.
+        volume_variables: (Volume specific) Names of volume variables.
+        volume_points_sample: (Volume specific) Number of volume points to sample per batch.
+        volume_factors: (Volume specific) Non-dimensionalization factors for volume variables scaling.
+            If set, and scaling_type is:
+            - min_max_scaling -> rescale volume_fields to the min/max set here
+            - mean_std_scaling -> rescale volume_fields to the mean and std set here.
+        bounding_box_dims: (Volume specific) Dimensions of bounding box. Must be an object with min/max
+            attributes that are arraylike.
+        grid_resolution: Resolution of the latent grid.
+        normalize_coordinates: Whether to normalize coordinates based on min/max values.
+            For surfaces: uses s_min/s_max, defined from:
+            - Surface bounding box, if defined.
+            - Min/max of the stl_vertices
+            For volumes: uses c_min/c_max, defined from:
+            - Volume bounding_box if defined,
+            - 1.5x s_min/max otherwise, except c_min[2] = s_min[2] in this case
+        sample_in_bbox: Whether to sample points in a specified bounding box.
+            Uses the same min/max points as coordinate normalization.
+            Only performed if compute_scaling_factors is false.
+        sampling: Whether to downsample the full resolution mesh to fit in GPU memory.
+            Surface and volume sampling points are configured separately as:
+            - surface.points_sample
+            - volume.points_sample
+        geom_points_sample: Number of STL points sampled per batch.
+            Independent of volume.points_sample and surface.points_sample.
+        positional_encoding: Whether to use positional encoding. Affects the calculation of:
+            - pos_volume_closest
+            - pos_volume_center_of_mass
+            - pos_surface_centter_of_mass
+        scaling_type: Scaling type for volume variables.
+            If used, will rescale the volume_fields and surface fields outputs.
+            Requires volume.factor and surface.factor to be set.
+        compute_scaling_factors: Whether to compute scaling factors.
+            Not available if caching.
+            Many preprocessing pieces are disabled if computing scaling factors.
+        caching: Whether this is for caching or serving.
+        deterministic: Whether to use a deterministic seed for sampling and random numbers.
+        gpu_preprocessing: Whether to do preprocessing on the GPU (False for CPU).
+        gpu_output: Whether to return output on the GPU as cupy arrays.
+            If False, returns numpy arrays.
+            You might choose gpu_preprocessing=True and gpu_output=False if caching.
+    """
+
+    data_path: Path
+    phase: Literal["train", "val", "test"]
+
+    # Surface-specific variables:
+    surface_variables: Optional[Sequence] = ("pMean", "wallShearStress")
+    surface_points_sample: int = 1024
+    num_surface_neighbors: int = 11
+    resample_surfaces: bool = False
+    resampling_points: int = 1_000_000
+    surface_sampling_algorithm: str = Literal["area_weighted", "random"]
+    surface_factors: Optional[Sequence] = None
+    bounding_box_dims_surf: Optional[Union[BoundingBox, Sequence]] = None
+
+    # Volume specific variables:
+    volume_variables: Optional[Sequence] = ("UMean", "pMean")
+    volume_points_sample: int = 1024
+    volume_factors: Optional[Sequence] = None
+    bounding_box_dims: Optional[Union[BoundingBox, Sequence]] = None
+
+    grid_resolution: Union[Sequence, ArrayType] = (256, 96, 64)
+    normalize_coordinates: bool = False
+    sample_in_bbox: bool = False
+    sampling: bool = False
+    geom_points_sample: int = 300000
+    positional_encoding: bool = False
+    scaling_type: Optional[Literal["min_max_scaling", "mean_std_scaling"]] = None
+    compute_scaling_factors: bool = False
+    caching: bool = False
+    deterministic: bool = False
+    gpu_preprocessing: bool = True
+    gpu_output: bool = True
+
+    def __post_init__(self):
+        # Ensure data_path is a Path object:
+        if isinstance(self.data_path, str):
+            self.data_path = Path(self.data_path)
+        self.data_path = self.data_path.expanduser()
+
+        if not self.data_path.exists():
+            raise ValueError(f"Path {self.data_path} does not exist")
+
+        if not self.data_path.is_dir():
+            raise ValueError(f"Path {self.data_path} is not a directory")
+
+        # Object if caching settings are impossible:
+        if self.caching:
+            if self.sampling:
+                raise ValueError("Sampling should be False for caching")
+            if self.compute_scaling_factors:
+                raise ValueError("Compute scaling factors should be False for caching")
+            if self.resample_surfaces:
+                raise ValueError("Resample surface should be False for caching")
+
+        if self.phase not in [
+            "train",
+            "val",
+            "test",
+        ]:
+            raise ValueError(
+                f"phase should be one of ['train', 'val', 'test'], got {self.phase}"
+            )
+        if self.scaling_type is not None:
+            if self.scaling_type not in [
+                "min_max_scaling",
+                "mean_std_scaling",
+            ]:
+                raise ValueError(
+                    f"scaling_type should be one of ['min_max_scaling', 'mean_std_scaling'], got {self.scaling_type}"
+                )
+
+
+##### TODO
+# - check the bounding box protocol works
+
+
+class DoMINODataPipe(Dataset):
+    """
+    Datapipe for DoMINO
+
+    Leverages a dataset for the actual reading of the data, and this
+    object is responsible for preprocessing the data.
+
+    """
+
+    def __init__(
+        self,
+        input_path,
+        model_type: Literal["surface", "volume", "combined"],
+        **data_config_overrides,
+    ):
+        # Perform config packaging and validation
+        self.config = DoMINODataConfig(data_path=input_path, **data_config_overrides)
+
+        # Set up the distributed manager:
+        if not DistributedManager.is_initialized():
+            DistributedManager.initialize()
+
+        dist = DistributedManager()
+        if self.config.gpu_preprocessing or self.config.gpu_output:
+            # Make sure we move data to the right device:
+            target_device = dist.device
+        else:
+            target_device = torch.device("cpu")
+
+        self.device = target_device
+
+        self.model_type = model_type
+
+        # Update the arrays for bounding boxes:
+        if hasattr(self.config.bounding_box_dims, "max") and hasattr(
+            self.config.bounding_box_dims, "min"
+        ):
+            self.config.bounding_box_dims = [
+                torch.tensor(
+                    self.config.bounding_box_dims.max,
+                    device=self.device,
+                    dtype=torch.float32,
+                ),
+                torch.tensor(
+                    self.config.bounding_box_dims.min,
+                    device=self.device,
+                    dtype=torch.float32,
+                ),
+            ]
+            self.volume_grid = create_grid(
+                self.config.bounding_box_dims[0],
+                self.config.bounding_box_dims[1],
+                self.config.grid_resolution,
+            )
+
+        if hasattr(self.config.bounding_box_dims_surf, "max") and hasattr(
+            self.config.bounding_box_dims_surf, "min"
+        ):
+            self.config.bounding_box_dims_surf = [
+                torch.tensor(
+                    self.config.bounding_box_dims_surf.max,
+                    device=self.device,
+                    dtype=torch.float32,
+                ),
+                torch.tensor(
+                    self.config.bounding_box_dims_surf.min,
+                    device=self.device,
+                    dtype=torch.float32,
+                ),
+            ]
+
+            self.surf_grid = create_grid(
+                self.config.bounding_box_dims_surf[0],
+                self.config.bounding_box_dims_surf[1],
+                self.config.grid_resolution,
+            )
+
+        # Ensure the volume and surface scaling factors are torch tensors
+        # and on the right device:
+        if self.config.volume_factors is not None:
+            self.config.volume_factors = torch.tensor(
+                self.config.volume_factors, device=self.device, dtype=torch.float32
+            )
+        if self.config.surface_factors is not None:
+            self.config.surface_factors = torch.tensor(
+                self.config.surface_factors, device=self.device, dtype=torch.float32
+            )
+
+        # Always read these keys:
+        self.keys_to_read = ["stl_coordinates", "stl_centers", "stl_faces", "stl_areas"]
+
+        self.keys_to_read_if_available = {
+            "global_params_values": torch.tensor([30.0, 1.226], device=self.device),
+            "global_params_reference": torch.tensor([30.0, 1.226], device=self.device),
+        }
+
+        self.volume_keys = ["volume_mesh_centers", "volume_fields"]
+        self.surface_keys = [
+            "surface_mesh_centers",
+            "surface_normals",
+            "surface_areas",
+            "surface_fields",
+        ]
+
+        if self.model_type == "volume" or self.model_type == "combined":
+            self.keys_to_read.extend(self.volume_keys)
+        if self.model_type == "surface" or self.model_type == "combined":
+            self.keys_to_read.extend(self.surface_keys)
+
+        self.dataset = DrivaerMLDataset(
+            data_dir=self.config.data_path,
+            keys_to_read=self.keys_to_read,
+            output_device=self.device,
+        )
+
+        # This is thread storage for data preprocessing:
+        self._preprocess_queue = {}
+        self._preprocess_events = {}
+        self.preprocess_depth = 2
+        self.preprocess_executor = ThreadPoolExecutor(max_workers=2)
+
+    def set_indices(self, indices: list[int]):
+        """
+        Set the indices for the dataset for this epoch.
+        """
+        self.indices = indices
+
+    def __len__(self):
+        return len(self.dataset)
+
+    def compute_stl_scaling(
+        self, stl_vertices: torch.Tensor, bounding_box_dims_surf: torch.Tensor | None
+    ):
+        """
+        Compute the min and max for the defining mesh.
+
+        """
+
+        s_min = torch.amin(stl_vertices, 0)
+        s_max = torch.amax(stl_vertices, 0)
+
+        length_scale = torch.amax(s_max - s_min)
+
+        # if dynamic_bbox_scaling:
+        # Check the bounding box is not unit length
+
+        if bounding_box_dims_surf is not None:
+            s_max = bounding_box_dims_surf[0]
+            s_min = bounding_box_dims_surf[1]
+            surf_grid = self.surf_grid
+        else:
+            # Create the grid:
+            surf_grid = create_grid(s_max, s_min, self.grid_resolution)
+
+        surf_grid_max_min = torch.stack([s_min, s_max])
+
+        return s_min, s_max, length_scale, surf_grid_max_min, surf_grid
+
+    @profile
+    def process_combined(
+        self,
+        s_min,
+        s_max,
+        surf_grid,
+        stl_vertices,
+        mesh_indices_flattened,
+    ):
+        # SDF calculation on the grid using WARP
+        nx, ny, nz = self.config.grid_resolution
+
+        sdf_surf_grid, _ = signed_distance_field(
+            stl_vertices,
+            mesh_indices_flattened,
+            surf_grid,
+            use_sign_winding_number=True,
+        )
+
+        if self.config.sampling:
+            geometry_points = self.config.geom_points_sample
+            geometry_coordinates_sampled, idx_geometry = shuffle_array(
+                stl_vertices, geometry_points
+            )
+            if geometry_coordinates_sampled.shape[0] < geometry_points:
+                geometry_coordinates_sampled = pad(
+                    geometry_coordinates_sampled, geometry_points, pad_value=-100.0
+                )
+            geom_centers = geometry_coordinates_sampled
+        else:
+            geom_centers = stl_vertices
+
+        return (sdf_surf_grid, geom_centers)
+
+    @profile
+    def process_surface(
+        self,
+        s_min: torch.Tensor,
+        s_max: torch.Tensor,
+        center_of_mass: torch.Tensor,
+        surf_grid: torch.Tensor,
+        surface_coordinates: torch.Tensor,
+        surface_normals: torch.Tensor,
+        surface_sizes: torch.Tensor,
+        surface_fields: torch.Tensor,
+    ) -> dict[str, torch.Tensor]:
+        nx, ny, nz = self.config.grid_resolution
+
+        return_dict = {}
+
+        # Remove any sizes <= 0:
+        idx = surface_sizes > 0
+        surface_sizes = surface_sizes[idx]
+        surface_fields = surface_fields[idx]
+        surface_normals = surface_normals[idx]
+        surface_coordinates = surface_coordinates[idx]
+
+        if self.config.resample_surfaces:
+            if self.config.resampling_points > surface_coordinates.shape[0]:
+                resampling_points = surface_coordinates.shape[0]
+            else:
+                resampling_points = self.config.resampling_points
+
+            surface_coordinates, idx_s = shuffle_array(
+                surface_coordinates, resampling_points
+            )
+            surface_normals = surface_normals[idx_s]
+            surface_sizes = surface_sizes[idx_s]
+            surface_fields = surface_fields[idx_s]
+
+        c_max = self.config.bounding_box_dims[0]
+        c_min = self.config.bounding_box_dims[1]
+
+        if self.config.sample_in_bbox:
+            ids_min = surface_coordinates[:] > c_min
+            ids_max = surface_coordinates[:] < c_max
+
+            ids_in_bbox = ids_min & ids_max
+            ids_in_bbox = ids_in_bbox.all(dim=-1)
+
+            surface_coordinates = surface_coordinates[ids_in_bbox]
+            surface_normals = surface_normals[ids_in_bbox]
+            surface_sizes = surface_sizes[ids_in_bbox]
+            surface_fields = surface_fields[ids_in_bbox]
+
+        # Compute the positional encoding before sampling
+        if self.config.positional_encoding:
+            dx, dy, dz = (
+                (s_max[0] - s_min[0]) / nx,
+                (s_max[1] - s_min[1]) / ny,
+                (s_max[2] - s_min[2]) / nz,
+            )
+            pos_normals_com_surface = calculate_normal_positional_encoding(
+                surface_coordinates, center_of_mass, cell_length=[dx, dy, dz]
+            )
+        else:
+            pos_normals_com_surface = surface_coordinates - center_of_mass
+
+        if self.config.sampling:
+            # Perform the down sampling:
+
+            if self.config.surface_sampling_algorithm == "area_weighted":
+                weights = surface_sizes
+                # (
+                #     surface_coordinates_sampled,
+                #     idx_surface,
+                # ) = area_weighted_shuffle_array(
+                #     surface_coordinates,
+                #     self.config.surface_points_sample,
+                #     surface_sizes,
+                # )
+            else:
+                weights = None
+                # surface_coordinates_sampled, idx_surface = shuffle_array(
+                #     surface_coordinates, self.config.surface_points_sample
+                # )
+
+            surface_coordinates_sampled, idx_surface = shuffle_array(
+                surface_coordinates,
+                self.config.surface_points_sample,
+                weights=weights,
+            )
+
+            if surface_coordinates_sampled.shape[0] < self.config.surface_points_sample:
+                surface_coordinates_sampled = pad(
+                    surface_coordinates_sampled,
+                    self.config.surface_points_sample,
+                    pad_value=-10.0,
+                )
+
+            # Select out the sampled points for non-neighbor arrays:
+            surface_fields = surface_fields[idx_surface]
+            pos_normals_com_surface = pos_normals_com_surface[idx_surface]
+
+            # Perform a kNN on the full set of points vs. sampled points
+            # to select the neighbors:
+            # if self.config.num_surface_neighbors > 1:
+            #     if self.array_provider == cp:
+            #         knn = cuml.neighbors.NearestNeighbors(
+            #             n_neighbors=self.config.num_surface_neighbors,
+            #             algorithm="rbc",
+            #         )
+            #         knn.fit(surface_coordinates)
+            #     else:
+            #         # Under the hood this is instantiating a KDTree.
+            #         # aka here knn is a type, not a class, technically.
+            #         interp_func = KDTree(surface_coordinates)
+
+            # Now, perform the kNN on the sampled points:
+            if self.config.num_surface_neighbors > 1:
+                neighbor_indices, neighbor_distances = knn(
+                    points=surface_coordinates,
+                    queries=surface_coordinates_sampled,
+                    k=self.config.num_surface_neighbors,
+                )
+
+                # Pull out the neighbor elements.  Note that ii is the index into the original
+                # points - but only exists for the sampled points
+                # In other words, a point from `surface_coordinates_sampled` has neighbors
+                # from the full `surface_coordinates` array.
+                surface_neighbors = surface_coordinates[neighbor_indices][:, 1:]
+                surface_neighbors_normals = surface_normals[neighbor_indices][:, 1:]
+                surface_neighbors_sizes = surface_sizes[neighbor_indices][:, 1:]
+            else:
+                surface_neighbors = surface_coordinates
+                surface_neighbors_normals = surface_normals
+                surface_neighbors_sizes = surface_sizes
+
+            # Subsample the normals and sizes:
+            surface_normals = surface_normals[idx_surface]
+            surface_sizes = surface_sizes[idx_surface]
+
+            # Update the coordinates to the sampled points:
+            surface_coordinates = surface_coordinates_sampled
+
+        else:
+            neighbor_indices, _ = knn(
+                points=surface_coordinates,
+                queries=surface_coordinates,
+                k=self.config.num_surface_neighbors,
+            )
+
+            # Construct the neighbors arrays:
+            surface_neighbors = surface_coordinates[neighbor_indices][:, 1:]
+            surface_neighbors_normals = surface_normals[neighbor_indices][:, 1:]
+            surface_neighbors_sizes = surface_sizes[neighbor_indices][:, 1:]
+
+        # Have to normalize neighbors after the kNN and sampling
+        if self.config.normalize_coordinates:
+            surf_grid = normalize(surf_grid, s_max, s_min)
+            surface_coordinates = normalize(surface_coordinates, s_max, s_min)
+            surface_neighbors = normalize(surface_neighbors, s_max, s_min)
+
+        if self.config.scaling_type is not None:
+            if self.config.surface_factors is not None:
+                if self.config.scaling_type == "mean_std_scaling":
+                    surf_mean = self.config.surface_factors[0]
+                    surf_std = self.config.surface_factors[1]
+                    # TODO - Are these array calls needed?
+                    surface_fields = standardize(surface_fields, surf_mean, surf_std)
+                elif self.config.scaling_type == "min_max_scaling":
+                    surf_min = self.config.surface_factors[1]
+                    surf_max = self.config.surface_factors[0]
+                    # TODO - Are these array calls needed?
+                    surface_fields = normalize(surface_fields, surf_max, surf_min)
+
+        return_dict.update(
+            {
+                "pos_surface_center_of_mass": pos_normals_com_surface,
+                "surface_mesh_centers": surface_coordinates,
+                "surface_mesh_neighbors": surface_neighbors,
+                "surface_normals": surface_normals,
+                "surface_neighbors_normals": surface_neighbors_normals,
+                "surface_areas": surface_sizes,
+                "surface_neighbors_areas": surface_neighbors_sizes,
+                "surface_fields": surface_fields,
+            }
+        )
+
+        return return_dict
+
+    @profile
+    def process_volume(
+        self,
+        s_min: torch.Tensor,
+        s_max: torch.Tensor,
+        volume_coordinates: torch.Tensor,
+        volume_fields: torch.Tensor,
+        stl_vertices: torch.Tensor,
+        mesh_indices_flattened: torch.Tensor,
+        center_of_mass: torch.Tensor,
+    ) -> dict[str, torch.Tensor]:
+        return_dict = {}
+
+        nx, ny, nz = self.config.grid_resolution
+
+        # Determine the volume min / max locations
+        if self.config.bounding_box_dims is None:
+            c_max = s_max + (s_max - s_min) / 2
+            c_min = s_min - (s_max - s_min) / 2
+            c_min[2] = s_min[2]
+        else:
+            c_max = self.config.bounding_box_dims[0]
+            c_min = self.config.bounding_box_dims[1]
+
+        if self.config.sample_in_bbox:
+            # Remove points in the volume that are outside
+            # of the bbox area.
+            min_check = volume_coordinates[:] > c_min
+            max_check = volume_coordinates[:] < c_max
+
+            ids_in_bbox = min_check & max_check
+            ids_in_bbox = ids_in_bbox.all(dim=1)
+
+            volume_coordinates = volume_coordinates[ids_in_bbox]
+            volume_fields = volume_fields[ids_in_bbox]
+
+        dx, dy, dz = (
+            (c_max[0] - c_min[0]) / nx,
+            (c_max[1] - c_min[1]) / ny,
+            (c_max[2] - c_min[2]) / nz,
+        )
+
+        # TODO - we need to make sure if the bbox is dynamic,
+        # the bounds on the grid are correct
+
+        # # Generate a grid of specified resolution to map the bounding box
+        # # The grid is used for capturing structured geometry features and SDF representation of geometry
+        # grid = create_grid(c_max, c_min, [nx, ny, nz])
+        # grid_reshaped = grid.reshape(nx * ny * nz, 3)
+
+        # SDF calculation on the volume grid using WARP
+        sdf_grid, _ = signed_distance_field(
+            stl_vertices,
+            mesh_indices_flattened,
+            self.volume_grid,
+            use_sign_winding_number=True,
+        )
+
+        if self.config.sampling:
+            # Generate a series of idx to sample the volume
+            # without replacement
+
+            volume_coordinates_sampled, idx_volume = shuffle_array(
+                volume_coordinates, self.config.volume_points_sample
+            )
+            volume_coordinates_sampled = volume_coordinates[idx_volume]
+
+            if volume_coordinates_sampled.shape[0] < self.config.volume_points_sample:
+                padding_size = (
+                    self.config.volume_points_sample
+                    - volume_coordinates_sampled.shape[0]
+                )
+                volume_coordinates_sampled = torch.nn.functional.pad(
+                    volume_coordinates_sampled,
+                    (0, 0, 0, 0, 0, padding_size),
+                    mode="constant",
+                    value=-10.0,
+                )
+                # volume_coordinates_sampled = pad(
+                #     volume_coordinates_sampled,
+                #     self.config.volume_points_sample,
+                #     pad_value=-10.0,
+                # )
+            volume_fields = volume_fields[idx_volume]
+            volume_coordinates = volume_coordinates_sampled
+
+        # Get the SDF of all the selected volume coordinates,
+        # And keep the closest point to each one.
+        sdf_nodes, sdf_node_closest_point = signed_distance_field(
+            stl_vertices,
+            mesh_indices_flattened,
+            volume_coordinates,
+            use_sign_winding_number=True,
+        )
+
+        if self.config.positional_encoding:
+            pos_normals_closest_vol = calculate_normal_positional_encoding(
+                volume_coordinates,
+                sdf_node_closest_point,
+                cell_length=[dx, dy, dz],
+            )
+            pos_normals_com_vol = calculate_normal_positional_encoding(
+                volume_coordinates, center_of_mass, cell_length=[dx, dy, dz]
+            )
+        else:
+            pos_normals_closest_vol = volume_coordinates - sdf_node_closest_point
+            pos_normals_com_vol = volume_coordinates - center_of_mass
+
+        if self.config.normalize_coordinates:
+            volume_coordinates = normalize(volume_coordinates, c_max, c_min)
+            grid = normalize(self.volume_grid, c_max, c_min)
+
+        if self.config.scaling_type is not None:
+            if self.config.volume_factors is not None:
+                if self.config.scaling_type == "mean_std_scaling":
+                    vol_mean = self.config.volume_factors[0]
+                    vol_std = self.config.volume_factors[1]
+                    volume_fields = standardize(volume_fields, vol_mean, vol_std)
+                elif self.config.scaling_type == "min_max_scaling":
+                    vol_min = self.config.volume_factors[1]
+                    vol_max = self.config.volume_factors[0]
+                    volume_fields = normalize(volume_fields, vol_max, vol_min)
+
+        vol_grid_max_min = torch.stack([c_min, c_max])
+
+        return_dict.update(
+            {
+                "pos_volume_closest": pos_normals_closest_vol,
+                "pos_volume_center_of_mass": pos_normals_com_vol,
+                "grid": grid,
+                "sdf_grid": sdf_grid,
+                "sdf_nodes": sdf_nodes,
+                "volume_fields": volume_fields,
+                "volume_mesh_centers": volume_coordinates,
+                "volume_min_max": vol_grid_max_min,
+            }
+        )
+
+        return return_dict
+
+    @profile
+    def process_data(self, data_dict):
+        # Start building the preprocessed return dict:
+        return_dict = {
+            "global_params_values": data_dict["global_params_values"],
+            "global_params_reference": data_dict["global_params_reference"],
+        }
+
+        # This function gets information about the surface scale,
+        # and decides what the surface grid will be:
+        (s_min, s_max, length_scale, surf_grid_max_min, surf_grid) = (
+            self.compute_stl_scaling(
+                data_dict["stl_coordinates"], self.config.bounding_box_dims_surf
+            )
+        )
+
+        # This is a center of mass computation for the stl surface,
+        # using the size of each mesh point as weight.
+
+        center_of_mass = calculate_center_of_mass(
+            data_dict["stl_centers"], data_dict["stl_areas"]
+        )
+
+        # For SDF calculations, make sure the mesh_indices_flattened is an integer array:
+        mesh_indices_flattened = data_dict["stl_faces"].to(torch.int32)
+
+        return_dict.update(
+            {
+                "length_scale": length_scale,
+                "surf_grid_max_min": surf_grid_max_min,
+            }
+        )
+
+        # This will compute the sdf on the surface grid and apply downsampling if needed
+        sdf_surf_grid, geom_centers = self.preprocess_combined(
+            s_min,
+            s_max,
+            surf_grid,
+            stl_vertices=data_dict["stl_coordinates"],
+            mesh_indices_flattened=mesh_indices_flattened,
+        )
+        return_dict["sdf_surf_grid"] = sdf_surf_grid
+        return_dict["geometry_coordinates"] = geom_centers
+
+        # Up to here works all in torch!
+
+        if self.model_type == "volume" or self.model_type == "combined":
+            volume_dict = self.preprocess_volume(
+                s_min,
+                s_max,
+                volume_coordinates=data_dict["volume_mesh_centers"],
+                volume_fields=data_dict["volume_fields"],
+                stl_vertices=data_dict["stl_coordinates"],
+                mesh_indices_flattened=mesh_indices_flattened,
+                center_of_mass=center_of_mass,
+            )
+
+            return_dict.update(volume_dict)
+
+        if self.model_type == "surface" or self.model_type == "combined":
+            surface_dict = self.preprocess_surface(
+                s_min,
+                s_max,
+                center_of_mass,
+                surf_grid,
+                surface_coordinates=data_dict["surface_mesh_centers"],
+                surface_normals=data_dict["surface_normals"],
+                surface_sizes=data_dict["surface_areas"],
+                surface_fields=data_dict["surface_fields"],
+            )
+            return_dict.update(surface_dict)
+
+        return return_dict
+
+    @profile
+    def __getitem__(self, idx):
+        """
+        Function for fetching and processing a single file's data.
+
+        Domino, in general, expects one example per file and the files
+        are relatively large due to the mesh size.
+        """
+
+        if self.config.deterministic:
+            torch.manual_seed(idx)
+
+        if hasattr(self, "indices"):
+            index = self.indices[idx]
+        else:
+            index = idx
+
+        data_dict = self.dataset[index]
+
+        for key in self.keys_to_read_if_available.keys():
+            if key not in data_dict:
+                data_dict[key] = self.keys_to_read_if_available[key]
+
+        return_dict = self.process_data(data_dict)
+
+        return return_dict
+
+    # def __getitem__(self, idx: int) -> dict[str, torch.Tensor | ShardTensor]:
+    #     """
+    #     Get a data sample.
+
+    #     Flow is:
+    #     - Read data, or get preloaded data if this idx is preloaded.
+    #     - Move data to GPU, if needed.
+    #         - Preloading data will move to GPU if it can.
+    #     - If domain parallelism is enabled, convert to ShardTensors.
+    #     - Return
+
+    #     Args:
+    #         idx: Index of the sample to retrieve
+
+    #     Returns:
+    #         Dictionary containing tensors/ShardTensors for the requested data
+    #     """
+
+    #     if idx >= len(self._filenames):
+    #         raise IndexError(
+    #             f"Index {idx} out of range for dataset of size {len(self._filenames)}"
+    #         )
+
+    #     # Attempt to get preloaded data:
+    #     data = self.get_preloaded(idx)
+    #     if data is None:
+    #         # Read data from zarr file
+    #         data = self._read_file(self._filenames[idx])
+    #         data = self._move_to_gpu(data, idx)
+
+    #     # This blocks until the preprocessing has transferred to GPU
+    #     if idx in self._transfer_events:
+    #         torch.cuda.current_stream().wait_event(self._transfer_events[idx])
+    #         self._transfer_events.pop(idx)
+
+    #     # Convert to ShardTensors if using domain parallelism
+    #     if self.device_mesh is not None:
+    #         data = self._convert_to_shard_tensors(data)
+
+    #     return data
+
+    # def __iter__(self):
+    #     self.i = 0
+    #     return self
+
+    # def __next__(self):
+    #     """
+    #     When used in an iterator context, this datapipe will
+    #     leverage preloading and preprocessing to speed up the data
+    #     loading latency.
+
+    #     Each time "next" is called, the datapipe will ask the data
+    #     set to preload the data 2 steps ahead.  It will then ask
+    #     for the data from one step ahead, and start it processing.
+
+    #     Finally, it will return the data from this requested index
+    #     """
+    #     if self.i >= len(self._filenames):
+    #         self.i = 0
+    #         raise StopIteration
+
+    #     if self.preload_depth > 0 and self.i + 1 < len(self._filenames):
+    #         self.preload(this_index)
+    #     if self.preload_depth > 1 and self.i + 2 < len(self._filenames):
+    #         self.preload(this_index)
+
+    #     data = self.__getitem__(this_index)
+
+    #     self.i += 1
+
+    #     return data
+
+    # def preprocess(self, idx: int) -> None:
+    #     """
+    #     Asynchronously preload the data for the given index (up to CPU, not GPU).
+    #     Only one preload operation is supported at a time.
+
+    #     Args:
+    #         idx: Index of the sample to preload.
+    #     """
+    #     if idx in self._preload_queue:
+    #         # Skip items that are already in the queue
+    #         return
+
+    #     def _preload_worker():
+    #         try:
+    #             data = self._read_file(self._filenames[idx])
+    #             # Convert to torch tensors
+    #             return self._move_to_gpu(data, idx)
+    #         except Exception as e:
+    #             print(f"Exception in preload: {e}")
+    #             raise e
+
+    #     self._preload_queue[idx] = self.preload_executor.submit(_preload_worker)
+
+    # def get_preloaded(self, idx: int) -> dict[str, torch.Tensor] | None:
+    #     """
+    #     Retrieve the preloaded data (blocking if not ready).
+
+    #     Returns:
+    #         (idx, data) tuple where data is a dictionary of key to numpy array or torch tensor.
+
+    #     Raises:
+    #         RuntimeError: If no preload is in progress.
+    #         Exception: If preload failed.
+    #     """
+
+    #     if idx not in self._preload_queue:
+    #         return None
+
+    #     result = self._preload_queue[idx].result()  # This will block until the result is ready
+    #     self._preload_queue.pop(idx) # Clear the future after getting the result
+
+    #     return result
+
+
+@profile
+def compute_scaling_factors(cfg: DictConfig, input_path: str, use_cache: bool) -> None:
+    # Create a dataset for just the field keys:
+
+    dataset = DrivaerMLDataset(
+        data_dir=input_path,
+        keys_to_read=["volume_fields", "surface_fields"],
+        output_device=torch.device("cuda"),  # TODO - configure this more carefully here
+    )
+
+    mean, std, min_val, max_val = compute_mean_std_min_max(
+        dataset,
+        field_keys=["volume_fields", "surface_fields"],
+    )
+
+    return mean, std, min_val, max_val
+
+
+class CachedDoMINODataset(Dataset):
+    """
+    Dataset for reading cached DoMINO data files, with optional resampling.
+    Acts as a drop-in replacement for DoMINODataPipe.
+    """
+
+    # @nvtx_annotate(message="CachedDoMINODataset __init__")
+    def __init__(
+        self,
+        data_path: Union[str, Path],
+        phase: Literal["train", "val", "test"] = "train",
+        sampling: bool = False,
+        volume_points_sample: Optional[int] = None,
+        surface_points_sample: Optional[int] = None,
+        geom_points_sample: Optional[int] = None,
+        model_type=None,  # Model_type, surface, volume or combined
+        deterministic_seed=False,
+        surface_sampling_algorithm="area_weighted",
+    ):
+        super().__init__()
+
+        self.model_type = model_type
+        if deterministic_seed:
+            np.random.seed(42)
+
+        if isinstance(data_path, str):
+            data_path = Path(data_path)
+        self.data_path = data_path.expanduser()
+
+        if not self.data_path.exists():
+            raise AssertionError(f"Path {self.data_path} does not exist")
+        if not self.data_path.is_dir():
+            raise AssertionError(f"Path {self.data_path} is not a directory")
+
+        self.deterministic_seed = deterministic_seed
+        self.sampling = sampling
+        self.volume_points = volume_points_sample
+        self.surface_points = surface_points_sample
+        self.geom_points = geom_points_sample
+        self.surface_sampling_algorithm = surface_sampling_algorithm
+
+        self.filenames = get_filenames(self.data_path, exclude_dirs=True)
+
+        total_files = len(self.filenames)
+
+        self.phase = phase
+        self.indices = np.array(range(total_files))
+
+        np.random.shuffle(self.indices)
+
+        if not self.filenames:
+            raise AssertionError(f"No cached files found in {self.data_path}")
+
+    def __len__(self):
+        return len(self.indices)
+
+    # @nvtx_annotate(message="CachedDoMINODataset __getitem__")
+    def __getitem__(self, idx):
+        if self.deterministic_seed:
+            np.random.seed(idx)
+        nvtx.range_push("Load cached file")
+
+        index = self.indices[idx]
+        cfd_filename = self.filenames[index]
+
+        filepath = self.data_path / cfd_filename
+        result = np.load(filepath, allow_pickle=True).item()
+        result = {
+            k: v.numpy() if isinstance(v, Tensor) else v for k, v in result.items()
+        }
+
+        nvtx.range_pop()
+        if not self.sampling:
+            return result
+
+        nvtx.range_push("Sample points")
+
+        # Sample volume points if present
+        if "volume_mesh_centers" in result and self.volume_points:
+            coords_sampled, idx_volume = shuffle_array(
+                result["volume_mesh_centers"], self.volume_points
+            )
+            if coords_sampled.shape[0] < self.volume_points:
+                coords_sampled = pad(
+                    coords_sampled, self.volume_points, pad_value=-10.0
+                )
+
+            result["volume_mesh_centers"] = coords_sampled
+            for key in [
+                "volume_fields",
+                "pos_volume_closest",
+                "pos_volume_center_of_mass",
+                "sdf_nodes",
+            ]:
+                if key in result:
+                    result[key] = result[key][idx_volume]
+
+        # Sample surface points if present
+        if "surface_mesh_centers" in result and self.surface_points:
+            if self.surface_sampling_algorithm == "area_weighted":
+                coords_sampled, idx_surface = area_weighted_shuffle_array(
+                    result["surface_mesh_centers"],
+                    self.surface_points,
+                    result["surface_areas"],
+                )
+            else:
+                coords_sampled, idx_surface = shuffle_array(
+                    result["surface_mesh_centers"], self.surface_points
+                )
+
+            if coords_sampled.shape[0] < self.surface_points:
+                coords_sampled = pad(
+                    coords_sampled, self.surface_points, pad_value=-10.0
+                )
+
+            ii = result["neighbor_indices"]
+            result["surface_mesh_neighbors"] = result["surface_mesh_centers"][ii]
+            result["surface_neighbors_normals"] = result["surface_normals"][ii]
+            result["surface_neighbors_areas"] = result["surface_areas"][ii]
+
+            result["surface_mesh_centers"] = coords_sampled
+
+            for key in [
+                "surface_fields",
+                "surface_areas",
+                "surface_normals",
+                "pos_surface_center_of_mass",
+                "surface_mesh_neighbors",
+                "surface_neighbors_normals",
+                "surface_neighbors_areas",
+            ]:
+                if key in result:
+                    result[key] = result[key][idx_surface]
+
+            del result["neighbor_indices"]
+
+        # Sample geometry points if present
+        if "geometry_coordinates" in result and self.geom_points:
+            coords_sampled, _ = shuffle_array(
+                result["geometry_coordinates"], self.geom_points
+            )
+            if coords_sampled.shape[0] < self.geom_points:
+                coords_sampled = pad(coords_sampled, self.geom_points, pad_value=-100.0)
+            result["geometry_coordinates"] = coords_sampled
+
+        nvtx.range_pop()
+        return result
+
+
+def create_domino_dataset(
+    cfg, phase, volume_variable_names, surface_variable_names, vol_factors, surf_factors
+):
+    if phase == "train":
+        input_path = cfg.data.input_dir
+    elif phase == "val":
+        input_path = cfg.data.input_dir_val
+    else:
+        raise ValueError(f"Invalid phase {phase}")
+
+    if cfg.data_processor.use_cache:
+        return CachedDoMINODataset(
+            input_path,
+            phase=phase,
+            sampling=True,
+            volume_points_sample=cfg.model.volume_points_sample,
+            surface_points_sample=cfg.model.surface_points_sample,
+            geom_points_sample=cfg.model.geom_points_sample,
+            model_type=cfg.model.model_type,
+            surface_sampling_algorithm=cfg.model.surface_sampling_algorithm,
+        )
+    else:
+        overrides = {}
+        if hasattr(cfg.data, "gpu_preprocessing"):
+            overrides["gpu_preprocessing"] = cfg.data.gpu_preprocessing
+
+        if hasattr(cfg.data, "gpu_output"):
+            overrides["gpu_output"] = cfg.data.gpu_output
+
+        return DoMINODataPipe(
+            input_path,
+            phase=phase,
+            grid_resolution=cfg.model.interp_res,
+            volume_variables=volume_variable_names,
+            surface_variables=surface_variable_names,
+            normalize_coordinates=True,
+            sampling=True,
+            sample_in_bbox=True,
+            volume_points_sample=cfg.model.volume_points_sample,
+            surface_points_sample=cfg.model.surface_points_sample,
+            geom_points_sample=cfg.model.geom_points_sample,
+            positional_encoding=cfg.model.positional_encoding,
+            volume_factors=vol_factors,
+            surface_factors=surf_factors,
+            scaling_type=cfg.model.normalization,
+            model_type=cfg.model.model_type,
+            bounding_box_dims=cfg.data.bounding_box,
+            bounding_box_dims_surf=cfg.data.bounding_box_surface,
+            num_surface_neighbors=cfg.model.num_neighbors_surface,
+            resample_surfaces=cfg.model.resampling_surface_mesh.resample,
+            resampling_points=cfg.model.resampling_surface_mesh.points,
+            surface_sampling_algorithm=cfg.model.surface_sampling_algorithm,
+            **overrides,
+        )
+
+
+if __name__ == "__main__":
+    fm_data = DoMINODataPipe(
+        data_path="/code/processed_data/new_models_1/",
+        phase="train",
+        sampling=False,
+        sample_in_bbox=False,
+    )
diff --git a/physicsnemo/datapipes/cae/drivaer_ml_datapipe.py b/physicsnemo/datapipes/cae/drivaer_ml_datapipe.py
new file mode 100644
index 0000000000..84eea51ea5
--- /dev/null
+++ b/physicsnemo/datapipes/cae/drivaer_ml_datapipe.py
@@ -0,0 +1,888 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import multiprocessing as mp
+import os
+import pathlib
+import sys
+import time
+from abc import ABC, abstractmethod
+from concurrent.futures import ThreadPoolExecutor
+from multiprocessing import shared_memory
+
+import numpy as np
+import psutil
+import tensorstore as ts
+import torch
+import zarr
+
+from physicsnemo.distributed import ShardTensor, ShardTensorSpec
+
+# from physicsnemo.distributed.utils import compute_split_shapes
+
+# For use on systems where cpu_affinity is not available:
+psutil_process = psutil.Process()
+
+
+class FakeProcess:
+    """
+    Enable a fake cpu affinity setting if it's not available
+    """
+
+    def cpu_affinity(self, cpus: list[int] | None) -> None:
+        pass
+
+
+if not hasattr(psutil_process, "cpu_affinity"):
+    psutil_process = FakeProcess()
+
+# Abstractions:
+# - want to read npy/npz/.zarr/.stl/.vtp files
+# - Need to share next level abstractions
+# - Domain parallel dataloading is supported: output will be ShardTensor instead.
+# - need to be able to configure preprocessing
+# - CPU -> GPU transfer happens here, needs to be isolated in it's own stream
+# - Output of dataloader should be torch.Tensor objects.
+
+
+"""
+This datapipe handles reading files from Zarr and piping into torch.Tensor objects.
+
+It's expected that the files are organized as groups, with each .zarr
+file representing one training example.  To improve IO performance, the files 
+should be chunked for each array.  The reader takes a list of keys in the 
+group to read, and will not read keys that are not specified.  The exception
+is if _no_ keys are passed, in which case _all_ keys will be read.
+"""
+
+
+class BackendReader(ABC):
+    """
+    Abstract base class for backend readers.
+    """
+
+    def __init__(self, keys_to_read: list[str] | None) -> None:
+        """
+        Initialize the backend reader.
+        """
+        self.keys_to_read = keys_to_read
+
+    @abstractmethod
+    def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]:
+        """
+        Read a file and return a dictionary of tensors.
+        """
+        pass
+
+    @abstractmethod
+    def read_file_sharded(
+        self, filename: pathlib.Path, parallel_rank: int, parallel_size: int
+    ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]:
+        """
+        Read a file and return a dictionary of tensors.
+        """
+        pass
+
+
+class NpyFileReader(BackendReader):
+    """
+    Reader for numpy files.
+    """
+
+    def __init__(self, keys_to_read: list[str] | None) -> None:
+        super().__init__(keys_to_read)
+
+    def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]:
+        """
+        Read a file and return a dictionary of tensors.
+        """
+        data = np.load(filename, allow_pickle=True).item()
+
+        missing_keys = set(self.keys_to_read) - set(data.keys())
+
+        if len(missing_keys) > 0:
+            raise ValueError(f"Keys {missing_keys} not found in file {filename}")
+
+        data = {key: torch.from_numpy(data[key]) for key in self.keys_to_read}
+
+        return data
+
+    def read_file_sharded(
+        self, filename: pathlib.Path, parallel_rank: int, parallel_size: int
+    ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]:
+        pass
+
+
+class ZarrFileReader(BackendReader):
+    """
+    Reader for zarr files.
+    """
+
+    def __init__(self, keys_to_read: list[str] | None) -> None:
+        super().__init__(keys_to_read)
+
+    def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]:
+        """
+        Read a file and return a dictionary of tensors.
+        """
+        group = zarr.open_group(filename, mode="r")
+
+        missing_keys = set(self.keys_to_read) - set(group.keys())
+
+        if len(missing_keys) > 0:
+            raise ValueError(f"Keys {missing_keys} not found in file {filename}")
+
+        # This is a slower basic way to do this, to be improved:
+        data = {key: torch.from_numpy(group[key][:]) for key in self.keys_to_read}
+
+        return data
+
+    def read_file_sharded(
+        self, filename: pathlib.Path, parallel_rank: int, parallel_size: int
+    ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]:
+        """
+        Read a file and return a dictionary of tensors.
+        """
+        pass
+
+
+class TensorStoreZarrReader(BackendReader):
+    """
+    Reader for tensorstore zarr files.
+    """
+
+    def __init__(self, keys_to_read: list[str] | None) -> None:
+        super().__init__(keys_to_read)
+
+        self.spec_template = {
+            "driver": "zarr2",
+            "kvstore": {
+                "driver": "file",
+                "path": None,
+            },
+        }
+
+        self.context = ts.Context(
+            {
+                "cache_pool": {"total_bytes_limit": 10000000},
+                "data_copy_concurrency": {"limit": 32},
+            }
+        )
+
+    def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]:
+        """
+        Read a file and return a dictionary of tensors.
+        """
+        read_futures = {}
+        for key in self.keys_to_read:
+            spec = self.spec_template.copy()
+            spec["kvstore"]["path"] = str(filename) + "/" + str(key)
+
+            read_futures[key] = ts.open(
+                spec, create=False, open=True, context=self.context
+            )
+
+        results = {
+            key: np.array(read_futures[key].result()) for key in self.keys_to_read
+        }
+
+        data = {
+            key: torch.as_tensor(results[key], dtype=torch.float32)
+            for key in self.keys_to_read
+        }
+
+        return data
+
+    def read_file_sharded(
+        self, filename: pathlib.Path, parallel_rank: int, parallel_size: int
+    ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]:
+        """
+        Read a file and return a dictionary of tensors.
+        """
+        pass
+
+
+class ZarrReadWorker:
+    """
+    This class is a worker for the ZarrReadController.
+    It reads tasks from the task queue and writes to the shared memory buffer.
+    It then sends an acknowledgement to the controller.
+    """
+
+    def __init__(self, task_q: mp.Queue, ack_q: mp.Queue):
+        """
+
+        task_q is the incoming Queue of chunks to read
+        ack_q is the outgoing acknowledgement of reads
+        """
+
+        self.task_q = task_q
+        self.ack_q = ack_q
+
+        self.current_group = None
+        self.current_array = None
+
+        self.zarr_cache = {}
+
+    def run(self):
+        """
+        This function is the main loop for the worker.
+        It reads tasks from the task queue and writes to the shared memory buffer.
+        It then sends an acknowledgement to the controller.
+        """
+
+        while True:
+            # Run until killed
+
+            task = self.task_q.get()
+
+            if task is None:
+                break
+
+            # Task organization:
+            # (
+            #     zarr_path - file name we're reading, a group
+            #     array_name,  - array in that group
+            #     read_idx - a unique integer representing the read we're about to do
+            #     index slice_to_read - the np.slice object representing what in the original file to read
+            #     shared_buffer_name - the unique name of the shared buffer this read will use
+            #     shared_buffer_loc - the slice of the shared buffer to store into
+            # )
+
+            (
+                zarr_path,
+                array_name,
+                read_idx,
+                slice_to_read,
+                shared_buffer_name,
+                shared_buffer_slice,
+            ) = task
+
+            if zarr_path not in self.zarr_cache:
+                self.zarr_cache[zarr_path] = zarr.open_group(zarr_path)
+
+            z = self.zarr_cache[zarr_path]
+
+            arr = z[array_name]
+
+            # Get the shared memory instance:
+            shm = shared_memory.SharedMemory(name=shared_buffer_name)
+            buf = np.ndarray(arr.shape, arr.dtype, buffer=shm.buf)
+
+            # Perform the local read (and implicit decompress):
+            buf[shared_buffer_slice] = arr[slice_to_read]
+
+            shm.close()
+
+            # Send completion signal:
+            self.ack_q.put(
+                (
+                    "done",
+                    read_idx,
+                )
+            )
+
+
+def spawn_worker(task_q: mp.Queue, ack_q: mp.Queue):
+    worker = ZarrReadWorker(task_q, ack_q)
+    worker.run()
+
+
+class ZarrReadController(BackendReader):
+    """
+    This class maintains a persistent pool of processes to enable shared
+    memory reading of zarr groups.  Users can control how many processes
+    to use, and which pool of CPUs they reside on.
+
+    By default reading is done by passing chunks to each worker to read.
+    Reads are round-robin across children processes.  Master process will
+    not return until all children reads have acknowledged.
+    """
+
+    def __init__(
+        self,
+        keys_to_read: list[str] | None,
+        num_read_processes: int | None = None,
+    ):
+        super().__init__(keys_to_read)
+
+        self.available_cpus = psutil.Process().cpu_affinity()
+        if num_read_processes is None:
+            # Use all but one CPU, unless there is only one...
+            num_read_processes = max(1, len(self.available_cpus) - 1)
+
+        self.num_read_processes = num_read_processes
+        print(f"num_read_processes: {num_read_processes}")
+        # If the target_cpus aren't set, we use some default settings:
+
+        # Initialize Queues:
+        self.task_q = mp.Queue()
+        self.ack_q = mp.Queue()
+
+        self.children = []
+
+        self.memory_buffers = {}
+
+        self.spawn_children()
+
+    def spawn_children(
+        self,
+    ):
+        if mp.get_start_method() != "fork" and not hasattr(sys, "frozen"):
+            # Prevent accidental spawn in child imports
+            if not hasattr(self, "_spawn_guard"):
+                self._spawn_guard = True
+            else:
+                return
+
+        # Create processes, using psutil to set affinity at spawn time.
+
+        stride = len(self.available_cpus) // self.num_read_processes
+        cpus_by_proc = [
+            self.available_cpus[i * stride : (i + 1) * stride]
+            for i in range(self.num_read_processes)
+        ]
+
+        # split the available cpus into num_read_processes chunks
+
+        for i, cpus in enumerate(cpus_by_proc):
+            psutil_process = psutil.Process()
+            psutil_process.cpu_affinity(cpus)
+            proc = mp.Process(target=spawn_worker, args=(self.task_q, self.ack_q))
+            psutil_process.cpu_affinity(self.available_cpus)
+            self.children.append(proc)
+
+        for worker in self.children:
+            worker.start()
+
+    def free_shared_memory(self, zarr_file):
+        # Free all the shared memory buffers that were opened for the specified file
+        if zarr_file in self.memory_buffers:
+            for buffer in self.memory_buffers[zarr_file]:
+                buffer.close()
+                buffer.unlink()
+
+        self.memory_buffers.pop(zarr_file)
+
+    def read_file(self, zarr_file: str):
+        print(f"zarr_file: {zarr_file}")
+        file_id = os.path.basename(zarr_file)
+
+        # Open the file:
+        z = zarr.open_group(zarr_file)
+
+        output_arrays = {}
+
+        if zarr_file in self.memory_buffers:
+            self.free_shared_memory(zarr_file)
+
+        self.memory_buffers[zarr_file] = []
+
+        required_idx = []
+
+        for key in self.keys_to_read:
+            # Get the metadata for this key:
+            arr = z[key]
+
+            # Allocate the entire buffer:
+            buffer_size = np.prod(arr.shape) * np.dtype(arr.dtype).itemsize
+
+            shm = shared_memory.SharedMemory(
+                create=True,
+                size=buffer_size,
+            )
+            np_buffer = np.ndarray(arr.shape, dtype=arr.dtype, buffer=shm.buf)
+
+            # Make sure we don't unlink it prematurely:
+            self.memory_buffers[zarr_file].append(shm)
+
+            output_arrays[key] = np_buffer
+
+            zarr_chunk_size = arr.chunks[0]
+
+            # Define the read boundaries for slicing:
+            slice_starts = list(range(0, arr.shape[0], zarr_chunk_size))
+            slice_stops = [start + zarr_chunk_size for start in slice_starts]
+
+            # Correct the last stop point:
+            slice_stops[-1] = arr.shape[0]
+
+            # Task organization:
+            # (
+            #     zarr_path - file name we're reading, a group
+            #     array_name,  - array in that group
+            #     read_idx - a unique integer representing the read we're about to do
+            #     index slice_to_read - the np.slice object representing what in the original file to read
+            #     shared_buffer_name - the unique name of the shared buffer this read will use
+            #     shared_buffer_loc - the slice of the shared buffer to store into
+            # )
+
+            for i, (slice_start, slice_stop) in enumerate(
+                zip(slice_starts, slice_stops)
+            ):
+                cpu_slice = np.s_[slice_start:slice_stop]
+                zarr_slice = np.s_[slice_start:slice_stop]
+
+                length = slice_stop - slice_start
+
+                read_idx = f"{file_id}_{key}_{i}_{length}"
+                required_idx.append(read_idx)
+
+                task_args = (
+                    zarr_file,
+                    key,
+                    read_idx,
+                    zarr_slice,
+                    shm.name,
+                    cpu_slice,
+                )
+                self.task_q.put(task_args)
+
+        # Now, let's check for completeness before returning:
+        completed = False
+        while not completed:
+            status, idx = self.ack_q.get()
+            if status == "done":
+                if idx not in required_idx:
+                    # Put it back in the queue, it's for another file:
+                    self.ack_q.put((status, idx))
+                else:
+                    required_idx.remove(idx)
+            completed = len(required_idx) == 0
+
+        return {key: torch.as_tensor(output_arrays[key]) for key in self.keys_to_read}
+
+    def read_file_sharded(
+        self, filename: pathlib.Path, parallel_rank: int, parallel_size: int
+    ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]:
+        """
+        Read a file and return a dictionary of tensors.
+        """
+        pass
+
+    def __del__(self):
+        """
+        Make sure we're not leaving things open we shouldn't be
+        """
+
+        # Stop all the workers by sending None on the queue:
+        for child in self.children:
+            self.task_q.put(None)
+
+        for child in self.children:
+            child.join()
+
+        open_files = list(self.memory_buffers.keys())
+        for zf in open_files:
+            self.free_shared_memory(zf)
+
+
+class DrivaerMLDataset:
+    """
+    Dataset reader for DrivaerML and similar datasets.  In general, this
+    dataset supports reading dictionary-like data, and returning a
+    dictionary of torch.Tensor objects.
+
+    When constructed, the user must pass a directory of data examples.
+    The dataset will inspect the folder, identify all children, and decide:
+    - If every file is a directory ending in .zarr, the zarr reader is used.
+    - If every file is .npy, the .npy reader is used.
+    - If every file is .npz, the .npz reader is used.
+    - If every file is a directory without an extension, it's assumed to be .stl/.vtp/.vtu
+
+    The user can optionally force one path with a parameter.
+
+    The flow of this dataset is:
+    - Load data from file, using a thread.
+        - Each individual file reading tool may or may not have it's own threading
+          or multi processing enabled.  That's up to it.  This just does async
+          loading.
+        - Data should come out of the readers in dict{str : torch.Tensor} format
+    - The data is transferred from CPU to GPU in a separate stream.
+
+    Users can call __getitem__(i), which will trigger the pipeline,
+    or they can call `preload(i)`, which will start the pipeline for index `i`.
+    Subsequent calls to `__getitem__(i)` should be faster since the IO is in
+    progress or complete.
+
+    Using the `__iter__` functionality will automatically enable preloading.
+
+    """
+
+    def __init__(
+        self,
+        data_dir: str | pathlib.Path,
+        keys_to_read: list[str] | None,
+        output_device: torch.device,
+        preload_depth: int = 2,
+        device_mesh: torch.distributed.DeviceMesh | None = None,
+        placements: dict[str, torch.distributed.tensor.Placement] | None = None,
+    ) -> None:
+        if isinstance(data_dir, str):
+            data_dir = pathlib.Path(data_dir)
+
+        # Verify the data directory exists:
+        if not data_dir.exists():
+            raise FileNotFoundError(f"Data directory {data_dir} does not exist")
+
+        # Verify the data directory is a directory:
+        if not data_dir.is_dir():
+            raise NotADirectoryError(f"Data directory {data_dir} is not a directory")
+
+        self._file_type, self._filenames = self._infer_file_type_and_filenames(data_dir)
+
+        # Initialize the file reader object
+        # Note that for some of these, they could be functions
+        # But others benefit from having a state, so we use classes:
+        if self._file_type == "npy":
+            self.file_reader = NpyFileReader(keys_to_read)
+        elif self._file_type == "zarr":
+            # self.file_reader = ZarrFileReader(keys_to_read)
+            # self.file_reader = ZarrReadController(keys_to_read)
+            self.file_reader = TensorStoreZarrReader(keys_to_read)
+        else:
+            raise ValueError(f"Unsupported file type: {self._file_type}")
+
+        self._keys_to_read = keys_to_read
+
+        # Check the file names; some can be read well in parallel, while others
+        # are not parallelizable.
+
+        self._length = len(self._filenames)
+
+        self.output_device = output_device
+        if output_device.type == "cuda":
+            self._data_loader_stream = torch.cuda.Stream()
+        else:
+            self._data_loader_stream = None
+
+        self.device_mesh = device_mesh
+        self.placements = placements
+
+        # This is thread storage for data preloading:
+        self._preload_queue = {}
+        self._transfer_events = {}
+        self.preload_depth = preload_depth
+        self.preload_executor = ThreadPoolExecutor(max_workers=preload_depth)
+
+    def _infer_file_type_and_filenames(
+        self, data_dir: pathlib.Path
+    ) -> tuple[str, list[str]]:
+        """
+        Infer the file type and filenames from the data directory.
+        """
+
+        # We validated the directory exists and is a directory already.
+
+        # List the files:
+        files = list(data_dir.iterdir())
+
+        if all(file.suffix == ".npy" for file in files):
+            return "npy", files
+        elif all(file.suffix == ".zarr" and file.is_dir() for file in files):
+            return "zarr", files
+        else:
+            # TODO - support folders of stl, vtp, vtu.
+            raise ValueError(f"Unsupported file type: {files}")
+
+    def _move_to_gpu(
+        self, data: dict[str, torch.Tensor], idx: int
+    ) -> dict[str, torch.Tensor]:
+        """Convert numpy arrays to torch tensors and move to GPU if available.
+
+        Args:
+            data: Dictionary of key to torch tensor.
+
+        Returns:
+            Dictionary of key to torch tensor on GPU if available.
+        """
+
+        if self.output_device.type != "cuda":
+            return data
+
+        result = {}
+
+        with torch.cuda.stream(self._data_loader_stream):
+            for key in data.keys():
+                # Move to GPU if available
+                result[key] = data[key].to(self.output_device, non_blocking=True)
+
+            self._transfer_events[idx] = torch.cuda.Event()
+            self._transfer_events[idx].record(self._data_loader_stream)
+
+        return result
+
+    def _convert_to_shard_tensors(
+        self, tensors: dict[str, torch.Tensor]
+    ) -> dict[str, ShardTensor]:
+        """Convert tensors to ShardTensor objects for distributed training.
+
+        Args:
+            tensors: Dictionary of key to torch tensor.
+
+        Returns:
+            Dictionary of key to torch tensor or ShardTensor.
+        """
+
+        if self.device_mesh is None:
+            return tensors
+
+        raise NotImplementedError("Converting to ShardTensor here not implemented yet.")
+
+        # result = {}
+
+        # for key, tensor in tensors.items():
+        #     # Create a ShardTensor with whatever layout the data is actually in:
+        #     st = ShardTensor.__new__(
+        #         ShardTensor,
+        #         local_tensor=tensor,
+        #         spec=self.tensor_specs[key],
+        #         requires_grad=False,  # By default, the data pipe output doesn't need a grad.
+        #     )
+
+        #     # Find out the desired placement:
+        #     if tensor.numel() > 1:
+        #         if isinstance(self.placements, dict):
+        #             target_placement = self.placements[key]
+        #         else:
+        #             target_placement = self.placements
+        #     else:
+        #         target_placement = (Replicate(),)
+
+        #     # Redistribute if necessary:
+        #     # (Recall that this is one dimensional mesh only)
+        #     if st._spec.placements[0] != target_placement[0]:
+        #         st = st.redistribute(placements=target_placement)
+
+        #     result[key] = st
+
+        # return result
+
+    def preload(self, idx: int) -> None:
+        """
+        Asynchronously preload the data for the given index (up to CPU, not GPU).
+        Only one preload operation is supported at a time.
+
+        Args:
+            idx: Index of the sample to preload.
+        """
+        if idx in self._preload_queue:
+            # Skip items that are already in the queue
+            return
+
+        def _preload_worker():
+            try:
+                data = self._read_file(self._filenames[idx])
+                # Convert to torch tensors
+                return self._move_to_gpu(data, idx)
+            except Exception as e:
+                print(f"Exception in preload: {e}")
+                raise e
+
+        self._preload_queue[idx] = self.preload_executor.submit(_preload_worker)
+
+    def get_preloaded(self, idx: int) -> dict[str, torch.Tensor] | None:
+        """
+        Retrieve the preloaded data (blocking if not ready).
+
+        Returns:
+            (idx, data) tuple where data is a dictionary of key to numpy array or torch tensor.
+
+        Raises:
+            RuntimeError: If no preload is in progress.
+            Exception: If preload failed.
+        """
+
+        if idx not in self._preload_queue:
+            return None
+
+        result = self._preload_queue[
+            idx
+        ].result()  # This will block until the result is ready
+        self._preload_queue.pop(idx)  # Clear the future after getting the result
+
+        return result
+
+    def __iter__(self):
+        self.i = 0
+        return self
+
+    def __next__(self):
+        if self.i >= len(self._filenames):
+            self.i = 0
+            raise StopIteration
+
+        if self.preload_depth > 0 and self.i + 1 < len(self._filenames):
+            self.preload(self.i + 1)
+        if self.preload_depth > 1 and self.i + 2 < len(self._filenames):
+            self.preload(self.i + 2)
+
+        data = self.__getitem__(self.i)
+
+        self.i += 1
+
+        return data
+
+    def __len__(self):
+        return len(self._filenames)
+
+    def _read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]:
+        """
+        Read a file and return a dictionary of tensors.
+        """
+        return self.file_reader.read_file(filename)
+
+    def __getitem__(self, idx: int) -> dict[str, torch.Tensor | ShardTensor]:
+        """
+        Get a data sample.
+
+        Flow is:
+        - Read data, or get preloaded data if this idx is preloaded.
+        - Move data to GPU, if needed.
+            - Preloading data will move to GPU if it can.
+        - If domain parallelism is enabled, convert to ShardTensors.
+        - Return
+
+        Args:
+            idx: Index of the sample to retrieve
+
+        Returns:
+            Dictionary containing tensors/ShardTensors for the requested data
+        """
+
+        if idx >= len(self._filenames):
+            raise IndexError(
+                f"Index {idx} out of range for dataset of size {len(self._filenames)}"
+            )
+
+        # Attempt to get preloaded data:
+        data = self.get_preloaded(idx)
+        if data is None:
+            # Read data from zarr file
+            data = self._read_file(self._filenames[idx])
+            data = self._move_to_gpu(data, idx)
+
+        # This blocks until the preprocessing has transferred to GPU
+        if idx in self._transfer_events:
+            torch.cuda.current_stream().wait_event(self._transfer_events[idx])
+            self._transfer_events.pop(idx)
+
+        # Convert to ShardTensors if using domain parallelism
+        if self.device_mesh is not None:
+            data = self._convert_to_shard_tensors(data)
+
+        return data
+
+
+def compute_mean_std_min_max(
+    dataset: DrivaerMLDataset, field_keys: list[str], max_samples: int = 20
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+    """
+    Compute the mean, standard deviation, minimum, and maximum for a specified field
+    across all samples in a dataset.
+
+    Uses a numerically stable online algorithm for mean and variance.
+
+    Args:
+        dataset (DrivaerMLDataset): The dataset to process.
+        field_key (str): The key for the field to normalize.
+
+    Returns:
+        tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+            mean, std, min, max tensors for the field.
+    """
+    N = {}
+    mean = {}
+    M2 = {}  # Sum of squares of differences from the current mean
+    min_val = {}
+    max_val = {}
+
+    # Read the first data item to get the shapes:
+    example_data = dataset[0]
+
+    # Create placeholders for the accumulators:
+    for key in field_keys:
+        N[key] = torch.zeros(1, dtype=torch.int64, device=example_data[key].device)
+        mean[key] = torch.zeros(
+            example_data[key].shape[-1],
+            device=example_data[key].device,
+            dtype=torch.float64,
+        )
+        M2[key] = torch.zeros(
+            example_data[key].shape[-1],
+            device=example_data[key].device,
+            dtype=torch.float64,
+        )
+        min_val[key] = torch.full(
+            (example_data[key].shape[-1],),
+            float("inf"),
+            device=example_data[key].device,
+        )
+        max_val[key] = torch.full(
+            (example_data[key].shape[-1],),
+            float("-inf"),
+            device=example_data[key].device,
+        )
+
+    global_start = time.perf_counter()
+    start = time.perf_counter()
+    for i, data in enumerate(dataset):
+        if i >= max_samples:
+            break
+
+        for field_key in field_keys:
+            field_data = data[field_key]
+
+            # Compute batch statistics
+            batch_mean = field_data.mean(axis=(0))
+            batch_M2 = ((field_data - batch_mean) ** 2).sum(axis=(0))
+            batch_n = field_data.shape[0]
+
+            # Update min/max
+            batch_min = field_data.amin(dim=(0))
+            batch_max = field_data.amax(dim=(0))
+            min_val[field_key] = torch.minimum(min_val[field_key], batch_min)
+            max_val[field_key] = torch.maximum(max_val[field_key], batch_max)
+
+            # Update running mean and M2 (Welford's algorithm)
+            delta = batch_mean - mean[field_key]
+            N[field_key] += batch_n  # batch_n should also be torch.int64
+            mean[field_key] = mean[field_key] + delta * (batch_n / N[field_key])
+            M2[field_key] = (
+                M2[field_key]
+                + batch_M2
+                + delta**2 * (batch_n * N[field_key]) / N[field_key]
+            )
+
+        end = time.perf_counter()
+        iteration_time = end - start
+        print(f"on iteration {i} of {max_samples}, time: {iteration_time:.2f} seconds")
+        start = time.perf_counter()
+
+    global_end = time.perf_counter()
+    global_time = global_end - global_start
+
+    print(f"Total time: {global_time:.2f} seconds for {max_samples} samples")
+
+    var = {}
+    std = {}
+    for field_key in field_keys:
+        var[field_key] = M2[field_key] / (
+            N[field_key].item() - 1
+        )  # Convert N to Python int for division
+        std[field_key] = torch.sqrt(var[field_key])
+
+    return mean, std, min_val, max_val
diff --git a/physicsnemo/utils/domino/utils.py b/physicsnemo/utils/domino/utils.py
index 15437dca9e..01ee143253 100644
--- a/physicsnemo/utils/domino/utils.py
+++ b/physicsnemo/utils/domino/utils.py
@@ -485,10 +485,8 @@ def shuffle_array(
     points_per_chunk = [
         round(n_points * c.shape[0] / N_input_points) for c in chunk_weights
     ]
-    print(f"points_per_chunk: {points_per_chunk}")
 
     gap = n_points - sum(points_per_chunk)
-    print(f"gap: {gap}")
 
     if gap > 0:
         for g in range(gap):
@@ -703,8 +701,8 @@ def combine_dict(old_dict: dict[Any, Any], new_dict: dict[Any, Any]) -> dict[Any
 
 
 def create_grid(
-    max_coords: ArrayType, min_coords: ArrayType, resolution: ArrayType
-) -> ArrayType:
+    max_coords: torch.Tensor, min_coords: torch.Tensor, resolution: torch.Tensor
+) -> torch.Tensor:
     """Create a 3D regular grid from coordinate bounds and resolution.
 
     This function generates a regular 3D grid spanning from min_coords to
@@ -721,36 +719,37 @@ def create_grid(
         grid point. The last dimension contains [x, y, z] coordinates.
 
     Examples:
-        >>> import numpy as np
-        >>> min_bounds = np.array([0.0, 0.0, 0.0])
-        >>> max_bounds = np.array([1.0, 1.0, 1.0])
-        >>> grid_res = np.array([2, 2, 2])
+        >>> import torch
+        >>> min_bounds = torch.tensor([0.0, 0.0, 0.0])
+        >>> max_bounds = torch.tensor([1.0, 1.0, 1.0])
+        >>> grid_res = torch.tensor([2, 2, 2])
         >>> grid = create_grid(max_bounds, min_bounds, grid_res)
         >>> grid.shape
         (2, 2, 2, 3)
-        >>> np.allclose(grid[0, 0, 0], [0.0, 0.0, 0.0])
+        >>> torch.allclose(grid[0, 0, 0], torch.tensor([0.0, 0.0, 0.0]))
         True
-        >>> np.allclose(grid[1, 1, 1], [1.0, 1.0, 1.0])
+        >>> torch.allclose(grid[1, 1, 1], torch.tensor([1.0, 1.0, 1.0]))
         True
     """
-    xp = array_type(max_coords)
+    # Linspace to make evenly spaced steps along each axis:
+    dd = [
+        torch.linspace(
+            min_coords[i],
+            max_coords[i],
+            resolution[i],
+            dtype=max_coords.dtype,
+            device=max_coords.device,
+        )
+        for i in range(3)
+    ]
 
-    dx = xp.linspace(
-        min_coords[0], max_coords[0], resolution[0], dtype=max_coords.dtype
-    )
-    dy = xp.linspace(
-        min_coords[1], max_coords[1], resolution[1], dtype=max_coords.dtype
-    )
-    dz = xp.linspace(
-        min_coords[2], max_coords[2], resolution[2], dtype=max_coords.dtype
-    )
+    # Combine them with meshgrid:
+    xv, yv, zv = torch.meshgrid(*dd)
 
-    xv, yv, zv = xp.meshgrid(dx, dy, dz)
-    xv = xp.expand_dims(xv, -1)
-    yv = xp.expand_dims(yv, -1)
-    zv = xp.expand_dims(zv, -1)
-    grid = xp.concatenate((xv, yv, zv), axis=-1)
-    grid = xp.transpose(grid, (1, 0, 2, 3))
+    xv = xv.unsqueeze(-1)
+    yv = yv.unsqueeze(-1)
+    zv = zv.unsqueeze(-1)
+    grid = torch.concatenate((xv, yv, zv), axis=-1)
     return grid
 
 
From caf02908ad65370a1f65e2a8ffcdf86d33a378c7 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 27 Aug 2025 14:17:51 +0000
Subject: [PATCH 05/98] Adding a torch-centric domino datapipe and a separated,
 data-agnostic data set for IO.

This is reaching IO throughputs of about 5GB/s on ORD, so getting better.
---
 physicsnemo/datapipes/cae/domino_datapipe2.py | 397 ++++++++--------
 .../datapipes/cae/drivaer_ml_datapipe.py      | 430 ++++--------------
 physicsnemo/utils/sdf.py                      |  95 ++--
 3 files changed, 332 insertions(+), 590 deletions(-)

diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py
index 310493e3cb..86e2f88539 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe2.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe2.py
@@ -232,8 +232,10 @@ def __init__(
         if self.config.gpu_preprocessing or self.config.gpu_output:
             # Make sure we move data to the right device:
             target_device = dist.device
+            self.preprocess_stream = torch.cuda.Stream()
         else:
             target_device = torch.device("cpu")
+            self.preprocess_stream = None
 
         self.device = target_device
 
@@ -319,23 +321,28 @@ def __init__(
             data_dir=self.config.data_path,
             keys_to_read=self.keys_to_read,
             output_device=self.device,
+            consumer_stream=self.preprocess_stream,
         )
 
         # This is thread storage for data preprocessing:
         self._preprocess_queue = {}
         self._preprocess_events = {}
         self.preprocess_depth = 2
-        self.preprocess_executor = ThreadPoolExecutor(max_workers=2)
+        self.preprocess_executor = ThreadPoolExecutor(max_workers=1)
 
     def set_indices(self, indices: list[int]):
         """
         Set the indices for the dataset for this epoch.
         """
+
+        # TODO - this needs to block while anything is in the preprocess queue.
+
         self.indices = indices
 
     def __len__(self):
         return len(self.dataset)
 
+    @torch.compile(dynamic=True)
     def compute_stl_scaling(
         self, stl_vertices: torch.Tensor, bounding_box_dims_surf: torch.Tensor | None
     ):
@@ -382,6 +389,7 @@ def process_combined(
             surf_grid,
             use_sign_winding_number=True,
         )
+        sdf_surf_grid = surf_grid
 
         if self.config.sampling:
             geometry_points = self.config.geom_points_sample
@@ -398,7 +406,7 @@ def process_combined(
 
         return (sdf_surf_grid, geom_centers)
 
-    @profile
+    @torch.compile(dynamic=True)
     def process_surface(
         self,
         s_min: torch.Tensor,
@@ -467,19 +475,9 @@ def process_surface(
 
             if self.config.surface_sampling_algorithm == "area_weighted":
                 weights = surface_sizes
-                # (
-                #     surface_coordinates_sampled,
-                #     idx_surface,
-                # ) = area_weighted_shuffle_array(
-                #     surface_coordinates,
-                #     self.config.surface_points_sample,
-                #     surface_sizes,
-                # )
+
             else:
                 weights = None
-                # surface_coordinates_sampled, idx_surface = shuffle_array(
-                #     surface_coordinates, self.config.surface_points_sample
-                # )
 
             surface_coordinates_sampled, idx_surface = shuffle_array(
                 surface_coordinates,
@@ -498,20 +496,6 @@ def process_surface(
             surface_fields = surface_fields[idx_surface]
             pos_normals_com_surface = pos_normals_com_surface[idx_surface]
 
-            # Perform a kNN on the full set of points vs. sampled points
-            # to select the neighbors:
-            # if self.config.num_surface_neighbors > 1:
-            #     if self.array_provider == cp:
-            #         knn = cuml.neighbors.NearestNeighbors(
-            #             n_neighbors=self.config.num_surface_neighbors,
-            #             algorithm="rbc",
-            #         )
-            #         knn.fit(surface_coordinates)
-            #     else:
-            #         # Under the hood this is instantiating a KDTree.
-            #         # aka here knn is a type, not a class, technically.
-            #         interp_func = KDTree(surface_coordinates)
-
             # Now, perform the kNN on the sampled points:
             if self.config.num_surface_neighbors > 1:
                 neighbor_indices, neighbor_distances = knn(
@@ -585,7 +569,7 @@ def process_surface(
 
         return return_dict
 
-    @profile
+    @torch.compile(dynamic=True)
     def process_volume(
         self,
         s_min: torch.Tensor,
@@ -663,11 +647,7 @@ def process_volume(
                     mode="constant",
                     value=-10.0,
                 )
-                # volume_coordinates_sampled = pad(
-                #     volume_coordinates_sampled,
-                #     self.config.volume_points_sample,
-                #     pad_value=-10.0,
-                # )
+
             volume_fields = volume_fields[idx_volume]
             volume_coordinates = volume_coordinates_sampled
 
@@ -726,76 +706,94 @@ def process_volume(
         return return_dict
 
     @profile
-    def process_data(self, data_dict):
-        # Start building the preprocessed return dict:
-        return_dict = {
-            "global_params_values": data_dict["global_params_values"],
-            "global_params_reference": data_dict["global_params_reference"],
-        }
+    def process_data(self, data_dict, idx: int):
+        for key in self.keys_to_read_if_available.keys():
+            if key not in data_dict:
+                data_dict[key] = self.keys_to_read_if_available[key]
 
-        # This function gets information about the surface scale,
-        # and decides what the surface grid will be:
-        (s_min, s_max, length_scale, surf_grid_max_min, surf_grid) = (
-            self.compute_stl_scaling(
-                data_dict["stl_coordinates"], self.config.bounding_box_dims_surf
-            )
-        )
+        with torch.cuda.stream(self.preprocess_stream):
+            if self.config.deterministic:
+                torch.manual_seed(idx)
 
-        # This is a center of mass computation for the stl surface,
-        # using the size of each mesh point as weight.
+            # Start building the preprocessed return dict:
+            return_dict = {
+                "global_params_values": data_dict["global_params_values"],
+                "global_params_reference": data_dict["global_params_reference"],
+            }
 
-        center_of_mass = calculate_center_of_mass(
-            data_dict["stl_centers"], data_dict["stl_areas"]
-        )
+            # This function gets information about the surface scale,
+            # and decides what the surface grid will be:
+            (s_min, s_max, length_scale, surf_grid_max_min, surf_grid) = (
+                self.compute_stl_scaling(
+                    data_dict["stl_coordinates"], self.config.bounding_box_dims_surf
+                )
+            )
 
-        # For SDF calculations, make sure the mesh_indices_flattened is an integer array:
-        mesh_indices_flattened = data_dict["stl_faces"].to(torch.int32)
+            # This is a center of mass computation for the stl surface,
+            # using the size of each mesh point as weight.
 
-        return_dict.update(
-            {
-                "length_scale": length_scale,
-                "surf_grid_max_min": surf_grid_max_min,
-            }
-        )
+            center_of_mass = calculate_center_of_mass(
+                data_dict["stl_centers"], data_dict["stl_areas"]
+            )
 
-        # This will compute the sdf on the surface grid and apply downsampling if needed
-        sdf_surf_grid, geom_centers = self.preprocess_combined(
-            s_min,
-            s_max,
-            surf_grid,
-            stl_vertices=data_dict["stl_coordinates"],
-            mesh_indices_flattened=mesh_indices_flattened,
-        )
-        return_dict["sdf_surf_grid"] = sdf_surf_grid
-        return_dict["geometry_coordinates"] = geom_centers
+            # For SDF calculations, make sure the mesh_indices_flattened is an integer array:
+            mesh_indices_flattened = data_dict["stl_faces"].to(torch.int32)
 
-        # Up to here works all in torch!
+            return_dict.update(
+                {
+                    "length_scale": length_scale,
+                    "surf_grid_max_min": surf_grid_max_min,
+                }
+            )
 
-        if self.model_type == "volume" or self.model_type == "combined":
-            volume_dict = self.preprocess_volume(
+            # This will compute the sdf on the surface grid and apply downsampling if needed
+            sdf_surf_grid, geom_centers = self.process_combined(
                 s_min,
                 s_max,
-                volume_coordinates=data_dict["volume_mesh_centers"],
-                volume_fields=data_dict["volume_fields"],
+                surf_grid,
                 stl_vertices=data_dict["stl_coordinates"],
                 mesh_indices_flattened=mesh_indices_flattened,
-                center_of_mass=center_of_mass,
             )
+            return_dict["sdf_surf_grid"] = sdf_surf_grid
+            return_dict["geometry_coordinates"] = geom_centers
+
+            # Up to here works all in torch!
+
+            if self.model_type == "volume" or self.model_type == "combined":
+                volume_dict = self.process_volume(
+                    s_min,
+                    s_max,
+                    volume_coordinates=data_dict["volume_mesh_centers"],
+                    volume_fields=data_dict["volume_fields"],
+                    stl_vertices=data_dict["stl_coordinates"],
+                    mesh_indices_flattened=mesh_indices_flattened,
+                    center_of_mass=center_of_mass,
+                )
 
-            return_dict.update(volume_dict)
+                return_dict.update(volume_dict)
+
+            if self.model_type == "surface" or self.model_type == "combined":
+                surface_dict = self.process_surface(
+                    s_min,
+                    s_max,
+                    center_of_mass,
+                    surf_grid,
+                    surface_coordinates=data_dict["surface_mesh_centers"],
+                    surface_normals=data_dict["surface_normals"],
+                    surface_sizes=data_dict["surface_areas"],
+                    surface_fields=data_dict["surface_fields"],
+                )
+                return_dict.update(surface_dict)
 
-        if self.model_type == "surface" or self.model_type == "combined":
-            surface_dict = self.preprocess_surface(
-                s_min,
-                s_max,
-                center_of_mass,
-                surf_grid,
-                surface_coordinates=data_dict["surface_mesh_centers"],
-                surface_normals=data_dict["surface_normals"],
-                surface_sizes=data_dict["surface_areas"],
-                surface_fields=data_dict["surface_fields"],
-            )
-            return_dict.update(surface_dict)
+            if self.device.type == "cuda":
+                self._preprocess_events[idx] = torch.cuda.Event()
+                self._preprocess_events[idx].record(self.preprocess_stream)
+
+            # Mark all cuda tensors to be consumed on the main stream:
+            if self.device.type == "cuda":
+                for key in return_dict.keys():
+                    if isinstance(return_dict[key], torch.Tensor):
+                        return_dict[key].record_stream(torch.cuda.default_stream())
 
         return return_dict
 
@@ -808,138 +806,111 @@ def __getitem__(self, idx):
         are relatively large due to the mesh size.
         """
 
-        if self.config.deterministic:
-            torch.manual_seed(idx)
+        index = self.idx_to_index(idx)
+
+        # Get the preprocessed data:
+        data_dict = self.get_preprocessed(idx)
+        if data_dict is None:
+            # If no preprocessing was done for this index, process it now
+
+            # Get the data from the dataset.
+            # Under the hood, this may be fetching preloaded data.
+            data_dict = self.dataset[index]
+            data_dict = self.process_data(data_dict, idx)
+
+        # This blocks the main stream until the preprocessing has transferred to GPU
+        if idx in self._preprocess_events:
+            torch.cuda.current_stream().wait_event(self._preprocess_events[idx])
+            self._preprocess_events.pop(idx)
+
+        return data_dict
 
+    def idx_to_index(self, idx):
         if hasattr(self, "indices"):
-            index = self.indices[idx]
-        else:
-            index = idx
+            return self.indices[idx]
 
-        data_dict = self.dataset[index]
+        return idx
 
-        for key in self.keys_to_read_if_available.keys():
-            if key not in data_dict:
-                data_dict[key] = self.keys_to_read_if_available[key]
+    def preprocess(self, idx: int) -> None:
+        """
+        Start preprocessing for the given index (1 step ahead).
+        This processes preloaded data or loads it if not available.
+        """
+        if idx in self._preprocess_queue:
+            # Skip items that are already being preprocessed
+            return
+
+        def _preprocess_worker():
+            index = self.idx_to_index(idx)
+            # Try to get preloaded data first
+            data_dict = self.dataset[index]
+            # Process the data
+            return self.process_data(data_dict, idx)
+
+        # Submit preprocessing task to thread pool
+        self._preprocess_queue[idx] = self.preprocess_executor.submit(
+            _preprocess_worker
+        )
 
-        return_dict = self.process_data(data_dict)
+    def get_preprocessed(self, idx: int) -> dict | None:
+        """
+        Retrieve preprocessed data (blocking if not ready).
+        Returns None if no preprocessing is in progress for this index.
+        """
+        if idx not in self._preprocess_queue:
+            return None
 
-        return return_dict
+        result = self._preprocess_queue[idx].result()  # Block until ready
+        self._preprocess_queue.pop(idx)  # Clear after getting result
+
+        return result
+
+    def __next__(self):
+        # To iterate through the data efficiently, he have to implement the
+        # following, assuming a steady state
+
+        # - start the dataset loading at idx + 2
+        # - start the preprocessing pipe at idx + 1
+        #   - the preprocessing pipe has to implicitly wait for idx +1 in the dataset
+        # - wait for the preprocessing pipe at idx to finish
+        # return the data.
+        if self.i >= len(self.dataset):
+            self.i = 0
+            raise StopIteration
+
+        current_idx = self.i
+
+        # Start loading two ahead:
+        if len(self.dataset) >= current_idx + 2:
+            self.dataset.preload(self.idx_to_index(current_idx + 2))
+
+        # Start preprocessing one ahead:
+        if len(self.dataset) >= current_idx + 1:
+            self.preprocess(current_idx + 1)
+
+        # If no preprocessing was done for this index, process it now
+        data = self.__getitem__(current_idx)
+
+        self.i += 1
+        return data
+
+    def __iter__(self):
+        # When starting the iterator method, start loading the data
+        # at idx = 0, idx = 1
+        # Start preprocessing at idx = 0, when the load completes
+
+        self.i = 0
+
+        # Trigger the dataset to start loading index 0:
+        if len(self.dataset) >= 1:
+            self.dataset.preload(self.idx_to_index(self.i))
+        if len(self.dataset) >= 2:
+            self.dataset.preload(self.idx_to_index(self.i + 1))
+
+        # Start preprocessing index 0
+        self.preprocess(self.i)
 
-    # def __getitem__(self, idx: int) -> dict[str, torch.Tensor | ShardTensor]:
-    #     """
-    #     Get a data sample.
-
-    #     Flow is:
-    #     - Read data, or get preloaded data if this idx is preloaded.
-    #     - Move data to GPU, if needed.
-    #         - Preloading data will move to GPU if it can.
-    #     - If domain parallelism is enabled, convert to ShardTensors.
-    #     - Return
-
-    #     Args:
-    #         idx: Index of the sample to retrieve
-
-    #     Returns:
-    #         Dictionary containing tensors/ShardTensors for the requested data
-    #     """
-
-    #     if idx >= len(self._filenames):
-    #         raise IndexError(
-    #             f"Index {idx} out of range for dataset of size {len(self._filenames)}"
-    #         )
-
-    #     # Attempt to get preloaded data:
-    #     data = self.get_preloaded(idx)
-    #     if data is None:
-    #         # Read data from zarr file
-    #         data = self._read_file(self._filenames[idx])
-    #         data = self._move_to_gpu(data, idx)
-
-    #     # This blocks until the preprocessing has transferred to GPU
-    #     if idx in self._transfer_events:
-    #         torch.cuda.current_stream().wait_event(self._transfer_events[idx])
-    #         self._transfer_events.pop(idx)
-
-    #     # Convert to ShardTensors if using domain parallelism
-    #     if self.device_mesh is not None:
-    #         data = self._convert_to_shard_tensors(data)
-
-    #     return data
-
-    # def __iter__(self):
-    #     self.i = 0
-    #     return self
-
-    # def __next__(self):
-    #     """
-    #     When used in an iterator context, this datapipe will
-    #     leverage preloading and preprocessing to speed up the data
-    #     loading latency.
-
-    #     Each time "next" is called, the datapipe will ask the data
-    #     set to preload the data 2 steps ahead.  It will then ask
-    #     for the data from one step ahead, and start it processing.
-
-    #     Finally, it will return the data from this requested index
-    #     """
-    #     if self.i >= len(self._filenames):
-    #         self.i = 0
-    #         raise StopIteration
-
-    #     if self.preload_depth > 0 and self.i + 1 < len(self._filenames):
-    #         self.preload(this_index)
-    #     if self.preload_depth > 1 and self.i + 2 < len(self._filenames):
-    #         self.preload(this_index)
-
-    #     data = self.__getitem__(this_index)
-
-    #     self.i += 1
-
-    #     return data
-
-    # def preprocess(self, idx: int) -> None:
-    #     """
-    #     Asynchronously preload the data for the given index (up to CPU, not GPU).
-    #     Only one preload operation is supported at a time.
-
-    #     Args:
-    #         idx: Index of the sample to preload.
-    #     """
-    #     if idx in self._preload_queue:
-    #         # Skip items that are already in the queue
-    #         return
-
-    #     def _preload_worker():
-    #         try:
-    #             data = self._read_file(self._filenames[idx])
-    #             # Convert to torch tensors
-    #             return self._move_to_gpu(data, idx)
-    #         except Exception as e:
-    #             print(f"Exception in preload: {e}")
-    #             raise e
-
-    #     self._preload_queue[idx] = self.preload_executor.submit(_preload_worker)
-
-    # def get_preloaded(self, idx: int) -> dict[str, torch.Tensor] | None:
-    #     """
-    #     Retrieve the preloaded data (blocking if not ready).
-
-    #     Returns:
-    #         (idx, data) tuple where data is a dictionary of key to numpy array or torch tensor.
-
-    #     Raises:
-    #         RuntimeError: If no preload is in progress.
-    #         Exception: If preload failed.
-    #     """
-
-    #     if idx not in self._preload_queue:
-    #         return None
-
-    #     result = self._preload_queue[idx].result()  # This will block until the result is ready
-    #     self._preload_queue.pop(idx) # Clear the future after getting the result
-
-    #     return result
+        return self
 
 
 @profile
diff --git a/physicsnemo/datapipes/cae/drivaer_ml_datapipe.py b/physicsnemo/datapipes/cae/drivaer_ml_datapipe.py
index 84eea51ea5..67e137bf13 100644
--- a/physicsnemo/datapipes/cae/drivaer_ml_datapipe.py
+++ b/physicsnemo/datapipes/cae/drivaer_ml_datapipe.py
@@ -14,21 +14,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import multiprocessing as mp
-import os
 import pathlib
-import sys
 import time
 from abc import ABC, abstractmethod
 from concurrent.futures import ThreadPoolExecutor
-from multiprocessing import shared_memory
 
 import numpy as np
 import psutil
-import tensorstore as ts
 import torch
 import zarr
 
+try:
+    import tensorstore as ts
+
+    TENSORSTORE_AVAILABLE = True
+except ImportError:
+    TENSORSTORE_AVAILABLE = False
+
 from physicsnemo.distributed import ShardTensor, ShardTensorSpec
 
 # from physicsnemo.distributed.utils import compute_split_shapes
@@ -159,335 +161,70 @@ def read_file_sharded(
         pass
 
 
-class TensorStoreZarrReader(BackendReader):
-    """
-    Reader for tensorstore zarr files.
-    """
-
-    def __init__(self, keys_to_read: list[str] | None) -> None:
-        super().__init__(keys_to_read)
-
-        self.spec_template = {
-            "driver": "zarr2",
-            "kvstore": {
-                "driver": "file",
-                "path": None,
-            },
-        }
-
-        self.context = ts.Context(
-            {
-                "cache_pool": {"total_bytes_limit": 10000000},
-                "data_copy_concurrency": {"limit": 32},
-            }
-        )
-
-    def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]:
-        """
-        Read a file and return a dictionary of tensors.
-        """
-        read_futures = {}
-        for key in self.keys_to_read:
-            spec = self.spec_template.copy()
-            spec["kvstore"]["path"] = str(filename) + "/" + str(key)
-
-            read_futures[key] = ts.open(
-                spec, create=False, open=True, context=self.context
-            )
-
-        results = {
-            key: np.array(read_futures[key].result()) for key in self.keys_to_read
-        }
-
-        data = {
-            key: torch.as_tensor(results[key], dtype=torch.float32)
-            for key in self.keys_to_read
-        }
-
-        return data
-
-    def read_file_sharded(
-        self, filename: pathlib.Path, parallel_rank: int, parallel_size: int
-    ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]:
-        """
-        Read a file and return a dictionary of tensors.
-        """
-        pass
-
-
-class ZarrReadWorker:
-    """
-    This class is a worker for the ZarrReadController.
-    It reads tasks from the task queue and writes to the shared memory buffer.
-    It then sends an acknowledgement to the controller.
-    """
+if TENSORSTORE_AVAILABLE:
 
-    def __init__(self, task_q: mp.Queue, ack_q: mp.Queue):
+    class TensorStoreZarrReader(BackendReader):
         """
-
-        task_q is the incoming Queue of chunks to read
-        ack_q is the outgoing acknowledgement of reads
+        Reader for tensorstore zarr files.
         """
 
-        self.task_q = task_q
-        self.ack_q = ack_q
-
-        self.current_group = None
-        self.current_array = None
-
-        self.zarr_cache = {}
-
-    def run(self):
-        """
-        This function is the main loop for the worker.
-        It reads tasks from the task queue and writes to the shared memory buffer.
-        It then sends an acknowledgement to the controller.
-        """
-
-        while True:
-            # Run until killed
-
-            task = self.task_q.get()
-
-            if task is None:
-                break
-
-            # Task organization:
-            # (
-            #     zarr_path - file name we're reading, a group
-            #     array_name,  - array in that group
-            #     read_idx - a unique integer representing the read we're about to do
-            #     index slice_to_read - the np.slice object representing what in the original file to read
-            #     shared_buffer_name - the unique name of the shared buffer this read will use
-            #     shared_buffer_loc - the slice of the shared buffer to store into
-            # )
-
-            (
-                zarr_path,
-                array_name,
-                read_idx,
-                slice_to_read,
-                shared_buffer_name,
-                shared_buffer_slice,
-            ) = task
+        def __init__(self, keys_to_read: list[str] | None) -> None:
+            super().__init__(keys_to_read)
 
-            if zarr_path not in self.zarr_cache:
-                self.zarr_cache[zarr_path] = zarr.open_group(zarr_path)
-
-            z = self.zarr_cache[zarr_path]
-
-            arr = z[array_name]
-
-            # Get the shared memory instance:
-            shm = shared_memory.SharedMemory(name=shared_buffer_name)
-            buf = np.ndarray(arr.shape, arr.dtype, buffer=shm.buf)
-
-            # Perform the local read (and implicit decompress):
-            buf[shared_buffer_slice] = arr[slice_to_read]
-
-            shm.close()
+            self.spec_template = {
+                "driver": "zarr2",
+                "kvstore": {
+                    "driver": "file",
+                    "path": None,
+                },
+            }
 
-            # Send completion signal:
-            self.ack_q.put(
-                (
-                    "done",
-                    read_idx,
-                )
+            self.context = ts.Context(
+                {
+                    "cache_pool": {"total_bytes_limit": 30_000_000},
+                    "data_copy_concurrency": {"limit": 60},
+                }
             )
 
+        def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]:
+            """
+            Read a file and return a dictionary of tensors.
+            """
+            read_futures = {}
+            for key in self.keys_to_read:
+                spec = self.spec_template.copy()
+                spec["kvstore"]["path"] = str(filename) + "/" + str(key)
+
+                read_futures[key] = ts.open(
+                    spec, create=False, open=True, context=self.context
+                )
 
-def spawn_worker(task_q: mp.Queue, ack_q: mp.Queue):
-    worker = ZarrReadWorker(task_q, ack_q)
-    worker.run()
-
-
-class ZarrReadController(BackendReader):
-    """
-    This class maintains a persistent pool of processes to enable shared
-    memory reading of zarr groups.  Users can control how many processes
-    to use, and which pool of CPUs they reside on.
-
-    By default reading is done by passing chunks to each worker to read.
-    Reads are round-robin across children processes.  Master process will
-    not return until all children reads have acknowledged.
-    """
-
-    def __init__(
-        self,
-        keys_to_read: list[str] | None,
-        num_read_processes: int | None = None,
-    ):
-        super().__init__(keys_to_read)
-
-        self.available_cpus = psutil.Process().cpu_affinity()
-        if num_read_processes is None:
-            # Use all but one CPU, unless there is only one...
-            num_read_processes = max(1, len(self.available_cpus) - 1)
-
-        self.num_read_processes = num_read_processes
-        print(f"num_read_processes: {num_read_processes}")
-        # If the target_cpus aren't set, we use some default settings:
-
-        # Initialize Queues:
-        self.task_q = mp.Queue()
-        self.ack_q = mp.Queue()
-
-        self.children = []
-
-        self.memory_buffers = {}
-
-        self.spawn_children()
-
-    def spawn_children(
-        self,
-    ):
-        if mp.get_start_method() != "fork" and not hasattr(sys, "frozen"):
-            # Prevent accidental spawn in child imports
-            if not hasattr(self, "_spawn_guard"):
-                self._spawn_guard = True
-            else:
-                return
-
-        # Create processes, using psutil to set affinity at spawn time.
-
-        stride = len(self.available_cpus) // self.num_read_processes
-        cpus_by_proc = [
-            self.available_cpus[i * stride : (i + 1) * stride]
-            for i in range(self.num_read_processes)
-        ]
-
-        # split the available cpus into num_read_processes chunks
-
-        for i, cpus in enumerate(cpus_by_proc):
-            psutil_process = psutil.Process()
-            psutil_process.cpu_affinity(cpus)
-            proc = mp.Process(target=spawn_worker, args=(self.task_q, self.ack_q))
-            psutil_process.cpu_affinity(self.available_cpus)
-            self.children.append(proc)
-
-        for worker in self.children:
-            worker.start()
-
-    def free_shared_memory(self, zarr_file):
-        # Free all the shared memory buffers that were opened for the specified file
-        if zarr_file in self.memory_buffers:
-            for buffer in self.memory_buffers[zarr_file]:
-                buffer.close()
-                buffer.unlink()
-
-        self.memory_buffers.pop(zarr_file)
-
-    def read_file(self, zarr_file: str):
-        print(f"zarr_file: {zarr_file}")
-        file_id = os.path.basename(zarr_file)
-
-        # Open the file:
-        z = zarr.open_group(zarr_file)
-
-        output_arrays = {}
-
-        if zarr_file in self.memory_buffers:
-            self.free_shared_memory(zarr_file)
-
-        self.memory_buffers[zarr_file] = []
-
-        required_idx = []
-
-        for key in self.keys_to_read:
-            # Get the metadata for this key:
-            arr = z[key]
+            results = {
+                key: np.array(read_futures[key].result()) for key in self.keys_to_read
+            }
 
-            # Allocate the entire buffer:
-            buffer_size = np.prod(arr.shape) * np.dtype(arr.dtype).itemsize
+            data = {
+                key: torch.as_tensor(results[key], dtype=torch.float32)
+                for key in self.keys_to_read
+            }
 
-            shm = shared_memory.SharedMemory(
-                create=True,
-                size=buffer_size,
-            )
-            np_buffer = np.ndarray(arr.shape, dtype=arr.dtype, buffer=shm.buf)
-
-            # Make sure we don't unlink it prematurely:
-            self.memory_buffers[zarr_file].append(shm)
-
-            output_arrays[key] = np_buffer
-
-            zarr_chunk_size = arr.chunks[0]
-
-            # Define the read boundaries for slicing:
-            slice_starts = list(range(0, arr.shape[0], zarr_chunk_size))
-            slice_stops = [start + zarr_chunk_size for start in slice_starts]
-
-            # Correct the last stop point:
-            slice_stops[-1] = arr.shape[0]
-
-            # Task organization:
-            # (
-            #     zarr_path - file name we're reading, a group
-            #     array_name,  - array in that group
-            #     read_idx - a unique integer representing the read we're about to do
-            #     index slice_to_read - the np.slice object representing what in the original file to read
-            #     shared_buffer_name - the unique name of the shared buffer this read will use
-            #     shared_buffer_loc - the slice of the shared buffer to store into
-            # )
-
-            for i, (slice_start, slice_stop) in enumerate(
-                zip(slice_starts, slice_stops)
-            ):
-                cpu_slice = np.s_[slice_start:slice_stop]
-                zarr_slice = np.s_[slice_start:slice_stop]
-
-                length = slice_stop - slice_start
-
-                read_idx = f"{file_id}_{key}_{i}_{length}"
-                required_idx.append(read_idx)
-
-                task_args = (
-                    zarr_file,
-                    key,
-                    read_idx,
-                    zarr_slice,
-                    shm.name,
-                    cpu_slice,
-                )
-                self.task_q.put(task_args)
-
-        # Now, let's check for completeness before returning:
-        completed = False
-        while not completed:
-            status, idx = self.ack_q.get()
-            if status == "done":
-                if idx not in required_idx:
-                    # Put it back in the queue, it's for another file:
-                    self.ack_q.put((status, idx))
-                else:
-                    required_idx.remove(idx)
-            completed = len(required_idx) == 0
-
-        return {key: torch.as_tensor(output_arrays[key]) for key in self.keys_to_read}
+            return data
 
-    def read_file_sharded(
-        self, filename: pathlib.Path, parallel_rank: int, parallel_size: int
-    ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]:
-        """
-        Read a file and return a dictionary of tensors.
-        """
-        pass
+        def read_file_sharded(
+            self, filename: pathlib.Path, parallel_rank: int, parallel_size: int
+        ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]:
+            """
+            Read a file and return a dictionary of tensors.
+            """
+            pass
+else:
 
-    def __del__(self):
+    class TensorStoreZarrReader(BackendReader):
         """
-        Make sure we're not leaving things open we shouldn't be
+        Null reader for tensorstore zarr files.
         """
 
-        # Stop all the workers by sending None on the queue:
-        for child in self.children:
-            self.task_q.put(None)
-
-        for child in self.children:
-            child.join()
-
-        open_files = list(self.memory_buffers.keys())
-        for zf in open_files:
-            self.free_shared_memory(zf)
+        pass
 
 
 class DrivaerMLDataset:
@@ -530,6 +267,7 @@ def __init__(
         preload_depth: int = 2,
         device_mesh: torch.distributed.DeviceMesh | None = None,
         placements: dict[str, torch.distributed.tensor.Placement] | None = None,
+        consumer_stream: torch.cuda.Stream | None = None,
     ) -> None:
         if isinstance(data_dir, str):
             data_dir = pathlib.Path(data_dir)
@@ -542,21 +280,10 @@ def __init__(
         if not data_dir.is_dir():
             raise NotADirectoryError(f"Data directory {data_dir} is not a directory")
 
-        self._file_type, self._filenames = self._infer_file_type_and_filenames(data_dir)
-
-        # Initialize the file reader object
-        # Note that for some of these, they could be functions
-        # But others benefit from having a state, so we use classes:
-        if self._file_type == "npy":
-            self.file_reader = NpyFileReader(keys_to_read)
-        elif self._file_type == "zarr":
-            # self.file_reader = ZarrFileReader(keys_to_read)
-            # self.file_reader = ZarrReadController(keys_to_read)
-            self.file_reader = TensorStoreZarrReader(keys_to_read)
-        else:
-            raise ValueError(f"Unsupported file type: {self._file_type}")
-
         self._keys_to_read = keys_to_read
+        self.file_reader, self._filenames = self._infer_file_type_and_filenames(
+            data_dir
+        )
 
         # Check the file names; some can be read well in parallel, while others
         # are not parallelizable.
@@ -578,6 +305,11 @@ def __init__(
         self.preload_depth = preload_depth
         self.preload_executor = ThreadPoolExecutor(max_workers=preload_depth)
 
+        if consumer_stream is None and self.output_device.type == "cuda":
+            consumer_stream = torch.cuda.current_stream()
+
+        self.consumer_stream = consumer_stream
+
     def _infer_file_type_and_filenames(
         self, data_dir: pathlib.Path
     ) -> tuple[str, list[str]]:
@@ -590,10 +322,19 @@ def _infer_file_type_and_filenames(
         # List the files:
         files = list(data_dir.iterdir())
 
+        # Initialize the file reader object
+        # Note that for some of these, they could be functions
+        # But others benefit from having a state, so we use classes:
+
         if all(file.suffix == ".npy" for file in files):
-            return "npy", files
+            file_reader = NpyFileReader(self._keys_to_read)
+            return file_reader, files
         elif all(file.suffix == ".zarr" and file.is_dir() for file in files):
-            return "zarr", files
+            if TENSORSTORE_AVAILABLE:
+                file_reader = TensorStoreZarrReader(self._keys_to_read)
+            else:
+                file_reader = ZarrFileReader(self._keys_to_read)
+            return file_reader, files
         else:
             # TODO - support folders of stl, vtp, vtu.
             raise ValueError(f"Unsupported file type: {files}")
@@ -613,15 +354,18 @@ def _move_to_gpu(
         if self.output_device.type != "cuda":
             return data
 
+        # result = StreamDict()
         result = {}
 
         with torch.cuda.stream(self._data_loader_stream):
             for key in data.keys():
                 # Move to GPU if available
                 result[key] = data[key].to(self.output_device, non_blocking=True)
-
-            self._transfer_events[idx] = torch.cuda.Event()
-            self._transfer_events[idx].record(self._data_loader_stream)
+                result[key].record_stream(self.consumer_stream)
+            # Mark the consumer stream:
+            transfer_event = torch.cuda.Event()
+            transfer_event.record(self._data_loader_stream)
+            # result.set_event("transfer", transfer_event)
 
         return result
 
@@ -684,13 +428,9 @@ def preload(self, idx: int) -> None:
             return
 
         def _preload_worker():
-            try:
-                data = self._read_file(self._filenames[idx])
-                # Convert to torch tensors
-                return self._move_to_gpu(data, idx)
-            except Exception as e:
-                print(f"Exception in preload: {e}")
-                raise e
+            data = self._read_file(self._filenames[idx])
+            # Convert to torch tensors
+            return self._move_to_gpu(data, idx)
 
         self._preload_queue[idx] = self.preload_executor.submit(_preload_worker)
 
@@ -777,7 +517,7 @@ def __getitem__(self, idx: int) -> dict[str, torch.Tensor | ShardTensor]:
 
         # This blocks until the preprocessing has transferred to GPU
         if idx in self._transfer_events:
-            torch.cuda.current_stream().wait_event(self._transfer_events[idx])
+            self.consumer_stream.wait_event(self._transfer_events[idx])
             self._transfer_events.pop(idx)
 
         # Convert to ShardTensors if using domain parallelism
diff --git a/physicsnemo/utils/sdf.py b/physicsnemo/utils/sdf.py
index 446b7b5d54..495b0839be 100644
--- a/physicsnemo/utils/sdf.py
+++ b/physicsnemo/utils/sdf.py
@@ -67,13 +67,14 @@ def _bvh_query_distance(
     sdf_hit_point[tid] = p_closest
 
 
+@torch.library.custom_op("physicsnemo::signed_distance_field", mutates_args=())
 def signed_distance_field(
     mesh_vertices: torch.Tensor,
     mesh_indices: torch.Tensor,
     input_points: torch.Tensor,
     max_dist: float = 1e8,
     use_sign_winding_number: bool = False,
-):
+) -> tuple[torch.Tensor, torch.Tensor]:
     """
     Computes the signed distance field (SDF) for a given mesh and input points.
 
@@ -131,23 +132,6 @@ def signed_distance_field(
     sdf = torch.zeros(N, dtype=torch.float32, device=input_points.device)
     sdf_hit_point = torch.zeros(N, 3, dtype=torch.float32, device=input_points.device)
 
-    wp.init()
-
-    # zero copy the vertices, indices, and input points to warp:
-    wp_vertices = wp.from_torch(mesh_vertices, dtype=wp.vec3)
-    wp_indices = wp.from_torch(mesh_indices, dtype=wp.int32)
-    wp_input_points = wp.from_torch(input_points, dtype=wp.vec3)
-
-    # Convert output points:
-    wp_sdf = wp.from_torch(sdf, dtype=wp.float32)
-    wp_sdf_hit_point = wp.from_torch(sdf_hit_point, dtype=wp.vec3f)
-
-    mesh = wp.Mesh(
-        points=wp_vertices,
-        indices=wp_indices,
-        support_winding_number=use_sign_winding_number,
-    )
-
     if input_points.device.type == "cuda":
         wp_launch_stream = wp.stream_from_torch(
             torch.cuda.current_stream(input_points.device)
@@ -157,23 +141,70 @@ def signed_distance_field(
         wp_launch_stream = None
         wp_launch_device = "cpu"  # CPUs have no streams
 
-    wp.launch(
-        kernel=_bvh_query_distance,
-        dim=N,
-        inputs=[
-            mesh.id,
-            wp_input_points,
-            max_dist,
-            wp_sdf,
-            wp_sdf_hit_point,
-            use_sign_winding_number,
-        ],
-        device=wp_launch_device,
-        stream=wp_launch_stream,
-    )
+    with wp.ScopedStream(wp_launch_stream):
+        wp.init()
+
+        # zero copy the vertices, indices, and input points to warp:
+        wp_vertices = wp.from_torch(mesh_vertices, dtype=wp.vec3)
+        wp_indices = wp.from_torch(mesh_indices, dtype=wp.int32)
+        wp_input_points = wp.from_torch(input_points, dtype=wp.vec3)
+
+        # Convert output points:
+        wp_sdf = wp.from_torch(sdf, dtype=wp.float32)
+        wp_sdf_hit_point = wp.from_torch(sdf_hit_point, dtype=wp.vec3f)
+
+        mesh = wp.Mesh(
+            points=wp_vertices,
+            indices=wp_indices,
+            support_winding_number=use_sign_winding_number,
+        )
+
+        wp.launch(
+            kernel=_bvh_query_distance,
+            dim=N,
+            inputs=[
+                mesh.id,
+                wp_input_points,
+                max_dist,
+                wp_sdf,
+                wp_sdf_hit_point,
+                use_sign_winding_number,
+            ],
+            device=wp_launch_device,
+            stream=wp_launch_stream,
+        )
 
     # Unflatten the output to be like the input:
     sdf = sdf.reshape(input_shape[:-1] + (1,))
     sdf_hit_point = sdf_hit_point.reshape(input_shape)
 
     return sdf, sdf_hit_point
+
+
+@signed_distance_field.register_fake
+def _(
+    mesh_vertices: torch.Tensor,
+    mesh_indices: torch.Tensor,
+    input_points: torch.Tensor,
+    max_dist: float = 1e8,
+    use_sign_winding_number: bool = False,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    if mesh_vertices.device != input_points.device:
+        raise RuntimeError("mesh_vertices and input_points must be on the same device")
+
+    if mesh_vertices.device != mesh_indices.device:
+        raise RuntimeError("mesh_vertices and mesh_indices must be on the same device")
+
+    if mesh_vertices.shape[0] != mesh_indices.shape[0]:
+        raise RuntimeError(
+            "mesh_vertices and mesh_indices must have the same number of points"
+        )
+
+    N = input_points.shape[0]
+
+    sdf_output = torch.empty(N, 1, device=input_points.device, dtype=input_points.dtype)
+    sdf_hit_point_output = torch.empty(
+        N, 3, device=input_points.device, dtype=input_points.dtype
+    )
+
+    return sdf_output, sdf_hit_point_output

From 7fb5f8eb938e8be8d41363547cc0b5d388b72d9c Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 27 Aug 2025 14:19:16 +0000
Subject: [PATCH 06/98] Rename datapipe file to dataset.

---
 .../cae/{drivaer_ml_datapipe.py => drivaer_ml_dataset.py}         | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename physicsnemo/datapipes/cae/{drivaer_ml_datapipe.py => drivaer_ml_dataset.py} (100%)

diff --git a/physicsnemo/datapipes/cae/drivaer_ml_datapipe.py b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
similarity index 100%
rename from physicsnemo/datapipes/cae/drivaer_ml_datapipe.py
rename to physicsnemo/datapipes/cae/drivaer_ml_dataset.py

From 0fb0ed25e08b3dd1a4fbf6ceacf416a0786f70dc Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 27 Aug 2025 15:22:04 +0000
Subject: [PATCH 07/98] Update SDF function and test.  Auto convert higher
 precisions to match the kernel precision.

The test had some expected numbers that, I believe, were incorrect.
---
 physicsnemo/utils/sdf.py |  8 +++----
 test/utils/test_sdf.py   | 51 +++++++++++++++++++++++++---------------
 2 files changed, 36 insertions(+), 23 deletions(-)

diff --git a/physicsnemo/utils/sdf.py b/physicsnemo/utils/sdf.py
index 495b0839be..f9216bdd16 100644
--- a/physicsnemo/utils/sdf.py
+++ b/physicsnemo/utils/sdf.py
@@ -145,9 +145,9 @@ def signed_distance_field(
         wp.init()
 
         # zero copy the vertices, indices, and input points to warp:
-        wp_vertices = wp.from_torch(mesh_vertices, dtype=wp.vec3)
-        wp_indices = wp.from_torch(mesh_indices, dtype=wp.int32)
-        wp_input_points = wp.from_torch(input_points, dtype=wp.vec3)
+        wp_vertices = wp.from_torch(mesh_vertices.to(torch.float32), dtype=wp.vec3)
+        wp_indices = wp.from_torch(mesh_indices.to(torch.int32), dtype=wp.int32)
+        wp_input_points = wp.from_torch(input_points.to(torch.float32), dtype=wp.vec3)
 
         # Convert output points:
         wp_sdf = wp.from_torch(sdf, dtype=wp.float32)
@@ -178,7 +178,7 @@ def signed_distance_field(
     sdf = sdf.reshape(input_shape[:-1] + (1,))
     sdf_hit_point = sdf_hit_point.reshape(input_shape)
 
-    return sdf, sdf_hit_point
+    return sdf.to(input_points.dtype), sdf_hit_point.to(input_points.dtype)
 
 
 @signed_distance_field.register_fake
diff --git a/test/utils/test_sdf.py b/test/utils/test_sdf.py
index 107e5e0316..f449469b5a 100644
--- a/test/utils/test_sdf.py
+++ b/test/utils/test_sdf.py
@@ -16,12 +16,13 @@
 # ruff: noqa: E402
 
 
-import numpy as np
+import pytest
+import torch
 from pytest_utils import import_or_fail
 
 
 def tet_verts(flip_x=1):
-    tet = np.array(
+    tet = torch.tensor(
         [
             flip_x * 0,
             0,
@@ -60,35 +61,47 @@ def tet_verts(flip_x=1):
             0,
             1,
         ],
-        dtype=np.float64,
+        dtype=torch.float64,
     )
 
     return tet
 
 
 @import_or_fail("warp")
-def test_sdf(pytestconfig):
+@pytest.mark.parametrize("dtype", [torch.float32, torch.float64])
+@pytest.mark.parametrize("device", ["cpu", "cuda"])
+def test_sdf(pytestconfig, dtype, device):
     from physicsnemo.utils.sdf import signed_distance_field
 
-    tet = tet_verts()
+    mesh_vertices = tet_verts().reshape(-1, 3)
 
-    sdf_tet = signed_distance_field(
-        tet,
-        np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
-        np.array([1, 1, 1, 0.1, 0.1, 0.1], dtype=np.float64),
+    if device == "cuda":
+        device = torch.device("cuda")
+    else:
+        device = torch.device("cpu")
+
+    mesh_indices = torch.tensor(
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], dtype=torch.int32
     )
-    np.testing.assert_allclose(sdf_tet, [1.15470052, -0.1], atol=1e-7)
+    input_points = torch.tensor([[1, 1, 1], [0.05, 0.1, 0.1]], dtype=torch.float64)
+
+    mesh_vertices = mesh_vertices.to(dtype)
+    input_points = input_points.to(dtype)
 
-    sdf_tet, sdf_hit_point, sdf_hit_point_id = signed_distance_field(
-        tet,
-        np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], dtype=np.int32),
-        np.array([1, 1, 1, 0.12, 0.11, 0.1], dtype=np.float64),
-        include_hit_points=True,
-        include_hit_points_id=True,
+    sdf_tet, sdf_hit_point = signed_distance_field(
+        mesh_vertices,
+        mesh_indices,
+        input_points,
+        use_sign_winding_number=False,
     )
-    np.testing.assert_allclose(
+
+    expected_sdf = torch.tensor([[1.1547], [-0.05]], dtype=dtype)
+    assert torch.allclose(sdf_tet, expected_sdf, atol=1e-7)
+
+    assert torch.allclose(
         sdf_hit_point,
-        [[0.33333322, 0.33333334, 0.3333334], [0.12000002, 0.11, 0.0]],
+        torch.tensor(
+            [[0.33333322, 0.33333334, 0.3333334], [0.0, 0.10, 0.10]], dtype=dtype
+        ),
         atol=1e-7,
     )
-    np.testing.assert_allclose(sdf_hit_point_id, [3, 0], atol=1e-7)

From 60c3535ab2f209c7773846cd574005351187d195 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 27 Aug 2025 15:32:20 +0000
Subject: [PATCH 08/98] Add IO benchmark

---
 .../domino/src/benchmark_dataloader.py        | 27 +++++++++++--------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
index 95b39cedd3..b1f5184fc6 100644
--- a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
+++ b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
@@ -54,7 +54,7 @@
 from physicsnemo.launch.utils import load_checkpoint, save_checkpoint
 from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper
 
-from physicsnemo.datapipes.cae.domino_datapipe import (
+from physicsnemo.datapipes.cae.domino_datapipe2 import (
     DoMINODataPipe,
     compute_scaling_factors,
     create_domino_dataset,
@@ -88,13 +88,14 @@ def train_epoch(
     print(f"indices: {indices}")
     # If you tell the dataloader the indices in advance, it will preload
     # and pre-preprocess data
-    dataloader.set_indices(indices)
+    # dataloader.set_indices(indices)
 
     gpu_start_info = nvmlDeviceGetMemoryInfo(gpu_handle)
     start_time = time.perf_counter()
     for i_batch, sample_batched in enumerate(dataloader):
         # sampled_batched = dict_to_device(sample_batched, device)
-
+        # if i_batch == 7:
+        # break
         # for key in sampled_batched.keys():
         #     print(f"{key}: {sampled_batched[key].shape}")
 
@@ -232,14 +233,15 @@ def main(cfg: DictConfig) -> None:
         logger.info(f"Device {dist.device}, epoch {epoch}:")
 
         epoch_start_time = time.perf_counter()
-        train_epoch(
-            dataloader=train_dataset,
-            sampler=train_sampler,
-            logger=logger,
-            gpu_handle=gpu_handle,
-            epoch_index=epoch,
-            device=dist.device,
-        )
+        with Profiler():
+            train_epoch(
+                dataloader=train_dataset,
+                sampler=train_sampler,
+                logger=logger,
+                gpu_handle=gpu_handle,
+                epoch_index=epoch,
+                device=dist.device,
+            )
         epoch_end_time = time.perf_counter()
         logger.info(
             f"Device {dist.device}, Epoch {epoch} took {epoch_end_time - epoch_start_time:.3f} seconds"
@@ -247,4 +249,7 @@ def main(cfg: DictConfig) -> None:
 
 
 if __name__ == "__main__":
+    # Profiler().enable("torch")
+    # Profiler().initialize()
     main()
+    # Profiler().finalize()

From 0c668d94a67f0772f2010e7c66d98b92aa69d8c8 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 27 Aug 2025 09:07:46 -0700
Subject: [PATCH 09/98] Minor bug fixes

---
 physicsnemo/datapipes/cae/domino_datapipe2.py   | 5 +++--
 physicsnemo/datapipes/cae/drivaer_ml_dataset.py | 4 ++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py
index 86e2f88539..9aa5ae40cf 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe2.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe2.py
@@ -38,7 +38,7 @@
 from torch import Tensor
 from torch.utils.data import Dataset
 
-from physicsnemo.datapipes.cae.drivaer_ml_datapipe import (
+from physicsnemo.datapipes.cae.drivaer_ml_dataset import (
     DrivaerMLDataset,
     compute_mean_std_min_max,
 )
@@ -742,7 +742,7 @@ def process_data(self, data_dict, idx: int):
             return_dict.update(
                 {
                     "length_scale": length_scale,
-                    "surf_grid_max_min": surf_grid_max_min,
+                    "surface_min_max": surf_grid_max_min,
                 }
             )
 
@@ -754,6 +754,7 @@ def process_data(self, data_dict, idx: int):
                 stl_vertices=data_dict["stl_coordinates"],
                 mesh_indices_flattened=mesh_indices_flattened,
             )
+            return_dict["surf_grid"] = surf_grid
             return_dict["sdf_surf_grid"] = sdf_surf_grid
             return_dict["geometry_coordinates"] = geom_centers
 
diff --git a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
index 67e137bf13..aac34197ea 100644
--- a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
+++ b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
@@ -181,8 +181,8 @@ def __init__(self, keys_to_read: list[str] | None) -> None:
 
             self.context = ts.Context(
                 {
-                    "cache_pool": {"total_bytes_limit": 30_000_000},
-                    "data_copy_concurrency": {"limit": 60},
+                    "cache_pool": {"total_bytes_limit": 10_000_000},
+                    "data_copy_concurrency": {"limit": 72},
                 }
             )
 

From 70e6130b62c272cff5d1654899f44f0a154745f5 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 27 Aug 2025 10:57:12 -0700
Subject: [PATCH 10/98] Few bug fixes

---
 physicsnemo/datapipes/cae/domino_datapipe2.py | 10 ++++++----
 physicsnemo/utils/sdf.py                      |  2 +-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py
index 9aa5ae40cf..6a0ce133fa 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe2.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe2.py
@@ -300,8 +300,8 @@ def __init__(
         self.keys_to_read = ["stl_coordinates", "stl_centers", "stl_faces", "stl_areas"]
 
         self.keys_to_read_if_available = {
-            "global_params_values": torch.tensor([30.0, 1.226], device=self.device),
-            "global_params_reference": torch.tensor([30.0, 1.226], device=self.device),
+            "global_params_values": torch.tensor([[30.0], [1.226]], device=self.device),
+            "global_params_reference": torch.tensor([[30.0], [1.226]], device=self.device),
         }
 
         self.volume_keys = ["volume_mesh_centers", "volume_fields"]
@@ -389,7 +389,6 @@ def process_combined(
             surf_grid,
             use_sign_winding_number=True,
         )
-        sdf_surf_grid = surf_grid
 
         if self.config.sampling:
             geometry_points = self.config.geom_points_sample
@@ -497,12 +496,14 @@ def process_surface(
             pos_normals_com_surface = pos_normals_com_surface[idx_surface]
 
             # Now, perform the kNN on the sampled points:
+            print(self.config.num_surface_neighbors)
             if self.config.num_surface_neighbors > 1:
                 neighbor_indices, neighbor_distances = knn(
                     points=surface_coordinates,
                     queries=surface_coordinates_sampled,
                     k=self.config.num_surface_neighbors,
                 )
+                print(f"datapipe neighbor_indices: {neighbor_indices.shape}")
 
                 # Pull out the neighbor elements.  Note that ii is the index into the original
                 # points - but only exists for the sampled points
@@ -529,7 +530,7 @@ def process_surface(
                 queries=surface_coordinates,
                 k=self.config.num_surface_neighbors,
             )
-
+            print(f"datapipe neighbor_indices: {neighbor_indices.shape}")
             # Construct the neighbors arrays:
             surface_neighbors = surface_coordinates[neighbor_indices][:, 1:]
             surface_neighbors_normals = surface_normals[neighbor_indices][:, 1:]
@@ -755,6 +756,7 @@ def process_data(self, data_dict, idx: int):
                 mesh_indices_flattened=mesh_indices_flattened,
             )
             return_dict["surf_grid"] = surf_grid
+            print(f"datapipe sdf_surf_grid: {sdf_surf_grid.shape}")
             return_dict["sdf_surf_grid"] = sdf_surf_grid
             return_dict["geometry_coordinates"] = geom_centers
 
diff --git a/physicsnemo/utils/sdf.py b/physicsnemo/utils/sdf.py
index f9216bdd16..08f9c8c4c2 100644
--- a/physicsnemo/utils/sdf.py
+++ b/physicsnemo/utils/sdf.py
@@ -175,7 +175,7 @@ def signed_distance_field(
         )
 
     # Unflatten the output to be like the input:
-    sdf = sdf.reshape(input_shape[:-1] + (1,))
+    sdf = sdf.reshape(input_shape[:-1])
     sdf_hit_point = sdf_hit_point.reshape(input_shape)
 
     return sdf.to(input_points.dtype), sdf_hit_point.to(input_points.dtype)

From 4c26ae128a509c06f3a1d7111cd811fc9be425c6 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 27 Aug 2025 12:27:10 -0700
Subject: [PATCH 11/98] A few more fixes for domino.

---
 physicsnemo/datapipes/cae/domino_datapipe2.py | 20 +++++++++++--------
 physicsnemo/models/domino/model.py            |  2 ++
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py
index 6a0ce133fa..e88c988a0c 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe2.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe2.py
@@ -496,14 +496,12 @@ def process_surface(
             pos_normals_com_surface = pos_normals_com_surface[idx_surface]
 
             # Now, perform the kNN on the sampled points:
-            print(self.config.num_surface_neighbors)
             if self.config.num_surface_neighbors > 1:
                 neighbor_indices, neighbor_distances = knn(
                     points=surface_coordinates,
                     queries=surface_coordinates_sampled,
                     k=self.config.num_surface_neighbors,
                 )
-                print(f"datapipe neighbor_indices: {neighbor_indices.shape}")
 
                 # Pull out the neighbor elements.  Note that ii is the index into the original
                 # points - but only exists for the sampled points
@@ -660,7 +658,8 @@ def process_volume(
             volume_coordinates,
             use_sign_winding_number=True,
         )
-
+        sdf_nodes = sdf_nodes.reshape((-1, 1))
+        
         if self.config.positional_encoding:
             pos_normals_closest_vol = calculate_normal_positional_encoding(
                 volume_coordinates,
@@ -756,7 +755,7 @@ def process_data(self, data_dict, idx: int):
                 mesh_indices_flattened=mesh_indices_flattened,
             )
             return_dict["surf_grid"] = surf_grid
-            print(f"datapipe sdf_surf_grid: {sdf_surf_grid.shape}")
+
             return_dict["sdf_surf_grid"] = sdf_surf_grid
             return_dict["geometry_coordinates"] = geom_centers
 
@@ -788,16 +787,18 @@ def process_data(self, data_dict, idx: int):
                 )
                 return_dict.update(surface_dict)
 
-            if self.device.type == "cuda":
-                self._preprocess_events[idx] = torch.cuda.Event()
-                self._preprocess_events[idx].record(self.preprocess_stream)
-
             # Mark all cuda tensors to be consumed on the main stream:
             if self.device.type == "cuda":
                 for key in return_dict.keys():
                     if isinstance(return_dict[key], torch.Tensor):
                         return_dict[key].record_stream(torch.cuda.default_stream())
 
+
+            if self.device.type == "cuda":
+                self._preprocess_events[idx] = torch.cuda.Event()
+                self._preprocess_events[idx].record(self.preprocess_stream)
+
+
         return return_dict
 
     @profile
@@ -826,6 +827,9 @@ def __getitem__(self, idx):
             torch.cuda.current_stream().wait_event(self._preprocess_events[idx])
             self._preprocess_events.pop(idx)
 
+        # Add a batch dimension to the data_dict
+        data_dict = {k: v.unsqueeze(0) for k, v in data_dict.items()}
+
         return data_dict
 
     def idx_to_index(self, idx):
diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py
index c95f971e97..4aad8c4f35 100644
--- a/physicsnemo/models/domino/model.py
+++ b/physicsnemo/models/domino/model.py
@@ -157,6 +157,8 @@ def forward(
         batch_size = x.shape[0]
         nx, ny, nz = self.grid_resolution
 
+        print(f"p_grid shape: {p_grid.shape}")
+        print(f"x shape: {x.shape}")
         p_grid = torch.reshape(p_grid, (batch_size, nx * ny * nz, 3))
 
         if reverse_mapping:

From 45100efe60badd27c28ed25d3fe4ed53521f5616 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 27 Aug 2025 19:38:24 +0000
Subject: [PATCH 12/98] Fix pre-commit issues

---
 physicsnemo/datapipes/cae/domino_datapipe2.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py
index e88c988a0c..a24084ae7b 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe2.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe2.py
@@ -301,7 +301,9 @@ def __init__(
 
         self.keys_to_read_if_available = {
             "global_params_values": torch.tensor([[30.0], [1.226]], device=self.device),
-            "global_params_reference": torch.tensor([[30.0], [1.226]], device=self.device),
+            "global_params_reference": torch.tensor(
+                [[30.0], [1.226]], device=self.device
+            ),
         }
 
         self.volume_keys = ["volume_mesh_centers", "volume_fields"]
@@ -659,7 +661,7 @@ def process_volume(
             use_sign_winding_number=True,
         )
         sdf_nodes = sdf_nodes.reshape((-1, 1))
-        
+
         if self.config.positional_encoding:
             pos_normals_closest_vol = calculate_normal_positional_encoding(
                 volume_coordinates,
@@ -793,12 +795,10 @@ def process_data(self, data_dict, idx: int):
                     if isinstance(return_dict[key], torch.Tensor):
                         return_dict[key].record_stream(torch.cuda.default_stream())
 
-
             if self.device.type == "cuda":
                 self._preprocess_events[idx] = torch.cuda.Event()
                 self._preprocess_events[idx].record(self.preprocess_stream)
 
-
         return return_dict
 
     @profile

From 675c5469321dad4bfd7a36a892c1dac97ed2d562 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 3 Sep 2025 06:27:13 -0700
Subject: [PATCH 13/98] Port domino utils from cupy/numpy to pure torch.

Update domino_datapipe2 (temporary name).
---
 physicsnemo/datapipes/cae/__init__.py         |   2 +-
 physicsnemo/datapipes/cae/domino_datapipe2.py | 164 ++++----
 .../datapipes/cae/drivaer_ml_dataset.py       |  10 +-
 physicsnemo/models/domino/model.py            |  30 +-
 physicsnemo/utils/domino/utils.py             | 377 +++++++-----------
 physicsnemo/utils/neighbors/knn/_cuml_impl.py |  11 +-
 .../neighbors/radius_search/_warp_impl.py     | 167 ++++----
 test/utils/test_domino_utils.py               | 120 +++---
 8 files changed, 394 insertions(+), 487 deletions(-)

diff --git a/physicsnemo/datapipes/cae/__init__.py b/physicsnemo/datapipes/cae/__init__.py
index c0d17ff723..9af8d88db2 100644
--- a/physicsnemo/datapipes/cae/__init__.py
+++ b/physicsnemo/datapipes/cae/__init__.py
@@ -14,5 +14,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from .domino_datapipe import DoMINODataPipe
+from .domino_datapipe2 import DoMINODataPipe
 from .mesh_datapipe import MeshDatapipe
diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py
index a24084ae7b..dcc82d49cd 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe2.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe2.py
@@ -45,7 +45,6 @@
 from physicsnemo.distributed import DistributedManager
 from physicsnemo.utils.domino.utils import (
     ArrayType,
-    area_weighted_shuffle_array,
     calculate_center_of_mass,
     calculate_normal_positional_encoding,
     create_grid,
@@ -232,10 +231,8 @@ def __init__(
         if self.config.gpu_preprocessing or self.config.gpu_output:
             # Make sure we move data to the right device:
             target_device = dist.device
-            self.preprocess_stream = torch.cuda.Stream()
         else:
             target_device = torch.device("cpu")
-            self.preprocess_stream = None
 
         self.device = target_device
 
@@ -323,7 +320,7 @@ def __init__(
             data_dir=self.config.data_path,
             keys_to_read=self.keys_to_read,
             output_device=self.device,
-            consumer_stream=self.preprocess_stream,
+            consumer_stream=torch.cuda.default_stream(),
         )
 
         # This is thread storage for data preprocessing:
@@ -344,7 +341,6 @@ def set_indices(self, indices: list[int]):
     def __len__(self):
         return len(self.dataset)
 
-    @torch.compile(dynamic=True)
     def compute_stl_scaling(
         self, stl_vertices: torch.Tensor, bounding_box_dims_surf: torch.Tensor | None
     ):
@@ -407,7 +403,6 @@ def process_combined(
 
         return (sdf_surf_grid, geom_centers)
 
-    @torch.compile(dynamic=True)
     def process_surface(
         self,
         s_min: torch.Tensor,
@@ -530,7 +525,7 @@ def process_surface(
                 queries=surface_coordinates,
                 k=self.config.num_surface_neighbors,
             )
-            print(f"datapipe neighbor_indices: {neighbor_indices.shape}")
+
             # Construct the neighbors arrays:
             surface_neighbors = surface_coordinates[neighbor_indices][:, 1:]
             surface_neighbors_normals = surface_normals[neighbor_indices][:, 1:]
@@ -570,7 +565,6 @@ def process_surface(
 
         return return_dict
 
-    @torch.compile(dynamic=True)
     def process_volume(
         self,
         s_min: torch.Tensor,
@@ -707,101 +701,89 @@ def process_volume(
 
         return return_dict
 
-    @profile
+    @torch.no_grad()
     def process_data(self, data_dict, idx: int):
         for key in self.keys_to_read_if_available.keys():
             if key not in data_dict:
                 data_dict[key] = self.keys_to_read_if_available[key]
 
-        with torch.cuda.stream(self.preprocess_stream):
-            if self.config.deterministic:
-                torch.manual_seed(idx)
+        if self.config.deterministic:
+            torch.manual_seed(idx)
 
-            # Start building the preprocessed return dict:
-            return_dict = {
-                "global_params_values": data_dict["global_params_values"],
-                "global_params_reference": data_dict["global_params_reference"],
-            }
+        # Start building the preprocessed return dict:
+        return_dict = {
+            "global_params_values": data_dict["global_params_values"],
+            "global_params_reference": data_dict["global_params_reference"],
+        }
 
-            # This function gets information about the surface scale,
-            # and decides what the surface grid will be:
-            (s_min, s_max, length_scale, surf_grid_max_min, surf_grid) = (
-                self.compute_stl_scaling(
-                    data_dict["stl_coordinates"], self.config.bounding_box_dims_surf
-                )
+        # This function gets information about the surface scale,
+        # and decides what the surface grid will be:
+        (s_min, s_max, length_scale, surf_grid_max_min, surf_grid) = (
+            self.compute_stl_scaling(
+                data_dict["stl_coordinates"], self.config.bounding_box_dims_surf
             )
+        )
 
-            # This is a center of mass computation for the stl surface,
-            # using the size of each mesh point as weight.
+        # This is a center of mass computation for the stl surface,
+        # using the size of each mesh point as weight.
 
-            center_of_mass = calculate_center_of_mass(
-                data_dict["stl_centers"], data_dict["stl_areas"]
-            )
+        center_of_mass = calculate_center_of_mass(
+            data_dict["stl_centers"], data_dict["stl_areas"]
+        )
 
-            # For SDF calculations, make sure the mesh_indices_flattened is an integer array:
-            mesh_indices_flattened = data_dict["stl_faces"].to(torch.int32)
+        # For SDF calculations, make sure the mesh_indices_flattened is an integer array:
+        mesh_indices_flattened = data_dict["stl_faces"].to(torch.int32)
 
-            return_dict.update(
-                {
-                    "length_scale": length_scale,
-                    "surface_min_max": surf_grid_max_min,
-                }
-            )
+        return_dict.update(
+            {
+                "length_scale": length_scale,
+                "surface_min_max": surf_grid_max_min,
+            }
+        )
+
+        # This will compute the sdf on the surface grid and apply downsampling if needed
+        sdf_surf_grid, geom_centers = self.process_combined(
+            s_min,
+            s_max,
+            surf_grid,
+            stl_vertices=data_dict["stl_coordinates"],
+            mesh_indices_flattened=mesh_indices_flattened,
+        )
+        return_dict["surf_grid"] = surf_grid
+
+        return_dict["sdf_surf_grid"] = sdf_surf_grid
+        return_dict["geometry_coordinates"] = geom_centers
 
-            # This will compute the sdf on the surface grid and apply downsampling if needed
-            sdf_surf_grid, geom_centers = self.process_combined(
+        # Up to here works all in torch!
+
+        if self.model_type == "volume" or self.model_type == "combined":
+            volume_dict = self.process_volume(
                 s_min,
                 s_max,
-                surf_grid,
+                volume_coordinates=data_dict["volume_mesh_centers"],
+                volume_fields=data_dict["volume_fields"],
                 stl_vertices=data_dict["stl_coordinates"],
                 mesh_indices_flattened=mesh_indices_flattened,
+                center_of_mass=center_of_mass,
             )
-            return_dict["surf_grid"] = surf_grid
-
-            return_dict["sdf_surf_grid"] = sdf_surf_grid
-            return_dict["geometry_coordinates"] = geom_centers
-
-            # Up to here works all in torch!
-
-            if self.model_type == "volume" or self.model_type == "combined":
-                volume_dict = self.process_volume(
-                    s_min,
-                    s_max,
-                    volume_coordinates=data_dict["volume_mesh_centers"],
-                    volume_fields=data_dict["volume_fields"],
-                    stl_vertices=data_dict["stl_coordinates"],
-                    mesh_indices_flattened=mesh_indices_flattened,
-                    center_of_mass=center_of_mass,
-                )
 
-                return_dict.update(volume_dict)
-
-            if self.model_type == "surface" or self.model_type == "combined":
-                surface_dict = self.process_surface(
-                    s_min,
-                    s_max,
-                    center_of_mass,
-                    surf_grid,
-                    surface_coordinates=data_dict["surface_mesh_centers"],
-                    surface_normals=data_dict["surface_normals"],
-                    surface_sizes=data_dict["surface_areas"],
-                    surface_fields=data_dict["surface_fields"],
-                )
-                return_dict.update(surface_dict)
-
-            # Mark all cuda tensors to be consumed on the main stream:
-            if self.device.type == "cuda":
-                for key in return_dict.keys():
-                    if isinstance(return_dict[key], torch.Tensor):
-                        return_dict[key].record_stream(torch.cuda.default_stream())
+            return_dict.update(volume_dict)
 
-            if self.device.type == "cuda":
-                self._preprocess_events[idx] = torch.cuda.Event()
-                self._preprocess_events[idx].record(self.preprocess_stream)
+        if self.model_type == "surface" or self.model_type == "combined":
+            surface_dict = self.process_surface(
+                s_min,
+                s_max,
+                center_of_mass,
+                surf_grid,
+                surface_coordinates=data_dict["surface_mesh_centers"],
+                surface_normals=data_dict["surface_normals"],
+                surface_sizes=data_dict["surface_areas"],
+                surface_fields=data_dict["surface_fields"],
+            )
+            return_dict.update(surface_dict)
 
         return return_dict
 
-    @profile
     def __getitem__(self, idx):
         """
         Function for fetching and processing a single file's data.
@@ -822,11 +804,6 @@ def __getitem__(self, idx):
             data_dict = self.dataset[index]
             data_dict = self.process_data(data_dict, idx)
 
-        # This blocks the main stream until the preprocessing has transferred to GPU
-        if idx in self._preprocess_events:
-            torch.cuda.current_stream().wait_event(self._preprocess_events[idx])
-            self._preprocess_events.pop(idx)
-
         # Add a batch dimension to the data_dict
         data_dict = {k: v.unsqueeze(0) for k, v in data_dict.items()}
 
@@ -889,12 +866,9 @@ def __next__(self):
 
         # Start loading two ahead:
         if len(self.dataset) >= current_idx + 2:
+            self.dataset.preload(self.idx_to_index(current_idx + 1))
             self.dataset.preload(self.idx_to_index(current_idx + 2))
 
-        # Start preprocessing one ahead:
-        if len(self.dataset) >= current_idx + 1:
-            self.preprocess(current_idx + 1)
-
         # If no preprocessing was done for this index, process it now
         data = self.__getitem__(current_idx)
 
@@ -914,13 +888,9 @@ def __iter__(self):
         if len(self.dataset) >= 2:
             self.dataset.preload(self.idx_to_index(self.i + 1))
 
-        # Start preprocessing index 0
-        self.preprocess(self.i)
-
         return self
 
 
-@profile
 def compute_scaling_factors(cfg: DictConfig, input_path: str, use_cache: bool) -> None:
     # Create a dataset for just the field keys:
 
@@ -1038,10 +1008,10 @@ def __getitem__(self, idx):
         # Sample surface points if present
         if "surface_mesh_centers" in result and self.surface_points:
             if self.surface_sampling_algorithm == "area_weighted":
-                coords_sampled, idx_surface = area_weighted_shuffle_array(
-                    result["surface_mesh_centers"],
-                    self.surface_points,
-                    result["surface_areas"],
+                coords_sampled, idx_surface = shuffle_array(
+                    points=result["surface_mesh_centers"],
+                    n_points=self.surface_points,
+                    weights=result["surface_areas"],
                 )
             else:
                 coords_sampled, idx_surface = shuffle_array(
diff --git a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
index aac34197ea..78f9407ebd 100644
--- a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
+++ b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
@@ -292,6 +292,7 @@ def __init__(
 
         self.output_device = output_device
         if output_device.type == "cuda":
+            # self._data_loader_stream = torch.cuda.default_stream()
             self._data_loader_stream = torch.cuda.Stream()
         else:
             self._data_loader_stream = None
@@ -362,10 +363,11 @@ def _move_to_gpu(
                 # Move to GPU if available
                 result[key] = data[key].to(self.output_device, non_blocking=True)
                 result[key].record_stream(self.consumer_stream)
-            # Mark the consumer stream:
-            transfer_event = torch.cuda.Event()
-            transfer_event.record(self._data_loader_stream)
-            # result.set_event("transfer", transfer_event)
+
+        # Mark the consumer stream:
+        transfer_event = torch.cuda.Event()
+        transfer_event.record(self._data_loader_stream)
+        self._transfer_events[idx] = transfer_event
 
         return result
 
diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py
index 4aad8c4f35..ff0a5482c8 100644
--- a/physicsnemo/models/domino/model.py
+++ b/physicsnemo/models/domino/model.py
@@ -155,11 +155,8 @@ def forward(
                 - outputs: Tensor containing coordinates of the neighboring points
         """
         batch_size = x.shape[0]
-        nx, ny, nz = self.grid_resolution
 
-        print(f"p_grid shape: {p_grid.shape}")
-        print(f"x shape: {x.shape}")
-        p_grid = torch.reshape(p_grid, (batch_size, nx * ny * nz, 3))
+        p_grid = p_grid.reshape(batch_size, -1, 3)
 
         if reverse_mapping:
             mapping, outputs = radius_search(
@@ -594,15 +591,23 @@ def forward(
         if self.geo_encoding_type == "both" or self.geo_encoding_type == "stl":
             # Calculate multi-scale geoemtry dependency
             x_encoding = []
+
             for j in range(len(self.radii)):
-                mapping, k_short = self.bq_warp[j](x, p_grid)
-                x_encoding_inter = self.geo_conv_out[j](k_short, p_grid)
-                # Propagate information in the geometry enclosed BBox
-                for _ in range(self.hops):
-                    dx = self.geo_processors[j](x_encoding_inter) / self.hops
-                    x_encoding_inter = x_encoding_inter + dx
-                x_encoding_inter = self.geo_processor_out[j](x_encoding_inter)
-                x_encoding.append(x_encoding_inter)
+                with torch.autograd.profiler.record_function(f"bq_warp_{j}"):
+                    mapping, k_short = self.bq_warp[j](x, p_grid)
+                    x_encoding_inter = self.geo_conv_out[j](k_short, p_grid)
+                    # Propagate information in the geometry enclosed BBox
+                    for _i in range(self.hops):
+                        with torch.autograd.profiler.record_function(
+                            f"geo_processor_{j}_{_i}"
+                        ):
+                            dx = self.geo_processors[j](x_encoding_inter) / self.hops
+                            x_encoding_inter = x_encoding_inter + dx
+                    x_encoding_inter = self.geo_processor_out[j](x_encoding_inter)
+
+                    x_encoding.append(x_encoding_inter)
+
+            # current_stream.
             x_encoding = torch.cat(x_encoding, dim=1)
 
         if self.geo_encoding_type == "both" or self.geo_encoding_type == "sdf":
@@ -1661,6 +1666,7 @@ def calculate_solution(
         return_volume_neighbors=False,
     ):
         """Function to approximate solution sampling the neighborhood information"""
+
         if eval_mode == "volume":
             num_variables = self.num_variables_vol
             nn_basis = self.nn_basis_vol
diff --git a/physicsnemo/utils/domino/utils.py b/physicsnemo/utils/domino/utils.py
index 5d63def82e..3abb968c5a 100644
--- a/physicsnemo/utils/domino/utils.py
+++ b/physicsnemo/utils/domino/utils.py
@@ -19,49 +19,15 @@
 
 This module provides essential utilities for computational fluid dynamics data processing,
 mesh manipulation, field normalization, and geometric computations. It supports both
-CPU (NumPy) and GPU (CuPy) operations with automatic fallbacks.
+torch.Tensor operations on either CPU or GPU.
 """
 
 from pathlib import Path
 from typing import Any, Sequence
 
-import numpy as np
 import torch
-from scipy.spatial import KDTree
 
-# Type alias for arrays that can be either NumPy or CuPy
-try:
-    import cupy as cp
-
-    ArrayType = np.ndarray | cp.ndarray
-except ImportError:
-    ArrayType = np.ndarray
-
-
-def array_type(array: ArrayType) -> "type[np] | type[cp]":
-    """Determine the array module (NumPy or CuPy) for the given array.
-
-    This function enables array-agnostic code by returning the appropriate
-    array module that can be used for operations on the input array.
-
-    Args:
-        array: Input array that can be either NumPy or CuPy array.
-
-    Returns:
-        The array module (numpy or cupy) corresponding to the input array type.
-
-    Examples:
-        >>> import numpy as np
-        >>> arr = np.array([1, 2, 3])
-        >>> xp = array_type(arr)
-        >>> result = xp.sum(arr)  # Uses numpy.sum
-    """
-    try:
-        import cupy as cp
-
-        return cp.get_array_module(array)
-    except ImportError:
-        return np
+from physicsnemo.utils.neighbors import knn
 
 
 def calculate_center_of_mass(
@@ -73,13 +39,13 @@ def calculate_center_of_mass(
     in computational fluid dynamics for mesh analysis and load balancing.
 
     Args:
-        centers: Array of shape (n_elements, 3) containing the centroid
+        centers: torch.Tensor of shape (n_elements, 3) containing the centroid
             coordinates of each element.
-        sizes: Array of shape (n_elements,) containing the volume
+        sizes: torch.Tensor of shape (n_elements,) containing the volume
             or area of each element used as weights.
 
     Returns:
-        Array of shape (1, 3) containing the x, y, z coordinates of the center of mass.
+        torch.Tensor of shape (1, 3) containing the x, y, z coordinates of the center of mass.
 
     Raises:
         ValueError: If centers and sizes have incompatible shapes.
@@ -111,7 +77,7 @@ def normalize(
     ensure numerical stability and faster convergence.
 
     Args:
-        field: Input field array to be normalized.
+        field: Input field tensor to be normalized.
         max_val: Maximum values for normalization, can be scalar or array.
             If None, computed from the field data.
         min_val: Minimum values for normalization, can be scalar or array.
@@ -136,9 +102,9 @@ def normalize(
     """
 
     if max_val is None:
-        max_val = field.max(axis=0, keepdim=True)
+        max_val, _ = field.max(axis=0, keepdim=True)
     if min_val is None:
-        min_val = field.min(axis=0, keepdim=True)
+        min_val, _ = field.min(axis=0, keepdim=True)
 
     field_range = max_val - min_val
     return 2.0 * (field - min_val) / field_range - 1.0
@@ -183,7 +149,7 @@ def standardize(
     when the data follows a normal distribution.
 
     Args:
-        field: Input field array to be standardized.
+        field: Input field tensor to be standardized.
         mean: Mean values for standardization. If None, computed from field data.
         std: Standard deviation values for standardization. If None, computed from field data.
 
@@ -242,10 +208,10 @@ def unstandardize(
 
 
 def calculate_normal_positional_encoding(
-    coordinates_a: ArrayType,
-    coordinates_b: ArrayType | None = None,
+    coordinates_a: torch.Tensor,
+    coordinates_b: torch.Tensor | None = None,
     cell_dimensions: Sequence[float] = (1.0, 1.0, 1.0),
-) -> ArrayType:
+) -> torch.Tensor:
     """Calculate sinusoidal positional encoding for 3D coordinates.
 
     This function computes transformer-style positional encodings for 3D spatial
@@ -254,51 +220,51 @@ def calculate_normal_positional_encoding(
     unique representations for each spatial position.
 
     Args:
-        coordinates_a: Primary coordinates array of shape (n_points, 3).
+        coordinates_a: Primary coordinates tensor of shape (n_points, 3).
         coordinates_b: Optional secondary coordinates for computing relative positions.
             If provided, the encoding is computed for (coordinates_a - coordinates_b).
         cell_dimensions: Characteristic length scales for x, y, z dimensions used
             for normalization. Defaults to unit dimensions.
 
     Returns:
-        Array of shape (n_points, 12) containing positional encodings with
+        torch.Tensor of shape (n_points, 12) containing positional encodings with
         4 encoding dimensions per spatial axis (x, y, z).
 
     Examples:
-        >>> import numpy as np
-        >>> coords = np.array([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]])
+        >>> import torch
+        >>> coords = torch.tensor([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]])
         >>> cell_size = [0.1, 0.1, 0.1]
         >>> encoding = calculate_normal_positional_encoding(coords, cell_dimensions=cell_size)
         >>> encoding.shape
         (2, 12)
         >>> # Relative positioning example
-        >>> coords_b = np.array([[0.5, 0.5, 0.5], [0.5, 0.5, 0.5]])
+        >>> coords_b = torch.tensor([[0.5, 0.5, 0.5], [0.5, 0.5, 0.5]])
         >>> encoding_rel = calculate_normal_positional_encoding(coords, coords_b, cell_size)
         >>> encoding_rel.shape
         (2, 12)
     """
     dx, dy, dz = cell_dimensions[0], cell_dimensions[1], cell_dimensions[2]
-    xp = array_type(coordinates_a)
 
     if coordinates_b is not None:
         normals = coordinates_a - coordinates_b
-        pos_x = xp.asarray(calculate_pos_encoding(normals[:, 0] / dx, d=4))
-        pos_y = xp.asarray(calculate_pos_encoding(normals[:, 1] / dy, d=4))
-        pos_z = xp.asarray(calculate_pos_encoding(normals[:, 2] / dz, d=4))
-        pos_normals = xp.concatenate((pos_x, pos_y, pos_z), axis=0).reshape(-1, 12)
+        pos_x = torch.cat(calculate_pos_encoding(normals[:, 0] / dx, d=4), dim=-1)
+        pos_y = torch.cat(calculate_pos_encoding(normals[:, 1] / dy, d=4), dim=-1)
+        pos_z = torch.cat(calculate_pos_encoding(normals[:, 2] / dz, d=4), dim=-1)
+        pos_normals = torch.cat((pos_x, pos_y, pos_z), dim=0).reshape(-1, 12)
     else:
         normals = coordinates_a
-        pos_x = xp.asarray(calculate_pos_encoding(normals[:, 0] / dx, d=4))
-        pos_y = xp.asarray(calculate_pos_encoding(normals[:, 1] / dy, d=4))
-        pos_z = xp.asarray(calculate_pos_encoding(normals[:, 2] / dz, d=4))
-        pos_normals = xp.concatenate((pos_x, pos_y, pos_z), axis=0).reshape(-1, 12)
+        pos_x = torch.cat(calculate_pos_encoding(normals[:, 0] / dx, d=4), dim=-1)
+        pos_y = torch.cat(calculate_pos_encoding(normals[:, 1] / dy, d=4), dim=-1)
+        pos_z = torch.cat(calculate_pos_encoding(normals[:, 2] / dz, d=4), dim=-1)
+        print(pos_x.shape, pos_y.shape, pos_z.shape)
+        pos_normals = torch.cat((pos_x, pos_y, pos_z), dim=0).reshape(-1, 12)
 
     return pos_normals
 
 
 def nd_interpolator(
-    coordinates: ArrayType, field: ArrayType, grid: ArrayType, k: int = 2
-) -> ArrayType:
+    coordinates: torch.Tensor, field: torch.Tensor, grid: torch.Tensor, k: int = 2
+) -> torch.Tensor:
     """Perform n-dimensional interpolation using k-nearest neighbors.
 
     This function interpolates field values from scattered points to a regular
@@ -306,114 +272,126 @@ def nd_interpolator(
     fields on regular grids from irregular measurement points.
 
     Args:
-        coordinates: Array of shape (n_points, n_dims) containing source point coordinates.
-        field: Array of shape (n_points, n_fields) containing field values at source points.
-        grid: Array of shape (n_field_points, n_dims) containing target grid points for interpolation.
+        coordinates: torch.Tensor of shape (n_points, n_dims) containing source point coordinates.
+        field: torch.Tensor of shape (n_points, n_fields) containing field values at source points.
+        grid: torch.Tensor of shape (n_field_points, n_dims) containing target grid points for interpolation.
         k: Number of nearest neighbors to use for interpolation.
 
     Returns:
         Interpolated field values at grid points using k-nearest neighbor averaging.
 
-    Note:
-        This function currently uses SciPy's KDTree which only supports CPU arrays.
-        A future enhancement could add CuML support for GPU acceleration.
 
     Examples:
-        >>> import numpy as np
+        >>> import torch
         >>> # Simple 2D interpolation example
-        >>> coords = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
-        >>> field_vals = np.array([[1.0], [2.0], [3.0], [4.0]])
-        >>> grid_points = np.array([[0.5, 0.5]])
-        >>> result = nd_interpolator([coords], field_vals, grid_points)
+        >>> coords = torch.tensor([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
+        >>> field_vals = torch.tensor([[1.0], [2.0], [3.0], [4.0]])
+        >>> grid_points = torch.tensor([[0.5, 0.5]])
+        >>> result = nd_interpolator(coords, field_vals, grid_points)
         >>> result.shape[0] == 1  # One grid point
         True
     """
-    # TODO - this function should get updated for cuml if using cupy.
-    kdtree = KDTree(coordinates[0])
-    distances, neighbor_indices = kdtree.query(grid, k=k)
+    neighbor_indices, distances = knn(coordinates, grid, k=k)
 
     field_grid = field[neighbor_indices]
-    field_grid = np.mean(field_grid, axis=1)
+    field_grid = torch.mean(field_grid, dim=1)
     return field_grid
 
 
-def pad(arr: ArrayType, n_points: int, pad_value: float = 0.0) -> ArrayType:
-    """Pad 2D array with constant values to reach target size.
+def pad(arr: torch.Tensor, n_points: int, pad_value: float = 0.0) -> torch.Tensor:
+    """Pad 2D tensor with constant values to reach target size.
 
-    This function extends a 2D array by adding rows filled with a constant
-    value. It's commonly used to standardize array sizes in batch processing
+    This function extends a 2D tensor by adding rows filled with a constant
+    value. It's commonly used to standardize tensor sizes in batch processing
     for machine learning applications.
 
     Args:
-        arr: Input array of shape (n_points, n_features) to be padded.
+        arr: Input tensor of shape (n_points, n_features) to be padded.
         n_points: Target number of points (rows) after padding.
         pad_value: Constant value used for padding. Defaults to 0.0.
 
     Returns:
-        Padded array of shape (n_points, n_features). If n_points <= arr.shape[0],
-        returns the original array unchanged.
+        Padded tensor of shape (n_points, n_features). If n_points <= arr.shape[0],
+        returns the original tensor unchanged.
 
     Examples:
-        >>> import numpy as np
-        >>> arr = np.array([[1.0, 2.0], [3.0, 4.0]])
+        >>> import torch
+        >>> arr = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
         >>> padded = pad(arr, 4, -1.0)
         >>> padded.shape
         (4, 2)
-        >>> np.array_equal(padded[:2], arr)
+        >>> torch.allclose(padded[:2], arr)
         True
-        >>> bool(np.all(padded[2:] == -1.0))
+        >>> bool(torch.all(padded[2:] == -1.0))
         True
         >>> # No padding needed
         >>> same = pad(arr, 2)
-        >>> np.array_equal(same, arr)
+        >>> torch.allclose(same, arr)
         True
     """
-    xp = array_type(arr)
+
     if n_points <= arr.shape[0]:
         return arr
 
-    arr_pad = pad_value * xp.ones(
-        (n_points - arr.shape[0], arr.shape[1]), dtype=xp.float32
+    n_pad = n_points - arr.shape[0]
+    arr_padded = torch.nn.functional.pad(
+        arr,
+        (
+            0,
+            0,
+            0,
+            n_pad,
+        ),
+        mode="constant",
+        value=pad_value,
     )
-    arr_padded = xp.concatenate((arr, arr_pad), axis=0)
     return arr_padded
 
 
-def pad_inp(arr: ArrayType, n_points: int, pad_value: float = 0.0) -> ArrayType:
-    """Pad 3D array with constant values to reach target size.
+def pad_inp(arr: torch.Tensor, n_points: int, pad_value: float = 0.0) -> torch.Tensor:
+    """Pad 3D tensor with constant values to reach target size.
 
-    This function extends a 3D array by adding entries along the first dimension
+    This function extends a 3D tensor by adding entries along the first dimension
     filled with a constant value. Used for standardizing 3D tensor sizes in
     batch processing workflows.
 
     Args:
-        arr: Input array of shape (n_points, height, width) to be padded.
+        arr: Input tensor of shape (n_points, height, width) to be padded.
         n_points: Target number of points along first dimension after padding.
         pad_value: Constant value used for padding. Defaults to 0.0.
 
     Returns:
-        Padded array of shape (n_points, height, width). If n_points <= arr.shape[0],
-        returns the original array unchanged.
+        Padded tensor of shape (n_points, height, width). If n_points <= arr.shape[0],
+        returns the original tensor unchanged.
 
     Examples:
-        >>> import numpy as np
-        >>> arr = np.array([[[1.0, 2.0]], [[3.0, 4.0]]])
+        >>> import torch
+        >>> arr = torch.tensor([[[1.0, 2.0]], [[3.0, 4.0]]])
         >>> padded = pad_inp(arr, 4, 0.0)
         >>> padded.shape
         (4, 1, 2)
-        >>> np.array_equal(padded[:2], arr)
+        >>> torch.allclose(padded[:2], arr)
         True
-        >>> bool(np.all(padded[2:] == 0.0))
+        >>> bool(torch.all(padded[2:] == 0.0))
         True
     """
-    xp = array_type(arr)
     if n_points <= arr.shape[0]:
         return arr
 
-    arr_pad = pad_value * xp.ones(
-        (n_points - arr.shape[0], arr.shape[1], arr.shape[2]), dtype=xp.float32
+    n_pad = n_points - arr.shape[0]
+    arr_padded = torch.nn.functional.pad(
+        arr,
+        (
+            0,
+            0,
+            0,
+            0,
+            0,
+            n_pad,
+        ),
+        mode="constant",
+        value=pad_value,
     )
-    arr_padded = xp.concatenate((arr, arr_pad), axis=0)
     return arr_padded
 
 
@@ -423,9 +401,9 @@ def shuffle_array(
     weights: torch.Tensor = None,
 ):
     """
-    Randomly sample points from array without replacement.
+    Randomly sample points from tensor without replacement.
 
-    This function performs random sampling from the input array, selecting
+    This function performs random sampling from the input tensor, selecting
     n_points points without replacement. It's commonly used for creating training
     subsets and data augmentation in machine learning workflows.
 
@@ -435,14 +413,14 @@ def shuffle_array(
     If the input is larger than that, it will be split and sampled from each chunk.
 
     Args:
-        arr: Input array to sample from, shape (n_points, ...).
+        points: Input tensor to sample from, shape (n_points, ...).
         n_points: Number of points to sample. If greater than arr.shape[0],
             all points are returned.
         weights: Optional weights for sampling. If None, uniform weights are used.
 
     Returns:
         Tuple containing:
-        - Sampled array subset
+        - Sampled tensor subset
         - Indices of the selected points
 
     Examples:
@@ -454,7 +432,7 @@ def shuffle_array(
         (2, 2)
         >>> indices.shape
         (2,)
-        >>> len(np.unique(indices)) == 2  # No duplicates
+        >>> len(torch.unique(indices)) == 2  # No duplicates
         True
     """
 
@@ -514,62 +492,21 @@ def shuffle_array(
     return points_selected, idx
 
 
-# @profile
-# def shuffle_array(
-#     arr: ArrayType,
-#     n_points: int,
-# ) -> tuple[ArrayType, ArrayType]:
-#     """Randomly sample points from array without replacement.
-
-#     This function performs random sampling from the input array, selecting
-#     n_points points without replacement. It's commonly used for creating training
-#     subsets and data augmentation in machine learning workflows.
-
-#     Args:
-#         arr: Input array to sample from, shape (n_points, ...).
-#         n_points: Number of points to sample. If greater than arr.shape[0],
-#             all points are returned.
-
-#     Returns:
-#         Tuple containing:
-#         - Sampled array subset
-#         - Indices of the selected points
-
-#     Examples:
-#         >>> import numpy as np
-#         >>> np.random.seed(42)  # For reproducible results
-#         >>> data = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
-#         >>> subset, indices = shuffle_array(data, 2)
-#         >>> subset.shape
-#         (2, 2)
-#         >>> indices.shape
-#         (2,)
-#         >>> len(np.unique(indices)) == 2  # No duplicates
-#         True
-#     """
-#     xp = array_type(arr)
-#     if n_points > arr.shape[0]:
-#         # If asking too many points, truncate the ask but still shuffle.
-#         n_points = arr.shape[0]
-#     idx = xp.random.choice(arr.shape[0], size=n_points, replace=False)
-#     return arr[idx], idx
-
-
 def shuffle_array_without_sampling(
     arr: torch.Tensor,
 ) -> tuple[torch.Tensor, torch.Tensor]:
-    """Shuffle array order without changing the number of elements.
+    """Shuffle tensor order without changing the number of elements.
 
-    This function reorders all elements in the array randomly while preserving
+    This function reorders all elements in the tensor randomly while preserving
     all data points. It's useful for randomizing data order before training
     while maintaining the complete dataset.
 
     Args:
-        arr: Input array to shuffle, shape (n_points, ...).
+        arr: Input tensor to shuffle, shape (n_points, ...).
 
     Returns:
         Tuple containing:
-        - Shuffled array with same shape as input
+        - Shuffled tensor with same shape as input
         - Permutation indices used for shuffling
 
     Examples:
@@ -636,7 +573,7 @@ def get_filenames(filepath: str | Path, exclude_dirs: bool = False) -> list[str]
     return filenames
 
 
-def calculate_pos_encoding(nx: ArrayType, d: int = 8) -> list[ArrayType]:
+def calculate_pos_encoding(nx: torch.Tensor, d: int = 8) -> list[torch.Tensor]:
     """Calculate sinusoidal positional encoding for transformer architectures.
 
     This function computes positional encodings using alternating sine and cosine
@@ -648,12 +585,12 @@ def calculate_pos_encoding(nx: ArrayType, d: int = 8) -> list[ArrayType]:
         d: Encoding dimensionality. Must be even number. Defaults to 8.
 
     Returns:
-        List of d arrays containing alternating sine and cosine encodings.
+        List of d tensors containing alternating sine and cosine encodings.
         Each pair (sin, cos) uses progressively lower frequencies.
 
     Examples:
-        >>> import numpy as np
-        >>> positions = np.array([0.0, 1.0, 2.0])
+        >>> import torch
+        >>> positions = torch.tensor([0.0, 1.0, 2.0])
         >>> encodings = calculate_pos_encoding(positions, d=4)
         >>> len(encodings)
         4
@@ -661,10 +598,9 @@ def calculate_pos_encoding(nx: ArrayType, d: int = 8) -> list[ArrayType]:
         True
     """
     vec = []
-    xp = array_type(nx)
     for k in range(int(d / 2)):
-        vec.append(xp.sin(nx / 10000 ** (2 * k / d)))
-        vec.append(xp.cos(nx / 10000 ** (2 * k / d)))
+        vec.append(torch.sin(nx / 10000 ** (2 * k / d)))
+        vec.append(torch.cos(nx / 10000 ** (2 * k / d)))
     return vec
 
 
@@ -715,7 +651,7 @@ def create_grid(
         resolution: Number of grid points [nx, ny, nz] in each dimension.
 
     Returns:
-        Grid array of shape (nx, ny, nz, 3) containing 3D coordinates for each
+        Grid tensor of shape (nx, ny, nz, 3) containing 3D coordinates for each
         grid point. The last dimension contains [x, y, z] coordinates.
 
     Examples:
@@ -754,7 +690,7 @@ def create_grid(
 
 
 def mean_std_sampling(
-    field: ArrayType, mean: ArrayType, std: ArrayType, tolerance: float = 3.0
+    field: torch.Tensor, mean: torch.Tensor, std: torch.Tensor, tolerance: float = 3.0
 ) -> list[int]:
     """Identify outlier points based on statistical distance from mean.
 
@@ -763,7 +699,7 @@ def mean_std_sampling(
     It's useful for data cleaning and identifying regions of interest in CFD data.
 
     Args:
-        field: Input field array of shape (n_points, n_components).
+        field: Input field tensor of shape (n_points, n_components).
         mean: Mean values for each field component, shape (n_components,).
         std: Standard deviation for each component, shape (n_components,).
         tolerance: Number of standard deviations to use as outlier threshold.
@@ -773,20 +709,20 @@ def mean_std_sampling(
         List of indices identifying outlier points that exceed the statistical threshold.
 
     Examples:
-        >>> import numpy as np
+        >>> import torch
         >>> # Create test data with outliers
-        >>> field = np.array([[1.0], [2.0], [3.0], [10.0]])  # 10.0 is outlier
-        >>> field_mean = np.array([2.0])
-        >>> field_std = np.array([1.0])
+        >>> field = torch.tensor([[1.0], [2.0], [3.0], [10.0]])  # 10.0 is outlier
+        >>> field_mean = torch.tensor([2.0])
+        >>> field_std = torch.tensor([1.0])
         >>> outliers = mean_std_sampling(field, field_mean, field_std, 2.0)
         >>> 3 in outliers  # Index 3 (value 10.0) should be detected as outlier
         True
     """
-    xp = array_type(field)
+
     idx_all = []
     for v in range(field.shape[-1]):
         fv = field[:, v]
-        idx = xp.where(
+        idx = torch.where(
             (fv > mean[v] + tolerance * std[v]) | (fv < mean[v] - tolerance * std[v])
         )
         if len(idx[0]) != 0:
@@ -830,16 +766,16 @@ def dict_to_device(
 
 
 def area_weighted_shuffle_array(
-    arr: ArrayType, n_points: int, area: ArrayType, area_factor: float = 1.0
-) -> tuple[ArrayType, ArrayType]:
-    """Perform area-weighted random sampling from array.
+    arr: torch.Tensor, n_points: int, area: torch.Tensor, area_factor: float = 1.0
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """Perform area-weighted random sampling from tensor.
 
-    This function samples points from an array with probability proportional to
+    This function samples points from a tensor with probability proportional to
     their associated area weights. This is particularly useful in CFD applications
     where larger cells or surface elements should have higher sampling probability.
 
     Args:
-        arr: Input array to sample from, shape (n_points, ...).
+        arr: Input tensor to sample from, shape (n_points, ...).
         n_points: Number of points to sample. If greater than arr.shape[0],
             samples all available points.
         area: Area weights for each point, shape (n_points,). Larger values
@@ -850,19 +786,18 @@ def area_weighted_shuffle_array(
 
     Returns:
         Tuple containing:
-        - Sampled array subset weighted by area
+        - Sampled tensor subset weighted by area
         - Indices of the selected points
 
     Note:
-        For GPU arrays (CuPy), the sampling is performed on CPU due to memory
-        efficiency considerations. The Alias method could be implemented for
-        future GPU acceleration.
+        For GPU tensors, the sampling is performed on the current device.
+        The sampling uses torch.multinomial for efficient weighted sampling.
 
     Examples:
-        >>> import numpy as np
-        >>> np.random.seed(42)  # For reproducible results
-        >>> mesh_data = np.array([[1.0], [2.0], [3.0], [4.0]])
-        >>> cell_areas = np.array([0.1, 0.1, 0.1, 10.0])  # Last point has much larger area
+        >>> import torch
+        >>> torch.manual_seed(42)  # For reproducible results
+        >>> mesh_data = torch.tensor([[1.0], [2.0], [3.0], [4.0]])
+        >>> cell_areas = torch.tensor([0.1, 0.1, 0.1, 10.0])  # Last point has much larger area
         >>> subset, indices = area_weighted_shuffle_array(mesh_data, 2, cell_areas)
         >>> subset.shape
         (2, 1)
@@ -874,40 +809,28 @@ def area_weighted_shuffle_array(
         >>> # Use higher area_factor for stronger bias toward large areas
         >>> subset_biased, _ = area_weighted_shuffle_array(mesh_data, 2, cell_areas, area_factor=2.0)
     """
-    xp = array_type(arr)
+
     # Calculate area-weighted probabilities
     sampling_probabilities = area**area_factor
-    sampling_probabilities /= xp.sum(sampling_probabilities)  # Normalize to sum to 1
-
-    # Ensure we don't request more points than available
-    n_points = min(n_points, arr.shape[0])
-
-    # Create index array for all available points
-    point_indices = xp.arange(arr.shape[0])
-
-    if xp != np:
-        point_indices = point_indices.get()
-        sampling_probabilities = sampling_probabilities.get()
-
-    selected_indices = np.random.choice(
-        point_indices, n_points, p=sampling_probabilities
-    )
-    selected_indices = xp.asarray(selected_indices)
+    sampling_probabilities /= sampling_probabilities.sum()  # Normalize to sum to 1
 
-    return arr[selected_indices], selected_indices
+    return shuffle_array(arr, n_points, sampling_probabilities)
 
 
 def solution_weighted_shuffle_array(
-    arr: ArrayType, n_points: int, solution: ArrayType, scaling_factor: float = 1.0
-) -> tuple[ArrayType, ArrayType]:
-    """Perform solution-weighted random sampling from array.
+    arr: torch.Tensor,
+    n_points: int,
+    solution: torch.Tensor,
+    scaling_factor: float = 1.0,
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """Perform solution-weighted random sampling from tensor.
 
-    This function samples points from an array with probability proportional to
+    This function samples points from a tensor with probability proportional to
     their associated solution weights. This is particularly useful in CFD applications
     where larger cells or surface elements should have higher sampling probability.
 
     Args:
-        arr: Input array to sample from, shape (n_points, ...).
+        arr: Input tensor to sample from, shape (n_points, ...).
         n_points: Number of points to sample. If greater than arr.shape[0],
             samples all available points.
         solution: Solution weights for each point, shape (n_points,). Larger values
@@ -918,19 +841,18 @@ def solution_weighted_shuffle_array(
 
     Returns:
         Tuple containing:
-        - Sampled array subset weighted by solution fields
+        - Sampled tensor subset weighted by solution fields
         - Indices of the selected points
 
     Note:
-        For GPU arrays (CuPy), the sampling is performed on CPU due to memory
-        efficiency considerations. The Alias method could be implemented for
-        future GPU acceleration.
+        For GPU tensors, the sampling is performed on the current device.
+        The sampling uses torch.multinomial for efficient weighted sampling.
 
     Examples:
-        >>> import numpy as np
-        >>> np.random.seed(42)  # For reproducible results
-        >>> mesh_data = np.array([[1.0], [2.0], [3.0], [4.0]])
-        >>> solution = np.array([0.1, 0.1, 0.1, 10.0])  # Last point has much larger solution field
+        >>> import torch
+        >>> torch.manual_seed(42)  # For reproducible results
+        >>> mesh_data = torch.tensor([[1.0], [2.0], [3.0], [4.0]])
+        >>> solution = torch.tensor([0.1, 0.1, 0.1, 10.0])  # Last point has much larger solution field
         >>> subset, indices = solution_weighted_shuffle_array(mesh_data, 2, solution)
         >>> subset.shape
         (2, 1)
@@ -942,24 +864,9 @@ def solution_weighted_shuffle_array(
         >>> # Use higher scaling_factor for stronger bias toward large solution fields
         >>> subset_biased, _ = solution_weighted_shuffle_array(mesh_data, 2, solution, scaling_factor=2.0)
     """
-    xp = array_type(arr)
+
     # Calculate solution-weighted probabilities
     sampling_probabilities = solution**scaling_factor
-    sampling_probabilities /= xp.sum(sampling_probabilities)  # Normalize to sum to 1
-
-    # Ensure we don't request more points than available
-    n_points = min(n_points, arr.shape[0])
-
-    # Create index array for all available points
-    point_indices = xp.arange(arr.shape[0])
-
-    if xp != np:
-        point_indices = point_indices.get()
-        sampling_probabilities = sampling_probabilities.get()
-
-    selected_indices = np.random.choice(
-        point_indices, n_points, p=sampling_probabilities
-    )
-    selected_indices = xp.asarray(selected_indices)
+    sampling_probabilities /= sampling_probabilities.sum()  # Normalize to sum to 1
 
-    return arr[selected_indices], selected_indices
+    return shuffle_array(arr, n_points, sampling_probabilities)
diff --git a/physicsnemo/utils/neighbors/knn/_cuml_impl.py b/physicsnemo/utils/neighbors/knn/_cuml_impl.py
index 10d20ce1f5..72546cf6a7 100644
--- a/physicsnemo/utils/neighbors/knn/_cuml_impl.py
+++ b/physicsnemo/utils/neighbors/knn/_cuml_impl.py
@@ -28,12 +28,21 @@
     def knn_impl(
         points: torch.Tensor, queries: torch.Tensor, k: int = 3
     ) -> tuple[torch.Tensor, torch.Tensor]:
+        # Create a cuml handle to ensure we use the right stream:
+        torch_stream = torch.cuda.current_stream()
+
+        # Get the raw CUDA stream pointer (as an integer)
+        ptr = torch_stream.cuda_stream
+
+        # Build a cuML handle with that stream
+        handle = cuml.Handle(stream=ptr)
+
         # Use dlpack to move the data without copying between pytorch and cuml:
         points = cp.from_dlpack(points)
         queries = cp.from_dlpack(queries)
 
         # Construct the knn:
-        knn = cuml.neighbors.NearestNeighbors(n_neighbors=k)
+        knn = cuml.neighbors.NearestNeighbors(n_neighbors=k, handle=handle)
         # First pass partitions everything in points to make lookups fast
         knn.fit(points)
 
diff --git a/physicsnemo/utils/neighbors/radius_search/_warp_impl.py b/physicsnemo/utils/neighbors/radius_search/_warp_impl.py
index 9b15b6816f..997f95d3fd 100644
--- a/physicsnemo/utils/neighbors/radius_search/_warp_impl.py
+++ b/physicsnemo/utils/neighbors/radius_search/_warp_impl.py
@@ -278,11 +278,6 @@ def radius_search_impl(
         if points.device != queries.device:
             raise ValueError("points and queries must be on the same device")
 
-        # We're in the warp-backended regime.  So, the first thing to do is to convert these torch tensors to warp
-        # These are readonly in warp, allocated with pytorch.
-        wp_points = wp.from_torch(points, dtype=wp.vec3)
-        wp_queries = wp.from_torch(queries, dtype=wp.vec3, return_ctype=True)
-
         N_queries = len(queries)
 
         # Compute follows data.
@@ -297,92 +292,104 @@ def radius_search_impl(
             wp_launch_stream = None
             wp_launch_device = "cpu"  # CPUs have no streams
 
-        # We need to create a hash grid:
-        grid = wp.HashGrid(dim_x=128, dim_y=128, dim_z=128, device=wp_points.device)
-        grid.reserve(N_queries)
-        grid.build(points=wp_points, radius=0.5 * radius)
-
-        # Now, the situations diverge based on max_points.
-
-        if max_points is None:
-            total_count, wp_offset = count_neighbors(
-                grid,
-                wp_points,
-                wp_queries,
-                wp_launch_device,
-                wp_launch_stream,
-                radius,
-                N_queries,
-            )
+        with wp.ScopedStream(wp_launch_stream):
+            # We're in the warp-backended regime.  So, the first thing to do is to convert these torch tensors to warp
+            # These are readonly in warp, allocated with pytorch.
+            wp_points = wp.from_torch(points, dtype=wp.vec3)
+            wp_queries = wp.from_torch(queries, dtype=wp.vec3, return_ctype=True)
 
-            if not total_count < 2**31 - 1:
-                raise RuntimeError(
-                    f"Total found neighbors is too large: {total_count} > 2**31 - 1"
-                )
+            # We need to create a hash grid:
+            grid = wp.HashGrid(dim_x=128, dim_y=128, dim_z=128, device=wp_points.device)
+            grid.reserve(N_queries)
+            grid.build(points=wp_points, radius=0.5 * radius)
 
-            return gather_neighbors(
-                grid,
-                points.device,
-                wp_points,
-                wp_queries,
-                wp_offset,
-                wp_launch_device,
-                wp_launch_stream,
-                radius,
-                N_queries,
-                return_dists,
-                return_points,
-                total_count,
-            )
+            # Now, the situations diverge based on max_points.
 
-        else:
-            # With a fixed number of output points, we have no need for a second kernel.
-            indices = torch.full(
-                (N_queries, max_points), 0, dtype=torch.int32, device=points.device
-            )
-            if return_dists:
-                distances = torch.zeros(
-                    (N_queries, max_points), dtype=torch.float32, device=points.device
+            if max_points is None:
+                total_count, wp_offset = count_neighbors(
+                    grid,
+                    wp_points,
+                    wp_queries,
+                    wp_launch_device,
+                    wp_launch_stream,
+                    radius,
+                    N_queries,
                 )
-            else:
-                distances = torch.empty(0, dtype=torch.float32, device=points.device)
-            num_neighbors = torch.zeros(
-                (N_queries,), dtype=torch.int32, device=points.device
-            )
 
-            if return_points:
-                points = torch.zeros(
-                    (len(queries), max_points, 3),
-                    dtype=torch.float32,
-                    device=points.device,
-                )
-            else:
-                points = torch.empty(
-                    (0, max_points, 3), dtype=torch.float32, device=points.device
-                )
-            # This kernel selects up to max_points hits per query.
-            # It is not necessarily deterministic.
-            # If the number of matches > max_points, you may get different results.
+                if not total_count < 2**31 - 1:
+                    raise RuntimeError(
+                        f"Total found neighbors is too large: {total_count} > 2**31 - 1"
+                    )
 
-            wp.launch(
-                kernel=radius_search_limited_select,
-                dim=N_queries,
-                inputs=[
-                    grid.id,
+                return gather_neighbors(
+                    grid,
+                    points.device,
                     wp_points,
                     wp_queries,
-                    max_points,
+                    wp_offset,
+                    wp_launch_device,
+                    wp_launch_stream,
                     radius,
-                    wp.from_torch(indices, return_ctype=True),
-                    wp.from_torch(num_neighbors, return_ctype=True),
+                    N_queries,
                     return_dists,
-                    wp.from_torch(distances, return_ctype=True),
                     return_points,
-                    wp.from_torch(points, return_ctype=True) if return_points else None,
-                ],
-                stream=wp_launch_stream,
-                device=wp_launch_device,
-            )
+                    total_count,
+                )
+
+            else:
+                # With a fixed number of output points, we have no need for a second kernel.
+                indices = torch.full(
+                    (N_queries, max_points), 0, dtype=torch.int32, device=points.device
+                )
+                if return_dists:
+                    distances = torch.zeros(
+                        (N_queries, max_points),
+                        dtype=torch.float32,
+                        device=points.device,
+                    )
+                else:
+                    distances = torch.empty(
+                        0, dtype=torch.float32, device=points.device
+                    )
+                num_neighbors = torch.zeros(
+                    (N_queries,), dtype=torch.int32, device=points.device
+                )
+
+                if return_points:
+                    points = torch.zeros(
+                        (len(queries), max_points, 3),
+                        dtype=torch.float32,
+                        device=points.device,
+                    )
+                else:
+                    points = torch.empty(
+                        (0, max_points, 3), dtype=torch.float32, device=points.device
+                    )
+                # This kernel selects up to max_points hits per query.
+                # It is not necessarily deterministic.
+                # If the number of matches > max_points, you may get different results.
+
+                wp.launch(
+                    kernel=radius_search_limited_select,
+                    dim=N_queries,
+                    inputs=[
+                        grid.id,
+                        wp_points,
+                        wp_queries,
+                        max_points,
+                        radius,
+                        wp.from_torch(indices, return_ctype=True),
+                        wp.from_torch(num_neighbors, return_ctype=True),
+                        return_dists,
+                        wp.from_torch(distances, return_ctype=True),
+                        return_points,
+                        wp.from_torch(points, return_ctype=True)
+                        if return_points
+                        else None,
+                    ],
+                    stream=wp_launch_stream,
+                    device=wp_launch_device,
+                )
 
         # Handle the matrix of return values:
         return indices, points, distances, num_neighbors
diff --git a/test/utils/test_domino_utils.py b/test/utils/test_domino_utils.py
index 8a0e03637b..a9e1166640 100644
--- a/test/utils/test_domino_utils.py
+++ b/test/utils/test_domino_utils.py
@@ -21,7 +21,10 @@
 module to ensure that the documented examples work correctly.
 """
 
-import numpy as np
+import math
+
+import pytest
+import torch
 
 from physicsnemo.utils.domino.utils import (
     area_weighted_shuffle_array,
@@ -45,67 +48,70 @@
 
 def test_calculate_center_of_mass():
     """Test calculate_center_of_mass function with docstring example."""
-    centers = np.array([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0], [2.0, 2.0, 2.0]])
-    sizes = np.array([1.0, 2.0, 3.0])
+    centers = torch.tensor([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0], [2.0, 2.0, 2.0]])
+    sizes = torch.tensor([1.0, 2.0, 3.0])
     com = calculate_center_of_mass(centers, sizes)
-    expected = np.array([[4.0 / 3.0, 4.0 / 3.0, 4.0 / 3.0]])
-    assert np.allclose(com, expected)
+    expected = torch.tensor([[4.0 / 3.0, 4.0 / 3.0, 4.0 / 3.0]])
+    assert torch.allclose(com, expected)
 
 
 def test_normalize():
     """Test normalize function with docstring examples."""
     # Example 1: With explicit min/max
-    field = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
-    normalized = normalize(field, 5.0, 1.0)
-    expected = np.array([-1.0, -0.5, 0.0, 0.5, 1.0])
-    assert np.allclose(normalized, expected)
+    field = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0])
+    normalized = normalize(field, max_val=5.0, min_val=1.0)
+    expected = torch.tensor([-1.0, -0.5, 0.0, 0.5, 1.0])
+    assert torch.allclose(normalized, expected)
 
     # Example 2: Auto-compute min/max
     normalized_auto = normalize(field)
-    expected_auto = np.array([-1.0, -0.5, 0.0, 0.5, 1.0])
-    assert np.allclose(normalized_auto, expected_auto)
+    expected_auto = torch.tensor([-1.0, -0.5, 0.0, 0.5, 1.0])
+    assert torch.allclose(normalized_auto, expected_auto)
 
 
 def test_unnormalize():
     """Test unnormalize function with docstring example."""
-    normalized = np.array([-1.0, -0.5, 0.0, 0.5, 1.0])
+    normalized = torch.tensor([-1.0, -0.5, 0.0, 0.5, 1.0])
     original = unnormalize(normalized, 5.0, 1.0)
-    expected = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
-    assert np.allclose(original, expected)
+    expected = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0])
+    assert torch.allclose(original, expected)
 
 
 def test_standardize():
     """Test standardize function with docstring examples."""
     # Example 1: With explicit mean/std
-    field = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
-    standardized = standardize(field, 3.0, np.sqrt(2.5))
-    expected = np.array([-1.265, -0.632, 0.0, 0.632, 1.265])
-    assert np.allclose(standardized, expected, atol=1e-3)
+    field = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0])
+    standardized = standardize(field, 3.0, math.sqrt(2.5))
+    expected = torch.tensor([-1.265, -0.632, 0.0, 0.632, 1.265])
+    assert torch.allclose(standardized, expected, atol=1e-3)
 
     # Example 2: Auto-compute mean/std
     standardized_auto = standardize(field)
-    assert np.allclose(np.mean(standardized_auto), 0.0)
-    assert np.allclose(np.std(standardized_auto, ddof=0), 1.0)
+    assert torch.allclose(torch.mean(standardized_auto), torch.tensor(0.0))
+    assert torch.allclose(torch.std(standardized_auto, correction=1), torch.tensor(1.0))
 
 
 def test_unstandardize():
     """Test unstandardize function with docstring example."""
-    standardized = np.array([-1.265, -0.632, 0.0, 0.632, 1.265])
-    original = unstandardize(standardized, 3.0, np.sqrt(2.5))
-    expected = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
-    assert np.allclose(original, expected, atol=1e-3)
+    standardized = torch.tensor([-1.265, -0.632, 0.0, 0.632, 1.265])
+    original = unstandardize(standardized, 3.0, math.sqrt(2.5))
+    expected = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0])
+    assert torch.allclose(original, expected, atol=1e-3)
 
 
-def test_calculate_normal_positional_encoding():
+@pytest.mark.parametrize("relative", [True, False])
+def test_calculate_normal_positional_encoding(relative):
     """Test calculate_normal_positional_encoding function with docstring examples."""
     # Example 1: Basic coordinates
-    coords = np.array([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]])
+    coords = torch.tensor([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]])
     cell_size = [0.1, 0.1, 0.1]
-    encoding = calculate_normal_positional_encoding(coords, cell_dimensions=cell_size)
-    assert encoding.shape == (2, 12)
 
     # Example 2: Relative positioning
-    coords_b = np.array([[0.5, 0.5, 0.5], [0.5, 0.5, 0.5]])
+    if relative:
+        coords_b = torch.tensor([[0.5, 0.5, 0.5], [0.5, 0.5, 0.5]])
+    else:
+        coords_b = None
+
     encoding_rel = calculate_normal_positional_encoding(coords, coords_b, cell_size)
     assert encoding_rel.shape == (2, 12)
 
@@ -113,9 +119,9 @@ def test_calculate_normal_positional_encoding():
 def test_nd_interpolator():
     """Test nd_interpolator function with docstring example."""
     # Simple 2D interpolation example
-    coords = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
-    field_vals = np.array([[1.0], [2.0], [3.0], [4.0]])
-    grid_points = np.array([[0.5, 0.5]])
+    coords = torch.tensor([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
+    field_vals = torch.tensor([[1.0], [2.0], [3.0], [4.0]])
+    grid_points = torch.tensor([[0.5, 0.5]])
     result = nd_interpolator([coords], field_vals, grid_points)
     assert result.shape[0] == 1  # One grid point
 
@@ -123,49 +129,49 @@ def test_nd_interpolator():
 def test_pad():
     """Test pad function with docstring examples."""
     # Example 1: Padding needed
-    arr = np.array([[1.0, 2.0], [3.0, 4.0]])
+    arr = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
     padded = pad(arr, 4, -1.0)
     assert padded.shape == (4, 2)
-    assert np.array_equal(padded[:2], arr)
-    assert bool(np.all(padded[2:] == -1.0))
+    assert torch.allclose(padded[:2], arr)
+    assert bool(torch.all(padded[2:] == -1.0))
 
     # Example 2: No padding needed
     same = pad(arr, 2)
-    assert np.array_equal(same, arr)
+    assert torch.allclose(same, arr)
 
 
 def test_pad_inp():
     """Test pad_inp function with docstring example."""
-    arr = np.array([[[1.0, 2.0]], [[3.0, 4.0]]])
+    arr = torch.tensor([[[1.0, 2.0]], [[3.0, 4.0]]])
     padded = pad_inp(arr, 4, 0.0)
     assert padded.shape == (4, 1, 2)
-    assert np.array_equal(padded[:2], arr)
-    assert bool(np.all(padded[2:] == 0.0))
+    assert torch.allclose(padded[:2], arr)
+    assert bool(torch.all(padded[2:] == 0.0))
 
 
 def test_shuffle_array():
     """Test shuffle_array function with docstring example."""
-    np.random.seed(42)  # For reproducible results
-    data = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
+    torch.manual_seed(42)  # For reproducible results
+    data = torch.tensor([[1, 2], [3, 4], [5, 6], [7, 8]])
     subset, indices = shuffle_array(data, 2)
     assert subset.shape == (2, 2)
     assert indices.shape == (2,)
-    assert len(np.unique(indices)) == 2  # No duplicates
+    assert len(torch.unique(indices)) == 2  # No duplicates
 
 
 def test_shuffle_array_without_sampling():
     """Test shuffle_array_without_sampling function with docstring example."""
-    np.random.seed(42)  # For reproducible results
-    data = np.array([[1], [2], [3], [4]])
+    torch.manual_seed(42)  # For reproducible results
+    data = torch.tensor([[1], [2], [3], [4]])
     shuffled, indices = shuffle_array_without_sampling(data)
     assert shuffled.shape == (4, 1)
     assert indices.shape == (4,)
-    assert set(indices) == set(range(4))  # All original indices present
+    assert set(indices.tolist()) == set(range(4))  # All original indices present
 
 
 def test_calculate_pos_encoding():
     """Test calculate_pos_encoding function with docstring example."""
-    positions = np.array([0.0, 1.0, 2.0])
+    positions = torch.tensor([0.0, 1.0, 2.0])
     encodings = calculate_pos_encoding(positions, d=4)
     assert len(encodings) == 4
     assert all(enc.shape == (3,) for enc in encodings)
@@ -182,30 +188,30 @@ def test_combine_dict():
 
 def test_create_grid():
     """Test create_grid function with docstring example."""
-    min_bounds = np.array([0.0, 0.0, 0.0])
-    max_bounds = np.array([1.0, 1.0, 1.0])
-    grid_res = np.array([2, 2, 2])
+    min_bounds = torch.tensor([0.0, 0.0, 0.0])
+    max_bounds = torch.tensor([1.0, 1.0, 1.0])
+    grid_res = torch.tensor([2, 2, 2])
     grid = create_grid(max_bounds, min_bounds, grid_res)
     assert grid.shape == (2, 2, 2, 3)
-    assert np.allclose(grid[0, 0, 0], [0.0, 0.0, 0.0])
-    assert np.allclose(grid[1, 1, 1], [1.0, 1.0, 1.0])
+    assert torch.allclose(grid[0, 0, 0], torch.tensor([0.0, 0.0, 0.0]))
+    assert torch.allclose(grid[1, 1, 1], torch.tensor([1.0, 1.0, 1.0]))
 
 
 def test_mean_std_sampling():
     """Test mean_std_sampling function with docstring example."""
     # Create test data with outliers
-    field = np.array([[1.0], [2.0], [3.0], [10.0]])  # 10.0 is outlier
-    field_mean = np.array([2.0])
-    field_std = np.array([1.0])
+    field = torch.tensor([[1.0], [2.0], [3.0], [10.0]])  # 10.0 is outlier
+    field_mean = torch.tensor([2.0])
+    field_std = torch.tensor([1.0])
     outliers = mean_std_sampling(field, field_mean, field_std, 2.0)
     assert 3 in outliers  # Index 3 (value 10.0) should be detected as outlier
 
 
 def test_area_weighted_shuffle_array():
     """Test area_weighted_shuffle_array function with docstring example."""
-    np.random.seed(42)  # For reproducible results
-    mesh_data = np.array([[1.0], [2.0], [3.0], [4.0]])
-    cell_areas = np.array([0.1, 0.1, 0.1, 10.0])  # Last point has much larger area
+    torch.manual_seed(42)  # For reproducible results
+    mesh_data = torch.tensor([[1.0], [2.0], [3.0], [4.0]])
+    cell_areas = torch.tensor([0.1, 0.1, 0.1, 10.0])  # Last point has much larger area
     subset, indices = area_weighted_shuffle_array(mesh_data, 2, cell_areas)
     assert subset.shape == (2, 1)
     assert indices.shape == (2,)

From 45789759bd4f56d65db7cbc83a69712d07608998 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 3 Sep 2025 09:49:53 -0700
Subject: [PATCH 14/98] update training script for new datapipe

---
 .../external_aerodynamics/domino/src/train.py | 144 +++++++++---------
 1 file changed, 76 insertions(+), 68 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py
index 96e30b58e7..3311083e04 100644
--- a/examples/cfd/external_aerodynamics/domino/src/train.py
+++ b/examples/cfd/external_aerodynamics/domino/src/train.py
@@ -41,7 +41,7 @@
 from hydra.utils import to_absolute_path
 from omegaconf import DictConfig, OmegaConf
 import torch.distributed as dist
-from torch.cuda.amp import GradScaler, autocast
+from torch.amp import GradScaler, autocast
 from torch.nn.parallel import DistributedDataParallel
 from torch.utils.data import DataLoader
 from torch.utils.data.distributed import DistributedSampler
@@ -54,7 +54,7 @@
 from physicsnemo.launch.utils import load_checkpoint, save_checkpoint
 from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper
 
-from physicsnemo.datapipes.cae.domino_datapipe import (
+from physicsnemo.datapipes.cae.domino_datapipe2 import (
     DoMINODataPipe,
     compute_scaling_factors,
     create_domino_dataset,
@@ -73,7 +73,7 @@
 from physicsnemo.utils.profiling import profile, Profiler
 
 
-# Profiler().enable("line_profiler")
+# Profiler().enable("torch")
 # Profiler().initialize()
 
 
@@ -620,8 +620,8 @@ def validation_step(
     with torch.no_grad():
         for i_batch, sample_batched in enumerate(dataloader):
             sampled_batched = dict_to_device(sample_batched, device)
-
-            with autocast(enabled=True):
+            print(f"validation i batch {i_batch}")
+            with autocast("cuda", enabled=True):
                 if add_physics_loss:
                     prediction_vol, prediction_surf = model(
                         sampled_batched, return_volume_neighbors=True
@@ -680,70 +680,75 @@ def train_epoch(
 
     gpu_start_info = nvmlDeviceGetMemoryInfo(gpu_handle)
     start_time = time.perf_counter()
-    for i_batch, sample_batched in enumerate(dataloader):
-        sampled_batched = dict_to_device(sample_batched, device)
-
-        if add_physics_loss:
-            autocast_enabled = False
-        else:
-            autocast_enabled = True
-        with autocast(enabled=autocast_enabled):
-            with nvtx.range("Model Forward Pass"):
-                if add_physics_loss:
-                    prediction_vol, prediction_surf = model(
-                        sampled_batched, return_volume_neighbors=True
-                    )
-                else:
-                    prediction_vol, prediction_surf = model(sampled_batched)
-
-            loss, loss_dict = compute_loss_dict(
-                prediction_vol,
-                prediction_surf,
-                sampled_batched,
-                loss_fn_type,
-                integral_scaling_factor,
-                surf_loss_scaling,
-                vol_loss_scaling,
-                first_deriv,
-                eqn,
-                bounding_box,
-                vol_factors,
-                add_physics_loss,
-            )
+    with Profiler():
+        for i_batch, sample_batched in enumerate(dataloader):
+            sampled_batched = dict_to_device(sample_batched, device)
 
-        loss = loss / loss_interval
-        scaler.scale(loss).backward()
+            if add_physics_loss:
+                autocast_enabled = False
+            else:
+                autocast_enabled = True
+            with autocast("cuda", enabled=autocast_enabled):
+                with nvtx.range("Model Forward Pass"):
+                    if add_physics_loss:
+                        prediction_vol, prediction_surf = model(
+                            sampled_batched, return_volume_neighbors=True
+                        )
+                    else:
+                        prediction_vol, prediction_surf = model(sampled_batched)
 
-        if ((i_batch + 1) % loss_interval == 0) or (i_batch + 1 == len(dataloader)):
-            scaler.step(optimizer)
-            scaler.update()
-            optimizer.zero_grad()
+                loss, loss_dict = compute_loss_dict(
+                    prediction_vol,
+                    prediction_surf,
+                    sampled_batched,
+                    loss_fn_type,
+                    integral_scaling_factor,
+                    surf_loss_scaling,
+                    vol_loss_scaling,
+                    first_deriv,
+                    eqn,
+                    bounding_box,
+                    vol_factors,
+                    add_physics_loss,
+                )
 
-        # Gather data and report
-        running_loss += loss.item()
-        elapsed_time = time.perf_counter() - start_time
-        start_time = time.perf_counter()
-        gpu_end_info = nvmlDeviceGetMemoryInfo(gpu_handle)
-        gpu_memory_used = gpu_end_info.used / (1024**3)
-        gpu_memory_delta = (gpu_end_info.used - gpu_start_info.used) / (1024**3)
-
-        logging_string = f"Device {device}, batch processed: {i_batch + 1}\n"
-        # Format the loss dict into a string:
-        loss_string = (
-            "  "
-            + "\t".join([f"{key.replace('loss_', ''):<10}" for key in loss_dict.keys()])
-            + "\n"
-        )
-        loss_string += (
-            "  " + f"\t".join([f"{l.item():<10.3e}" for l in loss_dict.values()]) + "\n"
-        )
+            loss = loss / loss_interval
+            scaler.scale(loss).backward()
+
+            if ((i_batch + 1) % loss_interval == 0) or (i_batch + 1 == len(dataloader)):
+                scaler.step(optimizer)
+                scaler.update()
+                optimizer.zero_grad()
+
+            # Gather data and report
+            running_loss += loss.item()
+            elapsed_time = time.perf_counter() - start_time
+            start_time = time.perf_counter()
+            gpu_end_info = nvmlDeviceGetMemoryInfo(gpu_handle)
+            gpu_memory_used = gpu_end_info.used / (1024**3)
+            gpu_memory_delta = (gpu_end_info.used - gpu_start_info.used) / (1024**3)
+
+            logging_string = f"Device {device}, batch processed: {i_batch + 1}\n"
+            # Format the loss dict into a string:
+            loss_string = (
+                "  "
+                + "\t".join(
+                    [f"{key.replace('loss_', ''):<10}" for key in loss_dict.keys()]
+                )
+                + "\n"
+            )
+            loss_string += (
+                "  "
+                + f"\t".join([f"{l.item():<10.3e}" for l in loss_dict.values()])
+                + "\n"
+            )
 
-        logging_string += loss_string
-        logging_string += f"  GPU memory used: {gpu_memory_used:.3f} Gb\n"
-        logging_string += f"  GPU memory delta: {gpu_memory_delta:.3f} Gb\n"
-        logging_string += f"  Time taken: {elapsed_time:.2f} seconds\n"
-        logger.info(logging_string)
-        gpu_start_info = nvmlDeviceGetMemoryInfo(gpu_handle)
+            logging_string += loss_string
+            logging_string += f"  GPU memory used: {gpu_memory_used:.3f} Gb\n"
+            logging_string += f"  GPU memory delta: {gpu_memory_delta:.3f} Gb\n"
+            logging_string += f"  Time taken: {elapsed_time:.2f} seconds\n"
+            logger.info(logging_string)
+            gpu_start_info = nvmlDeviceGetMemoryInfo(gpu_handle)
 
     last_loss = running_loss / (i_batch + 1)  # loss per batch
     if dist.rank == 0:
@@ -904,7 +909,7 @@ def main(cfg: DictConfig) -> None:
         global_features=num_global_features,
         model_parameters=cfg.model,
     ).to(dist.device)
-    model = torch.compile(model, disable=True)  # TODO make this configurable
+    # model = torch.compile(model, fullgraph=True, dynamic=True)  # TODO make this configurable
 
     # Print model summary (structure and parmeter count).
     logger.info(f"Model summary:\n{torchinfo.summary(model, verbose=0, depth=2)}\n")
@@ -999,7 +1004,7 @@ def main(cfg: DictConfig) -> None:
         model.train(True)
         epoch_start_time = time.perf_counter()
         avg_loss = train_epoch(
-            dataloader=train_dataloader,
+            dataloader=train_dataset,
             model=model,
             optimizer=optimizer,
             scaler=scaler,
@@ -1026,7 +1031,7 @@ def main(cfg: DictConfig) -> None:
 
         model.eval()
         avg_vloss = validation_step(
-            dataloader=val_dataloader,
+            dataloader=val_dataset,
             model=model,
             device=dist.device,
             logger=logger,
@@ -1088,4 +1093,7 @@ def main(cfg: DictConfig) -> None:
 
 
 if __name__ == "__main__":
+    # Profiler().enable("torch")
+    # Profiler().initialize()
     main()
+    # Profiler().finalize()

From 9a5d8edf353331b42dcddb8914df9ac2fffc3a85 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Thu, 4 Sep 2025 13:07:38 +0000
Subject: [PATCH 15/98] Add abillity to pin memory, optionally.

---
 physicsnemo/datapipes/cae/domino_datapipe2.py   |  7 +++----
 physicsnemo/datapipes/cae/drivaer_ml_dataset.py | 11 ++++++++++-
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py
index dcc82d49cd..d74d450fed 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe2.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe2.py
@@ -44,7 +44,6 @@
 )
 from physicsnemo.distributed import DistributedManager
 from physicsnemo.utils.domino.utils import (
-    ArrayType,
     calculate_center_of_mass,
     calculate_normal_positional_encoding,
     create_grid,
@@ -64,8 +63,8 @@ class BoundingBox(Protocol):
     Type definition for the required format of bounding box dimensions.
     """
 
-    min: ArrayType
-    max: ArrayType
+    min: Sequence
+    max: Sequence
 
 
 @dataclass
@@ -149,7 +148,7 @@ class DoMINODataConfig:
     volume_factors: Optional[Sequence] = None
     bounding_box_dims: Optional[Union[BoundingBox, Sequence]] = None
 
-    grid_resolution: Union[Sequence, ArrayType] = (256, 96, 64)
+    grid_resolution: Sequence = (256, 96, 64)
     normalize_coordinates: bool = False
     sample_in_bbox: bool = False
     sampling: bool = False
diff --git a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
index 78f9407ebd..bb3c5b7a1a 100644
--- a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
+++ b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
@@ -265,6 +265,7 @@ def __init__(
         keys_to_read: list[str] | None,
         output_device: torch.device,
         preload_depth: int = 2,
+        pin_memory: bool = True,
         device_mesh: torch.distributed.DeviceMesh | None = None,
         placements: dict[str, torch.distributed.tensor.Placement] | None = None,
         consumer_stream: torch.cuda.Stream | None = None,
@@ -285,6 +286,8 @@ def __init__(
             data_dir
         )
 
+        self.pin_memory = pin_memory
+
         # Check the file names; some can be read well in parallel, while others
         # are not parallelizable.
 
@@ -360,8 +363,14 @@ def _move_to_gpu(
 
         with torch.cuda.stream(self._data_loader_stream):
             for key in data.keys():
+                if self.pin_memory:
+                    result[key] = (
+                        data[key].pin_memory().to(self.output_device, non_blocking=True)
+                    )
+                else:
+                    result[key] = data[key].to(self.output_device, non_blocking=True)
                 # Move to GPU if available
-                result[key] = data[key].to(self.output_device, non_blocking=True)
+                # result[key] = data[key].to(self.output_device, non_blocking=True)
                 result[key].record_stream(self.consumer_stream)
 
         # Mark the consumer stream:

From c57f985216a3ca659470ee25a5827d7c96b2e257 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Fri, 5 Sep 2025 14:19:38 +0000
Subject: [PATCH 16/98] Snapshot updates of cleanups and minor fixes

---
 .../external_aerodynamics/domino/src/loss.py  | 587 ++++++++++++++++
 .../external_aerodynamics/domino/src/train.py | 638 ++----------------
 .../external_aerodynamics/domino/src/utils.py |  74 ++
 physicsnemo/datapipes/cae/domino_datapipe2.py |  21 +-
 4 files changed, 733 insertions(+), 587 deletions(-)
 create mode 100644 examples/cfd/external_aerodynamics/domino/src/loss.py
 create mode 100644 examples/cfd/external_aerodynamics/domino/src/utils.py

diff --git a/examples/cfd/external_aerodynamics/domino/src/loss.py b/examples/cfd/external_aerodynamics/domino/src/loss.py
new file mode 100644
index 0000000000..0d90ab3674
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/domino/src/loss.py
@@ -0,0 +1,587 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from typing import Literal, Any
+
+from physicsnemo.utils.domino.utils import unnormalize
+
+import time
+import os
+import re
+import torch
+import torchinfo
+
+from typing import Literal, Any
+
+import apex
+import numpy as np
+import hydra
+from hydra.utils import to_absolute_path
+from omegaconf import DictConfig, OmegaConf
+import torch.distributed as dist
+from torch.amp import GradScaler, autocast
+from torch.nn.parallel import DistributedDataParallel
+from torch.utils.data import DataLoader
+from torch.utils.data.distributed import DistributedSampler
+from torch.utils.tensorboard import SummaryWriter
+from nvtx import annotate as nvtx_annotate
+import torch.cuda.nvtx as nvtx
+
+
+from physicsnemo.distributed import DistributedManager
+from physicsnemo.launch.utils import load_checkpoint, save_checkpoint
+from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper
+
+from physicsnemo.datapipes.cae.domino_datapipe2 import (
+    DoMINODataPipe,
+    compute_scaling_factors,
+    create_domino_dataset,
+)
+from physicsnemo.models.domino.model import DoMINO
+from physicsnemo.utils.domino.utils import *
+
+# This is included for GPU memory tracking:
+from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo
+import time
+
+# Initialize NVML
+nvmlInit()
+
+
+from physicsnemo.utils.profiling import profile, Profiler
+
+
+def compute_physics_loss(
+    output: torch.Tensor,
+    target: torch.Tensor,
+    mask: torch.Tensor,
+    loss_type: Literal["mse", "rmse"],
+    dims: tuple[int, ...] | None,
+    first_deriv: torch.nn.Module,
+    eqn: Any,
+    bounding_box: torch.Tensor,
+    vol_factors: torch.Tensor,
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+    """Compute physics-based loss terms for Navier-Stokes equations.
+
+    Args:
+        output: Model output containing (output, coords_neighbors, output_neighbors, neighbors_list)
+        target: Ground truth values
+        mask: Mask for valid values
+        loss_type: Type of loss to calculate ("mse" or "rmse")
+        dims: Dimensions for loss calculation
+        first_deriv: First derivative calculator
+        eqn: Equations
+        bounding_box: Bounding box for normalization
+        vol_factors: Volume factors for normalization
+
+    Returns:
+        Tuple of (data_loss, continuity_loss, momentum_x_loss, momentum_y_loss, momentum_z_loss)
+    """
+    # Physics loss enabled
+    output, coords_neighbors, output_neighbors, neighbors_list = output
+    batch_size = output.shape[1]
+    fields, num_neighbors = output_neighbors.shape[3], output_neighbors.shape[2]
+    coords_total = coords_neighbors[0, :]
+    output_total = output_neighbors[0, :]
+    output_total_unnormalized = unnormalize(
+        output_total, vol_factors[0], vol_factors[1]
+    )
+    coords_total_unnormalized = unnormalize(
+        coords_total, bounding_box[0], bounding_box[1]
+    )
+
+    # compute first order gradients on all the nodes from the neighbors_list
+    grad_list = {}
+    for parent_id, neighbor_ids in neighbors_list.items():
+        neighbor_ids_tensor = torch.tensor(neighbor_ids).to(
+            output_total_unnormalized.device
+        )
+        du = (
+            output_total_unnormalized[:, [parent_id]]
+            - output_total_unnormalized[:, neighbor_ids_tensor]
+        )
+        dv = (
+            coords_total_unnormalized[:, [parent_id]]
+            - coords_total_unnormalized[:, neighbor_ids_tensor]
+        )
+        grads = first_deriv.forward(
+            coords=None, connectivity_tensor=None, y=None, du=du, dv=dv
+        )
+        grad = torch.cat(grads, dim=1)
+        grad_list[parent_id] = grad
+
+    # compute second order gradients on only the center node
+    neighbor_ids_tensor = torch.tensor(neighbors_list[0]).to(
+        output_total_unnormalized.device
+    )
+    grad_neighbors_center = torch.stack([v for v in grad_list.values()], dim=1)
+    grad_neighbors_center = grad_neighbors_center.reshape(
+        batch_size, len(neighbors_list[0]) + 1, -1
+    )
+
+    du = grad_neighbors_center[:, [0]] - grad_neighbors_center[:, neighbor_ids_tensor]
+    dv = (
+        coords_total_unnormalized[:, [0]]
+        - coords_total_unnormalized[:, neighbor_ids_tensor]
+    )
+
+    # second order gradients
+    ggrads_center = first_deriv.forward(
+        coords=None, connectivity_tensor=None, y=None, du=du, dv=dv
+    )
+    ggrad_center = torch.cat(ggrads_center, dim=1)
+    grad_neighbors_center = grad_neighbors_center.reshape(
+        batch_size, len(neighbors_list[0]) + 1, 3, -1
+    )
+
+    # Get the outputs on the original nodes
+    fields_center_unnormalized = output_total_unnormalized[:, 0, :]
+    grad_center = grad_neighbors_center[:, 0, :, :]
+    grad_grad_uvw_center = ggrad_center[:, :, :9]
+
+    nu = 1.507 * 1e-5
+
+    dict_mapping = {
+        "u": fields_center_unnormalized[:, [0]],
+        "v": fields_center_unnormalized[:, [1]],
+        "w": fields_center_unnormalized[:, [2]],
+        "p": fields_center_unnormalized[:, [3]],
+        "nu": nu + fields_center_unnormalized[:, [4]],
+        "u__x": grad_center[:, 0, [0]],
+        "u__y": grad_center[:, 1, [0]],
+        "u__z": grad_center[:, 2, [0]],
+        "v__x": grad_center[:, 0, [1]],
+        "v__y": grad_center[:, 1, [1]],
+        "v__z": grad_center[:, 2, [1]],
+        "w__x": grad_center[:, 0, [2]],
+        "w__y": grad_center[:, 1, [2]],
+        "w__z": grad_center[:, 2, [2]],
+        "p__x": grad_center[:, 0, [3]],
+        "p__y": grad_center[:, 1, [3]],
+        "p__z": grad_center[:, 2, [3]],
+        "nu__x": grad_center[:, 0, [4]],
+        "nu__y": grad_center[:, 1, [4]],
+        "nu__z": grad_center[:, 2, [4]],
+        "u__x__x": grad_grad_uvw_center[:, 0, [0]],
+        "u__x__y": grad_grad_uvw_center[:, 1, [0]],
+        "u__x__z": grad_grad_uvw_center[:, 2, [0]],
+        "u__y__x": grad_grad_uvw_center[:, 1, [0]],  # same as __x__y
+        "u__y__y": grad_grad_uvw_center[:, 1, [1]],
+        "u__y__z": grad_grad_uvw_center[:, 2, [1]],
+        "u__z__x": grad_grad_uvw_center[:, 2, [0]],  # same as __x__z
+        "u__z__y": grad_grad_uvw_center[:, 2, [1]],  # same as __y__z
+        "u__z__z": grad_grad_uvw_center[:, 2, [2]],
+        "v__x__x": grad_grad_uvw_center[:, 0, [3]],
+        "v__x__y": grad_grad_uvw_center[:, 1, [3]],
+        "v__x__z": grad_grad_uvw_center[:, 2, [3]],
+        "v__y__x": grad_grad_uvw_center[:, 1, [3]],  # same as __x__y
+        "v__y__y": grad_grad_uvw_center[:, 1, [4]],
+        "v__y__z": grad_grad_uvw_center[:, 2, [4]],
+        "v__z__x": grad_grad_uvw_center[:, 2, [3]],  # same as __x__z
+        "v__z__y": grad_grad_uvw_center[:, 2, [4]],  # same as __y__z
+        "v__z__z": grad_grad_uvw_center[:, 2, [5]],
+        "w__x__x": grad_grad_uvw_center[:, 0, [6]],
+        "w__x__y": grad_grad_uvw_center[:, 1, [6]],
+        "w__x__z": grad_grad_uvw_center[:, 2, [6]],
+        "w__y__x": grad_grad_uvw_center[:, 1, [6]],  # same as __x__y
+        "w__y__y": grad_grad_uvw_center[:, 1, [7]],
+        "w__y__z": grad_grad_uvw_center[:, 2, [7]],
+        "w__z__x": grad_grad_uvw_center[:, 2, [6]],  # same as __x__z
+        "w__z__y": grad_grad_uvw_center[:, 2, [7]],  # same as __y__z
+        "w__z__z": grad_grad_uvw_center[:, 2, [8]],
+    }
+    continuity = eqn["continuity"].evaluate(dict_mapping)["continuity"]
+    momentum_x = eqn["momentum_x"].evaluate(dict_mapping)["momentum_x"]
+    momentum_y = eqn["momentum_y"].evaluate(dict_mapping)["momentum_y"]
+    momentum_z = eqn["momentum_z"].evaluate(dict_mapping)["momentum_z"]
+
+    # Compute the weights for the equation residuals
+    weight_continuity = torch.sigmoid(0.5 * (torch.abs(continuity) - 10))
+    weight_momentum_x = torch.sigmoid(0.5 * (torch.abs(momentum_x) - 10))
+    weight_momentum_y = torch.sigmoid(0.5 * (torch.abs(momentum_y) - 10))
+    weight_momentum_z = torch.sigmoid(0.5 * (torch.abs(momentum_z) - 10))
+
+    weighted_continuity = weight_continuity * torch.abs(continuity)
+    weighted_momentum_x = weight_momentum_x * torch.abs(momentum_x)
+    weighted_momentum_y = weight_momentum_y * torch.abs(momentum_y)
+    weighted_momentum_z = weight_momentum_z * torch.abs(momentum_z)
+
+    # Compute data loss
+    num = torch.sum(mask * (output - target) ** 2.0, dims)
+    if loss_type == "rmse":
+        denom = torch.sum(mask * target**2.0, dims)
+    else:
+        denom = torch.sum(mask)
+
+    del coords_total, output_total
+    torch.cuda.empty_cache()
+
+    return (
+        torch.mean(num / denom),
+        torch.mean(torch.abs(weighted_continuity)),
+        torch.mean(torch.abs(weighted_momentum_x)),
+        torch.mean(torch.abs(weighted_momentum_y)),
+        torch.mean(torch.abs(weighted_momentum_z)),
+    )
+
+
+def loss_fn(
+    output: torch.Tensor,
+    target: torch.Tensor,
+    loss_type: Literal["mse", "rmse"],
+    padded_value: float = -10,
+) -> torch.Tensor:
+    """Calculate mean squared error or root mean squared error with masking for padded values.
+
+    Args:
+        output: Predicted values from the model
+        target: Ground truth values
+        loss_type: Type of loss to calculate ("mse" or "rmse")
+        padded_value: Value used for padding in the tensor
+
+    Returns:
+        Calculated loss as a scalar tensor
+    """
+    mask = abs(target - padded_value) > 1e-3
+
+    if loss_type == "rmse":
+        dims = (0, 1)
+    else:
+        dims = None
+
+    num = torch.sum(mask * (output - target) ** 2.0, dims)
+    if loss_type == "rmse":
+        denom = torch.sum(mask * target**2.0, dims)
+        loss = torch.mean(torch.sqrt(num / denom))
+    elif loss_type == "mse":
+        denom = torch.sum(mask)
+        loss = torch.mean(num / denom)
+    else:
+        raise ValueError(f"Invalid loss type: {loss_type}")
+    return loss
+
+
+def loss_fn_with_physics(
+    output: torch.Tensor,
+    target: torch.Tensor,
+    loss_type: Literal["mse", "rmse"],
+    padded_value: float = -10,
+    first_deriv: torch.nn.Module = None,
+    eqn: Any = None,
+    bounding_box: torch.Tensor = None,
+    vol_factors: torch.Tensor = None,
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+    """Calculate loss with physics-based terms for appropriate equations.
+
+    Args:
+        output: Predicted values from the model (with neighbor data when physics enabled)
+        target: Ground truth values
+        loss_type: Type of loss to calculate ("mse" or "rmse")
+        padded_value: Value used for padding in the tensor
+        first_deriv: First derivative calculator
+        eqn: Equations
+        bounding_box: Bounding box for normalization
+        vol_factors: Volume factors for normalization
+
+    Returns:
+        Tuple of (data_loss, continuity_loss, momentum_x_loss, momentum_y_loss, momentum_z_loss)
+    """
+    mask = abs(target - padded_value) > 1e-3
+
+    if loss_type == "rmse":
+        dims = (0, 1)
+    else:
+        dims = None
+
+    # Call the physics loss computation function
+    return compute_physics_loss(
+        output=output,
+        target=target,
+        mask=mask,
+        loss_type=loss_type,
+        dims=dims,
+        first_deriv=first_deriv,
+        eqn=eqn,
+        bounding_box=bounding_box,
+        vol_factors=vol_factors,
+    )
+
+
+def loss_fn_surface(
+    output: torch.Tensor, target: torch.Tensor, loss_type: Literal["mse", "rmse"]
+) -> torch.Tensor:
+    """Calculate loss for surface data by handling scalar and vector components separately.
+
+    Args:
+        output: Predicted surface values from the model
+        target: Ground truth surface values
+        loss_type: Type of loss to calculate ("mse" or "rmse")
+
+    Returns:
+        Combined scalar and vector loss as a scalar tensor
+    """
+    # Separate the scalar and vector components:
+    output_scalar, output_vector = torch.split(output, [1, 3], dim=2)
+    target_scalar, target_vector = torch.split(target, [1, 3], dim=2)
+
+    numerator = torch.mean((output_scalar - target_scalar) ** 2.0)
+    vector_diff_sq = torch.mean((target_vector - output_vector) ** 2.0, (0, 1))
+    if loss_type == "mse":
+        masked_loss_pres = numerator
+        masked_loss_ws = torch.sum(vector_diff_sq)
+    else:
+        denom = torch.mean((target_scalar) ** 2.0)
+        masked_loss_pres = numerator / denom
+
+        # Compute the mean diff**2 of the vector component, leave the last dimension:
+        masked_loss_ws_num = vector_diff_sq
+        masked_loss_ws_denom = torch.mean((target_vector) ** 2.0, (0, 1))
+        masked_loss_ws = torch.sum(masked_loss_ws_num / masked_loss_ws_denom)
+
+    loss = masked_loss_pres + masked_loss_ws
+
+    return loss / 4.0
+
+
+def loss_fn_area(
+    output: torch.Tensor,
+    target: torch.Tensor,
+    normals: torch.Tensor,
+    area: torch.Tensor,
+    area_scaling_factor: float,
+    loss_type: Literal["mse", "rmse"],
+) -> torch.Tensor:
+    """Calculate area-weighted loss for surface data considering normal vectors.
+
+    Args:
+        output: Predicted surface values from the model
+        target: Ground truth surface values
+        normals: Normal vectors for the surface
+        area: Area values for surface elements
+        area_scaling_factor: Scaling factor for area weighting
+        loss_type: Type of loss to calculate ("mse" or "rmse")
+
+    Returns:
+        Area-weighted loss as a scalar tensor
+    """
+    area = area * area_scaling_factor
+    area_scale_factor = area
+
+    # Separate the scalar and vector components.
+    target_scalar, target_vector = torch.split(
+        target * area_scale_factor, [1, 3], dim=2
+    )
+    output_scalar, output_vector = torch.split(
+        output * area_scale_factor, [1, 3], dim=2
+    )
+
+    # Apply the normals to the scalar components (only [:,:,0]):
+    normals, _ = torch.split(normals, [1, normals.shape[-1] - 1], dim=2)
+    target_scalar = target_scalar * normals
+    output_scalar = output_scalar * normals
+
+    # Compute the mean diff**2 of the scalar component:
+    masked_loss_pres = torch.mean(((output_scalar - target_scalar) ** 2.0), dim=(0, 1))
+    if loss_type == "rmse":
+        masked_loss_pres /= torch.mean(target_scalar**2.0, dim=(0, 1))
+
+    # Compute the mean diff**2 of the vector component, leave the last dimension:
+    masked_loss_ws = torch.mean((target_vector - output_vector) ** 2.0, (0, 1))
+
+    if loss_type == "rmse":
+        masked_loss_ws /= torch.mean((target_vector) ** 2.0, (0, 1))
+
+    # Combine the scalar and vector components:
+    loss = 0.25 * (masked_loss_pres + torch.sum(masked_loss_ws))
+
+    return loss
+
+
+def integral_loss_fn(
+    output, target, area, normals, stream_velocity=None, padded_value=-10
+):
+    drag_loss = drag_loss_fn(
+        output, target, area, normals, stream_velocity=stream_velocity, padded_value=-10
+    )
+    lift_loss = lift_loss_fn(
+        output, target, area, normals, stream_velocity=stream_velocity, padded_value=-10
+    )
+    return lift_loss + drag_loss
+
+
+def lift_loss_fn(output, target, area, normals, stream_velocity=None, padded_value=-10):
+    vel_inlet = stream_velocity  # Get this from the dataset
+    mask = abs(target - padded_value) > 1e-3
+
+    output_true = target * mask * area * (vel_inlet) ** 2.0
+    output_pred = output * mask * area * (vel_inlet) ** 2.0
+
+    normals = torch.select(normals, 2, 2)
+    # output_true_0 = output_true[:, :, 0]
+    output_true_0 = output_true.select(2, 0)
+    output_pred_0 = output_pred.select(2, 0)
+
+    pres_true = output_true_0 * normals
+    pres_pred = output_pred_0 * normals
+
+    wz_true = output_true[:, :, -1]
+    wz_pred = output_pred[:, :, -1]
+
+    masked_pred = torch.mean(pres_pred + wz_pred, (1))
+    masked_truth = torch.mean(pres_true + wz_true, (1))
+
+    loss = (masked_pred - masked_truth) ** 2.0
+    loss = torch.mean(loss)
+    return loss
+
+
+def drag_loss_fn(output, target, area, normals, stream_velocity=None, padded_value=-10):
+    vel_inlet = stream_velocity  # Get this from the dataset
+    mask = abs(target - padded_value) > 1e-3
+    output_true = target * mask * area * (vel_inlet) ** 2.0
+    output_pred = output * mask * area * (vel_inlet) ** 2.0
+
+    pres_true = output_true[:, :, 0] * normals[:, :, 0]
+    pres_pred = output_pred[:, :, 0] * normals[:, :, 0]
+
+    wx_true = output_true[:, :, 1]
+    wx_pred = output_pred[:, :, 1]
+
+    masked_pred = torch.mean(pres_pred + wx_pred, (1))
+    masked_truth = torch.mean(pres_true + wx_true, (1))
+
+    loss = (masked_pred - masked_truth) ** 2.0
+    loss = torch.mean(loss)
+    return loss
+
+
+def compute_loss_dict(
+    prediction_vol: torch.Tensor,
+    prediction_surf: torch.Tensor,
+    batch_inputs: dict,
+    loss_fn_type: dict,
+    integral_scaling_factor: float,
+    surf_loss_scaling: float,
+    vol_loss_scaling: float,
+    first_deriv: torch.nn.Module | None = None,
+    eqn: Any = None,
+    bounding_box: torch.Tensor | None = None,
+    vol_factors: torch.Tensor | None = None,
+    add_physics_loss: bool = False,
+) -> tuple[torch.Tensor, dict]:
+    """
+    Compute the loss terms in a single function call.
+
+    Computes:
+    - Volume loss if prediction_vol is not None
+    - Surface loss if prediction_surf is not None
+    - Integral loss if prediction_surf is not None
+    - Total loss as a weighted sum of the above
+
+    Returns:
+    - Total loss as a scalar tensor
+    - Dictionary of loss terms (for logging, etc)
+    """
+    nvtx.range_push("Loss Calculation")
+    total_loss_terms = []
+    loss_dict = {}
+
+    if prediction_vol is not None:
+        target_vol = batch_inputs["volume_fields"]
+
+        if add_physics_loss:
+            loss_vol = loss_fn_with_physics(
+                prediction_vol,
+                target_vol,
+                loss_fn_type.loss_type,
+                padded_value=-10,
+                first_deriv=first_deriv,
+                eqn=eqn,
+                bounding_box=bounding_box,
+                vol_factors=vol_factors,
+            )
+            loss_dict["loss_vol"] = loss_vol[0]
+            loss_dict["loss_continuity"] = loss_vol[1]
+            loss_dict["loss_momentum_x"] = loss_vol[2]
+            loss_dict["loss_momentum_y"] = loss_vol[3]
+            loss_dict["loss_momentum_z"] = loss_vol[4]
+            total_loss_terms.append(loss_vol[0])
+            total_loss_terms.append(loss_vol[1])
+            total_loss_terms.append(loss_vol[2])
+            total_loss_terms.append(loss_vol[3])
+            total_loss_terms.append(loss_vol[4])
+        else:
+            loss_vol = loss_fn(
+                prediction_vol,
+                target_vol,
+                loss_fn_type.loss_type,
+                padded_value=-10,
+            )
+            loss_dict["loss_vol"] = loss_vol
+            total_loss_terms.append(loss_vol)
+
+    if prediction_surf is not None:
+        target_surf = batch_inputs["surface_fields"]
+        surface_areas = batch_inputs["surface_areas"]
+        surface_areas = torch.unsqueeze(surface_areas, -1)
+        surface_normals = batch_inputs["surface_normals"]
+
+        # Needs to be taken from the dataset
+        stream_velocity = batch_inputs["global_params_values"][:, 0, :]
+
+        loss_surf = loss_fn_surface(
+            prediction_surf,
+            target_surf,
+            loss_fn_type.loss_type,
+        )
+
+        loss_surf_area = loss_fn_area(
+            prediction_surf,
+            target_surf,
+            surface_normals,
+            surface_areas,
+            area_scaling_factor=loss_fn_type.area_weighing_factor,
+            loss_type=loss_fn_type.loss_type,
+        )
+
+        if loss_fn_type.loss_type == "mse":
+            loss_surf = loss_surf * surf_loss_scaling
+            loss_surf_area = loss_surf_area * surf_loss_scaling
+
+        total_loss_terms.append(loss_surf)
+        loss_dict["loss_surf"] = loss_surf
+        total_loss_terms.append(loss_surf_area)
+        loss_dict["loss_surf_area"] = loss_surf_area
+        loss_integral = (
+            integral_loss_fn(
+                prediction_surf,
+                target_surf,
+                surface_areas,
+                surface_normals,
+                stream_velocity,
+                padded_value=-10,
+            )
+        ) * integral_scaling_factor
+        loss_dict["loss_integral"] = loss_integral
+        total_loss_terms.append(loss_integral)
+
+    total_loss = sum(total_loss_terms)
+    loss_dict["total_loss"] = total_loss
+    nvtx.range_pop()
+
+    return total_loss, loss_dict
diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py
index 3311083e04..7882e2d006 100644
--- a/examples/cfd/external_aerodynamics/domino/src/train.py
+++ b/examples/cfd/external_aerodynamics/domino/src/train.py
@@ -76,527 +76,8 @@
 # Profiler().enable("torch")
 # Profiler().initialize()
 
-
-def compute_physics_loss(
-    output: torch.Tensor,
-    target: torch.Tensor,
-    mask: torch.Tensor,
-    loss_type: Literal["mse", "rmse"],
-    dims: tuple[int, ...] | None,
-    first_deriv: torch.nn.Module,
-    eqn: Any,
-    bounding_box: torch.Tensor,
-    vol_factors: torch.Tensor,
-) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
-    """Compute physics-based loss terms for Navier-Stokes equations.
-
-    Args:
-        output: Model output containing (output, coords_neighbors, output_neighbors, neighbors_list)
-        target: Ground truth values
-        mask: Mask for valid values
-        loss_type: Type of loss to calculate ("mse" or "rmse")
-        dims: Dimensions for loss calculation
-        first_deriv: First derivative calculator
-        eqn: Equations
-        bounding_box: Bounding box for normalization
-        vol_factors: Volume factors for normalization
-
-    Returns:
-        Tuple of (data_loss, continuity_loss, momentum_x_loss, momentum_y_loss, momentum_z_loss)
-    """
-    # Physics loss enabled
-    output, coords_neighbors, output_neighbors, neighbors_list = output
-    batch_size = output.shape[1]
-    fields, num_neighbors = output_neighbors.shape[3], output_neighbors.shape[2]
-    coords_total = coords_neighbors[0, :]
-    output_total = output_neighbors[0, :]
-    output_total_unnormalized = unnormalize(
-        output_total, vol_factors[0], vol_factors[1]
-    )
-    coords_total_unnormalized = unnormalize(
-        coords_total, bounding_box[0], bounding_box[1]
-    )
-
-    # compute first order gradients on all the nodes from the neighbors_list
-    grad_list = {}
-    for parent_id, neighbor_ids in neighbors_list.items():
-        neighbor_ids_tensor = torch.tensor(neighbor_ids).to(
-            output_total_unnormalized.device
-        )
-        du = (
-            output_total_unnormalized[:, [parent_id]]
-            - output_total_unnormalized[:, neighbor_ids_tensor]
-        )
-        dv = (
-            coords_total_unnormalized[:, [parent_id]]
-            - coords_total_unnormalized[:, neighbor_ids_tensor]
-        )
-        grads = first_deriv.forward(
-            coords=None, connectivity_tensor=None, y=None, du=du, dv=dv
-        )
-        grad = torch.cat(grads, dim=1)
-        grad_list[parent_id] = grad
-
-    # compute second order gradients on only the center node
-    neighbor_ids_tensor = torch.tensor(neighbors_list[0]).to(
-        output_total_unnormalized.device
-    )
-    grad_neighbors_center = torch.stack([v for v in grad_list.values()], dim=1)
-    grad_neighbors_center = grad_neighbors_center.reshape(
-        batch_size, len(neighbors_list[0]) + 1, -1
-    )
-
-    du = grad_neighbors_center[:, [0]] - grad_neighbors_center[:, neighbor_ids_tensor]
-    dv = (
-        coords_total_unnormalized[:, [0]]
-        - coords_total_unnormalized[:, neighbor_ids_tensor]
-    )
-
-    # second order gradients
-    ggrads_center = first_deriv.forward(
-        coords=None, connectivity_tensor=None, y=None, du=du, dv=dv
-    )
-    ggrad_center = torch.cat(ggrads_center, dim=1)
-    grad_neighbors_center = grad_neighbors_center.reshape(
-        batch_size, len(neighbors_list[0]) + 1, 3, -1
-    )
-
-    # Get the outputs on the original nodes
-    fields_center_unnormalized = output_total_unnormalized[:, 0, :]
-    grad_center = grad_neighbors_center[:, 0, :, :]
-    grad_grad_uvw_center = ggrad_center[:, :, :9]
-
-    nu = 1.507 * 1e-5
-
-    dict_mapping = {
-        "u": fields_center_unnormalized[:, [0]],
-        "v": fields_center_unnormalized[:, [1]],
-        "w": fields_center_unnormalized[:, [2]],
-        "p": fields_center_unnormalized[:, [3]],
-        "nu": nu + fields_center_unnormalized[:, [4]],
-        "u__x": grad_center[:, 0, [0]],
-        "u__y": grad_center[:, 1, [0]],
-        "u__z": grad_center[:, 2, [0]],
-        "v__x": grad_center[:, 0, [1]],
-        "v__y": grad_center[:, 1, [1]],
-        "v__z": grad_center[:, 2, [1]],
-        "w__x": grad_center[:, 0, [2]],
-        "w__y": grad_center[:, 1, [2]],
-        "w__z": grad_center[:, 2, [2]],
-        "p__x": grad_center[:, 0, [3]],
-        "p__y": grad_center[:, 1, [3]],
-        "p__z": grad_center[:, 2, [3]],
-        "nu__x": grad_center[:, 0, [4]],
-        "nu__y": grad_center[:, 1, [4]],
-        "nu__z": grad_center[:, 2, [4]],
-        "u__x__x": grad_grad_uvw_center[:, 0, [0]],
-        "u__x__y": grad_grad_uvw_center[:, 1, [0]],
-        "u__x__z": grad_grad_uvw_center[:, 2, [0]],
-        "u__y__x": grad_grad_uvw_center[:, 1, [0]],  # same as __x__y
-        "u__y__y": grad_grad_uvw_center[:, 1, [1]],
-        "u__y__z": grad_grad_uvw_center[:, 2, [1]],
-        "u__z__x": grad_grad_uvw_center[:, 2, [0]],  # same as __x__z
-        "u__z__y": grad_grad_uvw_center[:, 2, [1]],  # same as __y__z
-        "u__z__z": grad_grad_uvw_center[:, 2, [2]],
-        "v__x__x": grad_grad_uvw_center[:, 0, [3]],
-        "v__x__y": grad_grad_uvw_center[:, 1, [3]],
-        "v__x__z": grad_grad_uvw_center[:, 2, [3]],
-        "v__y__x": grad_grad_uvw_center[:, 1, [3]],  # same as __x__y
-        "v__y__y": grad_grad_uvw_center[:, 1, [4]],
-        "v__y__z": grad_grad_uvw_center[:, 2, [4]],
-        "v__z__x": grad_grad_uvw_center[:, 2, [3]],  # same as __x__z
-        "v__z__y": grad_grad_uvw_center[:, 2, [4]],  # same as __y__z
-        "v__z__z": grad_grad_uvw_center[:, 2, [5]],
-        "w__x__x": grad_grad_uvw_center[:, 0, [6]],
-        "w__x__y": grad_grad_uvw_center[:, 1, [6]],
-        "w__x__z": grad_grad_uvw_center[:, 2, [6]],
-        "w__y__x": grad_grad_uvw_center[:, 1, [6]],  # same as __x__y
-        "w__y__y": grad_grad_uvw_center[:, 1, [7]],
-        "w__y__z": grad_grad_uvw_center[:, 2, [7]],
-        "w__z__x": grad_grad_uvw_center[:, 2, [6]],  # same as __x__z
-        "w__z__y": grad_grad_uvw_center[:, 2, [7]],  # same as __y__z
-        "w__z__z": grad_grad_uvw_center[:, 2, [8]],
-    }
-    continuity = eqn["continuity"].evaluate(dict_mapping)["continuity"]
-    momentum_x = eqn["momentum_x"].evaluate(dict_mapping)["momentum_x"]
-    momentum_y = eqn["momentum_y"].evaluate(dict_mapping)["momentum_y"]
-    momentum_z = eqn["momentum_z"].evaluate(dict_mapping)["momentum_z"]
-
-    # Compute the weights for the equation residuals
-    weight_continuity = torch.sigmoid(0.5 * (torch.abs(continuity) - 10))
-    weight_momentum_x = torch.sigmoid(0.5 * (torch.abs(momentum_x) - 10))
-    weight_momentum_y = torch.sigmoid(0.5 * (torch.abs(momentum_y) - 10))
-    weight_momentum_z = torch.sigmoid(0.5 * (torch.abs(momentum_z) - 10))
-
-    weighted_continuity = weight_continuity * torch.abs(continuity)
-    weighted_momentum_x = weight_momentum_x * torch.abs(momentum_x)
-    weighted_momentum_y = weight_momentum_y * torch.abs(momentum_y)
-    weighted_momentum_z = weight_momentum_z * torch.abs(momentum_z)
-
-    # Compute data loss
-    num = torch.sum(mask * (output - target) ** 2.0, dims)
-    if loss_type == "rmse":
-        denom = torch.sum(mask * target**2.0, dims)
-    else:
-        denom = torch.sum(mask)
-
-    del coords_total, output_total
-    torch.cuda.empty_cache()
-
-    return (
-        torch.mean(num / denom),
-        torch.mean(torch.abs(weighted_continuity)),
-        torch.mean(torch.abs(weighted_momentum_x)),
-        torch.mean(torch.abs(weighted_momentum_y)),
-        torch.mean(torch.abs(weighted_momentum_z)),
-    )
-
-
-def loss_fn(
-    output: torch.Tensor,
-    target: torch.Tensor,
-    loss_type: Literal["mse", "rmse"],
-    padded_value: float = -10,
-) -> torch.Tensor:
-    """Calculate mean squared error or root mean squared error with masking for padded values.
-
-    Args:
-        output: Predicted values from the model
-        target: Ground truth values
-        loss_type: Type of loss to calculate ("mse" or "rmse")
-        padded_value: Value used for padding in the tensor
-
-    Returns:
-        Calculated loss as a scalar tensor
-    """
-    mask = abs(target - padded_value) > 1e-3
-
-    if loss_type == "rmse":
-        dims = (0, 1)
-    else:
-        dims = None
-
-    num = torch.sum(mask * (output - target) ** 2.0, dims)
-    if loss_type == "rmse":
-        denom = torch.sum(mask * target**2.0, dims)
-        loss = torch.mean(torch.sqrt(num / denom))
-    elif loss_type == "mse":
-        denom = torch.sum(mask)
-        loss = torch.mean(num / denom)
-    else:
-        raise ValueError(f"Invalid loss type: {loss_type}")
-    return loss
-
-
-def loss_fn_with_physics(
-    output: torch.Tensor,
-    target: torch.Tensor,
-    loss_type: Literal["mse", "rmse"],
-    padded_value: float = -10,
-    first_deriv: torch.nn.Module = None,
-    eqn: Any = None,
-    bounding_box: torch.Tensor = None,
-    vol_factors: torch.Tensor = None,
-) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
-    """Calculate loss with physics-based terms for appropriate equations.
-
-    Args:
-        output: Predicted values from the model (with neighbor data when physics enabled)
-        target: Ground truth values
-        loss_type: Type of loss to calculate ("mse" or "rmse")
-        padded_value: Value used for padding in the tensor
-        first_deriv: First derivative calculator
-        eqn: Equations
-        bounding_box: Bounding box for normalization
-        vol_factors: Volume factors for normalization
-
-    Returns:
-        Tuple of (data_loss, continuity_loss, momentum_x_loss, momentum_y_loss, momentum_z_loss)
-    """
-    mask = abs(target - padded_value) > 1e-3
-
-    if loss_type == "rmse":
-        dims = (0, 1)
-    else:
-        dims = None
-
-    # Call the physics loss computation function
-    return compute_physics_loss(
-        output=output,
-        target=target,
-        mask=mask,
-        loss_type=loss_type,
-        dims=dims,
-        first_deriv=first_deriv,
-        eqn=eqn,
-        bounding_box=bounding_box,
-        vol_factors=vol_factors,
-    )
-
-
-def loss_fn_surface(
-    output: torch.Tensor, target: torch.Tensor, loss_type: Literal["mse", "rmse"]
-) -> torch.Tensor:
-    """Calculate loss for surface data by handling scalar and vector components separately.
-
-    Args:
-        output: Predicted surface values from the model
-        target: Ground truth surface values
-        loss_type: Type of loss to calculate ("mse" or "rmse")
-
-    Returns:
-        Combined scalar and vector loss as a scalar tensor
-    """
-    # Separate the scalar and vector components:
-    output_scalar, output_vector = torch.split(output, [1, 3], dim=2)
-    target_scalar, target_vector = torch.split(target, [1, 3], dim=2)
-
-    numerator = torch.mean((output_scalar - target_scalar) ** 2.0)
-    vector_diff_sq = torch.mean((target_vector - output_vector) ** 2.0, (0, 1))
-    if loss_type == "mse":
-        masked_loss_pres = numerator
-        masked_loss_ws = torch.sum(vector_diff_sq)
-    else:
-        denom = torch.mean((target_scalar) ** 2.0)
-        masked_loss_pres = numerator / denom
-
-        # Compute the mean diff**2 of the vector component, leave the last dimension:
-        masked_loss_ws_num = vector_diff_sq
-        masked_loss_ws_denom = torch.mean((target_vector) ** 2.0, (0, 1))
-        masked_loss_ws = torch.sum(masked_loss_ws_num / masked_loss_ws_denom)
-
-    loss = masked_loss_pres + masked_loss_ws
-
-    return loss / 4.0
-
-
-def loss_fn_area(
-    output: torch.Tensor,
-    target: torch.Tensor,
-    normals: torch.Tensor,
-    area: torch.Tensor,
-    area_scaling_factor: float,
-    loss_type: Literal["mse", "rmse"],
-) -> torch.Tensor:
-    """Calculate area-weighted loss for surface data considering normal vectors.
-
-    Args:
-        output: Predicted surface values from the model
-        target: Ground truth surface values
-        normals: Normal vectors for the surface
-        area: Area values for surface elements
-        area_scaling_factor: Scaling factor for area weighting
-        loss_type: Type of loss to calculate ("mse" or "rmse")
-
-    Returns:
-        Area-weighted loss as a scalar tensor
-    """
-    area = area * area_scaling_factor
-    area_scale_factor = area
-
-    # Separate the scalar and vector components.
-    target_scalar, target_vector = torch.split(
-        target * area_scale_factor, [1, 3], dim=2
-    )
-    output_scalar, output_vector = torch.split(
-        output * area_scale_factor, [1, 3], dim=2
-    )
-
-    # Apply the normals to the scalar components (only [:,:,0]):
-    normals, _ = torch.split(normals, [1, normals.shape[-1] - 1], dim=2)
-    target_scalar = target_scalar * normals
-    output_scalar = output_scalar * normals
-
-    # Compute the mean diff**2 of the scalar component:
-    masked_loss_pres = torch.mean(((output_scalar - target_scalar) ** 2.0), dim=(0, 1))
-    if loss_type == "rmse":
-        masked_loss_pres /= torch.mean(target_scalar**2.0, dim=(0, 1))
-
-    # Compute the mean diff**2 of the vector component, leave the last dimension:
-    masked_loss_ws = torch.mean((target_vector - output_vector) ** 2.0, (0, 1))
-
-    if loss_type == "rmse":
-        masked_loss_ws /= torch.mean((target_vector) ** 2.0, (0, 1))
-
-    # Combine the scalar and vector components:
-    loss = 0.25 * (masked_loss_pres + torch.sum(masked_loss_ws))
-
-    return loss
-
-
-def integral_loss_fn(
-    output, target, area, normals, stream_velocity=None, padded_value=-10
-):
-    drag_loss = drag_loss_fn(
-        output, target, area, normals, stream_velocity=stream_velocity, padded_value=-10
-    )
-    lift_loss = lift_loss_fn(
-        output, target, area, normals, stream_velocity=stream_velocity, padded_value=-10
-    )
-    return lift_loss + drag_loss
-
-
-def lift_loss_fn(output, target, area, normals, stream_velocity=None, padded_value=-10):
-    vel_inlet = stream_velocity  # Get this from the dataset
-    mask = abs(target - padded_value) > 1e-3
-
-    output_true = target * mask * area * (vel_inlet) ** 2.0
-    output_pred = output * mask * area * (vel_inlet) ** 2.0
-
-    normals = torch.select(normals, 2, 2)
-    # output_true_0 = output_true[:, :, 0]
-    output_true_0 = output_true.select(2, 0)
-    output_pred_0 = output_pred.select(2, 0)
-
-    pres_true = output_true_0 * normals
-    pres_pred = output_pred_0 * normals
-
-    wz_true = output_true[:, :, -1]
-    wz_pred = output_pred[:, :, -1]
-
-    masked_pred = torch.mean(pres_pred + wz_pred, (1))
-    masked_truth = torch.mean(pres_true + wz_true, (1))
-
-    loss = (masked_pred - masked_truth) ** 2.0
-    loss = torch.mean(loss)
-    return loss
-
-
-def drag_loss_fn(output, target, area, normals, stream_velocity=None, padded_value=-10):
-    vel_inlet = stream_velocity  # Get this from the dataset
-    mask = abs(target - padded_value) > 1e-3
-    output_true = target * mask * area * (vel_inlet) ** 2.0
-    output_pred = output * mask * area * (vel_inlet) ** 2.0
-
-    pres_true = output_true[:, :, 0] * normals[:, :, 0]
-    pres_pred = output_pred[:, :, 0] * normals[:, :, 0]
-
-    wx_true = output_true[:, :, 1]
-    wx_pred = output_pred[:, :, 1]
-
-    masked_pred = torch.mean(pres_pred + wx_pred, (1))
-    masked_truth = torch.mean(pres_true + wx_true, (1))
-
-    loss = (masked_pred - masked_truth) ** 2.0
-    loss = torch.mean(loss)
-    return loss
-
-
-def compute_loss_dict(
-    prediction_vol: torch.Tensor,
-    prediction_surf: torch.Tensor,
-    batch_inputs: dict,
-    loss_fn_type: dict,
-    integral_scaling_factor: float,
-    surf_loss_scaling: float,
-    vol_loss_scaling: float,
-    first_deriv: torch.nn.Module | None = None,
-    eqn: Any = None,
-    bounding_box: torch.Tensor | None = None,
-    vol_factors: torch.Tensor | None = None,
-    add_physics_loss: bool = False,
-) -> tuple[torch.Tensor, dict]:
-    """
-    Compute the loss terms in a single function call.
-
-    Computes:
-    - Volume loss if prediction_vol is not None
-    - Surface loss if prediction_surf is not None
-    - Integral loss if prediction_surf is not None
-    - Total loss as a weighted sum of the above
-
-    Returns:
-    - Total loss as a scalar tensor
-    - Dictionary of loss terms (for logging, etc)
-    """
-    nvtx.range_push("Loss Calculation")
-    total_loss_terms = []
-    loss_dict = {}
-
-    if prediction_vol is not None:
-        target_vol = batch_inputs["volume_fields"]
-
-        if add_physics_loss:
-            loss_vol = loss_fn_with_physics(
-                prediction_vol,
-                target_vol,
-                loss_fn_type.loss_type,
-                padded_value=-10,
-                first_deriv=first_deriv,
-                eqn=eqn,
-                bounding_box=bounding_box,
-                vol_factors=vol_factors,
-            )
-            loss_dict["loss_vol"] = loss_vol[0]
-            loss_dict["loss_continuity"] = loss_vol[1]
-            loss_dict["loss_momentum_x"] = loss_vol[2]
-            loss_dict["loss_momentum_y"] = loss_vol[3]
-            loss_dict["loss_momentum_z"] = loss_vol[4]
-            total_loss_terms.append(loss_vol[0])
-            total_loss_terms.append(loss_vol[1])
-            total_loss_terms.append(loss_vol[2])
-            total_loss_terms.append(loss_vol[3])
-            total_loss_terms.append(loss_vol[4])
-        else:
-            loss_vol = loss_fn(
-                prediction_vol,
-                target_vol,
-                loss_fn_type.loss_type,
-                padded_value=-10,
-            )
-            loss_dict["loss_vol"] = loss_vol
-            total_loss_terms.append(loss_vol)
-
-    if prediction_surf is not None:
-        target_surf = batch_inputs["surface_fields"]
-        surface_areas = batch_inputs["surface_areas"]
-        surface_areas = torch.unsqueeze(surface_areas, -1)
-        surface_normals = batch_inputs["surface_normals"]
-
-        # Needs to be taken from the dataset
-        stream_velocity = batch_inputs["global_params_values"][:, 0, :]
-
-        loss_surf = loss_fn_surface(
-            prediction_surf,
-            target_surf,
-            loss_fn_type.loss_type,
-        )
-
-        loss_surf_area = loss_fn_area(
-            prediction_surf,
-            target_surf,
-            surface_normals,
-            surface_areas,
-            area_scaling_factor=loss_fn_type.area_weighing_factor,
-            loss_type=loss_fn_type.loss_type,
-        )
-
-        if loss_fn_type.loss_type == "mse":
-            loss_surf = loss_surf * surf_loss_scaling
-            loss_surf_area = loss_surf_area * surf_loss_scaling
-
-        total_loss_terms.append(loss_surf)
-        loss_dict["loss_surf"] = loss_surf
-        total_loss_terms.append(loss_surf_area)
-        loss_dict["loss_surf_area"] = loss_surf_area
-        loss_integral = (
-            integral_loss_fn(
-                prediction_surf,
-                target_surf,
-                surface_areas,
-                surface_normals,
-                stream_velocity,
-                padded_value=-10,
-            )
-        ) * integral_scaling_factor
-        loss_dict["loss_integral"] = loss_integral
-        total_loss_terms.append(loss_integral)
-
-    total_loss = sum(total_loss_terms)
-    loss_dict["total_loss"] = total_loss
-    nvtx.range_pop()
-
-    return total_loss, loss_dict
+from loss import compute_loss_dict
+from utils import get_num_vars
 
 
 def validation_step(
@@ -763,71 +244,31 @@ def train_epoch(
 
 @hydra.main(version_base="1.3", config_path="conf", config_name="config")
 def main(cfg: DictConfig) -> None:
+    ################################
     # initialize distributed manager
+    ################################
     DistributedManager.initialize()
     dist = DistributedManager()
 
+    ################################
     # Initialize NVML
+    ################################
     nvmlInit()
-
     gpu_handle = nvmlDeviceGetHandleByIndex(dist.device.index)
 
-    compute_scaling_factors(
-        cfg=cfg,
-        input_path=cfg.data.input_dir,
-        use_cache=cfg.data_processor.use_cache,
-    )
-    model_type = cfg.model.model_type
+    ################################
+    # Initialize logger
+    ################################
 
     logger = PythonLogger("Train")
     logger = RankZeroLoggingWrapper(logger, dist)
 
     logger.info(f"Config summary:\n{OmegaConf.to_yaml(cfg, sort_keys=True)}")
 
-    # Get physics imports conditionally
-    add_physics_loss = getattr(cfg.train, "add_physics_loss", False)
-
-    if add_physics_loss:
-        from physicsnemo.sym.eq.pde import PDE
-        from physicsnemo.sym.eq.ls.grads import FirstDeriv
-        from physicsnemo.sym.eq.pdes.navier_stokes import IncompressibleNavierStokes
-    else:
-        PDE = FirstDeriv = IncompressibleNavierStokes = None
-
-    num_vol_vars = 0
-    volume_variable_names = []
-    if model_type == "volume" or model_type == "combined":
-        volume_variable_names = list(cfg.variables.volume.solution.keys())
-        for j in volume_variable_names:
-            if cfg.variables.volume.solution[j] == "vector":
-                num_vol_vars += 3
-            else:
-                num_vol_vars += 1
-    else:
-        num_vol_vars = None
-
-    num_surf_vars = 0
-    surface_variable_names = []
-    if model_type == "surface" or model_type == "combined":
-        surface_variable_names = list(cfg.variables.surface.solution.keys())
-        num_surf_vars = 0
-        for j in surface_variable_names:
-            if cfg.variables.surface.solution[j] == "vector":
-                num_surf_vars += 3
-            else:
-                num_surf_vars += 1
-    else:
-        num_surf_vars = None
-
-    num_global_features = 0
-    global_params_names = list(cfg.variables.global_parameters.keys())
-    for param in global_params_names:
-        if cfg.variables.global_parameters[param].type == "vector":
-            num_global_features += len(cfg.variables.global_parameters[param].reference)
-        elif cfg.variables.global_parameters[param].type == "scalar":
-            num_global_features += 1
-        else:
-            raise ValueError(f"Unknown global parameter type")
+    ################################
+    # Get or compute scaling and normalization factors
+    # min/max/mean/std of input points + targets
+    ################################
 
     vol_save_path = os.path.join(
         "outputs", cfg.project.name, "volume_scaling_factors.npy"
@@ -844,6 +285,36 @@ def main(cfg: DictConfig) -> None:
         vol_factors = None
         vol_factors_tensor = None
 
+    scaling_factors = compute_scaling_factors(
+        cfg=cfg,
+        input_path=cfg.data.input_dir,
+        use_cache=cfg.data_processor.use_cache,
+    )
+
+    model_type = cfg.model.model_type
+
+    # Get physics imports conditionally
+    add_physics_loss = getattr(cfg.train, "add_physics_loss", False)
+
+    if add_physics_loss:
+        from physicsnemo.sym.eq.pde import PDE
+        from physicsnemo.sym.eq.ls.grads import FirstDeriv
+        from physicsnemo.sym.eq.pdes.navier_stokes import IncompressibleNavierStokes
+    else:
+        PDE = FirstDeriv = IncompressibleNavierStokes = None
+
+    num_vol_vars, num_surf_vars, num_global_features = get_num_vars(cfg, model_type)
+
+    if model_type == "combined" or model_type == "surface":
+        surface_variable_names = list(cfg.variables.surface.solution.keys())
+    else:
+        surface_variable_names = []
+
+    if model_type == "combined" or model_type == "volume":
+        volume_variable_names = list(cfg.variables.volume.solution.keys())
+    else:
+        volume_variable_names = []
+
     bounding_box = None
     if add_physics_loss:
         bounding_box = cfg.data.bounding_box
@@ -891,16 +362,16 @@ def main(cfg: DictConfig) -> None:
         **cfg.val.sampler,
     )
 
-    train_dataloader = DataLoader(
-        train_dataset,
-        sampler=train_sampler,
-        **cfg.train.dataloader,
-    )
-    val_dataloader = DataLoader(
-        val_dataset,
-        sampler=val_sampler,
-        **cfg.val.dataloader,
-    )
+    # train_dataloader = DataLoader(
+    #     train_dataset,
+    #     sampler=train_sampler,
+    #     **cfg.train.dataloader,
+    # )
+    # val_dataloader = DataLoader(
+    #     val_dataset,
+    #     sampler=val_sampler,
+    #     **cfg.val.dataloader,
+    # )
 
     model = DoMINO(
         input_features=3,
@@ -1001,6 +472,9 @@ def main(cfg: DictConfig) -> None:
         else:
             surface_scaling_loss = cfg.model.surf_loss_scaling
 
+        train_dataset.set_indices(list(train_sampler))
+        print(f"train_dataset.indices: {train_dataset.indices}")
+
         model.train(True)
         epoch_start_time = time.perf_counter()
         avg_loss = train_epoch(
diff --git a/examples/cfd/external_aerodynamics/domino/src/utils.py b/examples/cfd/external_aerodynamics/domino/src/utils.py
new file mode 100644
index 0000000000..abfc4d7351
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/domino/src/utils.py
@@ -0,0 +1,74 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def get_num_vars(cfg: dict, model_type: Literal["volume", "surface", "combined"]):
+    """Calculate the number of variables for volume, surface, and global features.
+
+    This function analyzes the configuration to determine how many variables are needed
+    for different mesh data types based on the model type. Vector variables contribute
+    3 components (x, y, z) while scalar variables contribute 1 component each.
+
+    Args:
+        cfg: Configuration object containing variable definitions for volume, surface,
+             and global parameters with their types (scalar/vector).
+        model_type (str): Type of model - can be "volume", "surface", or "combined".
+                         Determines which variable types are included in the count.
+
+    Returns:
+        tuple: A 3-tuple containing:
+            - num_vol_vars (int or None): Number of volume variables. None if model_type
+              is not "volume" or "combined".
+            - num_surf_vars (int or None): Number of surface variables. None if model_type
+              is not "surface" or "combined".
+            - num_global_features (int): Number of global parameter features.
+    """
+    num_vol_vars = 0
+    volume_variable_names = []
+    if model_type == "volume" or model_type == "combined":
+        volume_variable_names = list(cfg.variables.volume.solution.keys())
+        for j in volume_variable_names:
+            if cfg.variables.volume.solution[j] == "vector":
+                num_vol_vars += 3
+            else:
+                num_vol_vars += 1
+    else:
+        num_vol_vars = None
+
+    num_surf_vars = 0
+    surface_variable_names = []
+    if model_type == "surface" or model_type == "combined":
+        surface_variable_names = list(cfg.variables.surface.solution.keys())
+        num_surf_vars = 0
+        for j in surface_variable_names:
+            if cfg.variables.surface.solution[j] == "vector":
+                num_surf_vars += 3
+            else:
+                num_surf_vars += 1
+    else:
+        num_surf_vars = None
+
+    num_global_features = 0
+    global_params_names = list(cfg.variables.global_parameters.keys())
+    for param in global_params_names:
+        if cfg.variables.global_parameters[param].type == "vector":
+            num_global_features += len(cfg.variables.global_parameters[param].reference)
+        elif cfg.variables.global_parameters[param].type == "scalar":
+            num_global_features += 1
+        else:
+            raise ValueError(f"Unknown global parameter type")
+
+    return num_vol_vars, num_surf_vars, num_global_features
diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py
index d74d450fed..7bf4abb6c4 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe2.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe2.py
@@ -864,7 +864,8 @@ def __next__(self):
         current_idx = self.i
 
         # Start loading two ahead:
-        if len(self.dataset) >= current_idx + 2:
+        N = len(self.indices) if hasattr(self, "indices") else len(self.dataset)
+        if N >= current_idx + 2:
             self.dataset.preload(self.idx_to_index(current_idx + 1))
             self.dataset.preload(self.idx_to_index(current_idx + 2))
 
@@ -881,10 +882,12 @@ def __iter__(self):
 
         self.i = 0
 
+        N = len(self.indices) if hasattr(self, "indices") else len(self.dataset)
+
         # Trigger the dataset to start loading index 0:
-        if len(self.dataset) >= 1:
+        if N >= 1:
             self.dataset.preload(self.idx_to_index(self.i))
-        if len(self.dataset) >= 2:
+        if N >= 2:
             self.dataset.preload(self.idx_to_index(self.i + 1))
 
         return self
@@ -893,15 +896,23 @@ def __iter__(self):
 def compute_scaling_factors(cfg: DictConfig, input_path: str, use_cache: bool) -> None:
     # Create a dataset for just the field keys:
 
+    norm_keys = [
+        "volume_fields",
+        "surface_fields",
+        "stl_centers",
+        "volume_mesh_centers",
+        "surface_mesh_centers",
+    ]
+
     dataset = DrivaerMLDataset(
         data_dir=input_path,
-        keys_to_read=["volume_fields", "surface_fields"],
+        keys_to_read=norm_keys,
         output_device=torch.device("cuda"),  # TODO - configure this more carefully here
     )
 
     mean, std, min_val, max_val = compute_mean_std_min_max(
         dataset,
-        field_keys=["volume_fields", "surface_fields"],
+        field_keys=norm_keys,
     )
 
     return mean, std, min_val, max_val

From 02b03a0272252e968ddf54d2d79205aff5b9f29a Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Fri, 5 Sep 2025 21:22:45 +0000
Subject: [PATCH 17/98] Most datapipe tests passing.  Add compute_statistics
 script.  Clean up training script a little, simply by moving things around
 ...

---
 .../domino/src/compute_statistics.py          | 163 ++++++++++++++++++
 .../external_aerodynamics/domino/src/train.py |  43 ++---
 .../external_aerodynamics/domino/src/utils.py |  88 ++++++++++
 physicsnemo/datapipes/cae/domino_datapipe2.py |  78 +++++----
 .../datapipes/cae/drivaer_ml_dataset.py       |  35 +++-
 5 files changed, 347 insertions(+), 60 deletions(-)
 create mode 100644 examples/cfd/external_aerodynamics/domino/src/compute_statistics.py

diff --git a/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py b/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py
new file mode 100644
index 0000000000..5c9ef21f04
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py
@@ -0,0 +1,163 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Compute and save scaling factors for DoMINO datasets.
+
+This script computes mean, standard deviation, minimum, and maximum values
+for all field variables in a DoMINO dataset. The computed statistics are
+saved in a structured format that can be easily loaded and used for
+normalization during training and inference.
+
+The script uses the same configuration system as the training script,
+ensuring consistency in dataset handling and processing parameters.
+"""
+
+import os
+import time
+from pathlib import Path
+
+import hydra
+import torch
+from omegaconf import DictConfig, OmegaConf
+
+from physicsnemo.distributed import DistributedManager
+from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper
+
+from physicsnemo.datapipes.cae.domino_datapipe2 import compute_scaling_factors
+from utils import ScalingFactors
+
+
+@hydra.main(version_base="1.3", config_path="conf", config_name="config")
+def main(cfg: DictConfig) -> None:
+    """
+    Main function to compute and save scaling factors.
+
+    Args:
+        cfg: Hydra configuration object containing all parameters
+    """
+    ################################
+    # Initialize distributed manager
+    ################################
+    DistributedManager.initialize()
+    dist = DistributedManager()
+
+    ################################
+    # Initialize logger
+    ################################
+    logger = PythonLogger("ComputeStatistics")
+    logger = RankZeroLoggingWrapper(logger, dist)
+
+    logger.info("Starting scaling factors computation")
+    logger.info(f"Config summary:\n{OmegaConf.to_yaml(cfg, sort_keys=True)}")
+
+    ################################
+    # Create output directory
+    ################################
+    output_dir = os.path.join(cfg.output, "scaling_factors")
+    os.makedirs(output_dir, exist_ok=True)
+
+    if dist.world_size > 1:
+        torch.distributed.barrier()
+
+    ################################
+    # Check if scaling exists
+    ################################
+    pickle_path = output_dir + "/scaling_factors.pkl"
+
+    try:
+        scaling_factors = ScalingFactors.load(pickle_path)
+        logger.info(f"Scaling factors loaded from: {pickle_path}")
+    except FileNotFoundError:
+        logger.info(f"Scaling factors not found at: {pickle_path}; recomputing.")
+        scaling_factors = None
+
+    ################################
+    # Compute scaling factors
+    ################################
+    if scaling_factors is None:
+        logger.info("Computing scaling factors from dataset...")
+        start_time = time.perf_counter()
+
+        target_keys = [
+            "volume_fields",
+            "surface_fields",
+            "stl_centers",
+            "volume_mesh_centers",
+            "surface_mesh_centers",
+        ]
+
+        mean, std, min_val, max_val = compute_scaling_factors(
+            cfg=cfg,
+            input_path=cfg.data.input_dir,
+            target_keys=target_keys,
+        )
+        mean = {k: m.cpu().numpy() for k, m in mean.items()}
+        std = {k: s.cpu().numpy() for k, s in std.items()}
+        min_val = {k: m.cpu().numpy() for k, m in min_val.items()}
+        max_val = {k: m.cpu().numpy() for k, m in max_val.items()}
+
+        compute_time = time.perf_counter() - start_time
+        logger.info(
+            f"Scaling factors computation completed in {compute_time:.2f} seconds"
+        )
+
+        ################################
+        # Create structured data object
+        ################################
+        dataset_info = {
+            "input_path": cfg.data.input_dir,
+            "model_type": cfg.model.model_type,
+            "normalization": cfg.model.normalization,
+            "compute_time": compute_time,
+            "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
+            "config_name": cfg.project.name,
+        }
+
+        scaling_factors = ScalingFactors(
+            mean=mean,
+            std=std,
+            min_val=min_val,
+            max_val=max_val,
+            field_keys=target_keys,
+        )
+
+        ################################
+        # Save scaling factors
+        ################################
+        if dist.rank == 0:
+            # Save as structured pickle file
+            pickle_path = output_dir + "/scaling_factors.pkl"
+            scaling_factors.save(pickle_path)
+            logger.info(f"Scaling factors saved to: {pickle_path}")
+
+            # Save summary report
+            summary_path = output_dir + "/scaling_factors_summary.txt"
+            with open(summary_path, "w") as f:
+                f.write(scaling_factors.summary())
+            logger.info(f"Summary report saved to: {summary_path}")
+
+        ################################
+        # Display summary
+        ################################
+        logger.info("Scaling factors computation summary:")
+        logger.info(f"Field keys processed: {scaling_factors.field_keys}")
+
+        logger.info("Scaling factors computation completed successfully!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py
index 7882e2d006..f30964f3ed 100644
--- a/examples/cfd/external_aerodynamics/domino/src/train.py
+++ b/examples/cfd/external_aerodynamics/domino/src/train.py
@@ -56,12 +56,13 @@
 
 from physicsnemo.datapipes.cae.domino_datapipe2 import (
     DoMINODataPipe,
-    compute_scaling_factors,
     create_domino_dataset,
 )
 from physicsnemo.models.domino.model import DoMINO
 from physicsnemo.utils.domino.utils import *
 
+from utils import ScalingFactors
+
 # This is included for GPU memory tracking:
 from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo
 import time
@@ -266,30 +267,17 @@ def main(cfg: DictConfig) -> None:
     logger.info(f"Config summary:\n{OmegaConf.to_yaml(cfg, sort_keys=True)}")
 
     ################################
-    # Get or compute scaling and normalization factors
-    # min/max/mean/std of input points + targets
+    # Get scaling factors
     ################################
-
-    vol_save_path = os.path.join(
-        "outputs", cfg.project.name, "volume_scaling_factors.npy"
-    )
-    surf_save_path = os.path.join(
-        "outputs", cfg.project.name, "surface_scaling_factors.npy"
-    )
-    if os.path.exists(vol_save_path):
-        vol_factors = np.load(vol_save_path)
-        vol_factors_tensor = (
-            torch.from_numpy(vol_factors).to(dist.device) if add_physics_loss else None
+    pickle_path = os.path.join(cfg.output) + "/scaling_factors/scaling_factors.pkl"
+
+    try:
+        scaling_factors = ScalingFactors.load(pickle_path)
+        logger.info(f"Scaling factors loaded from: {pickle_path}")
+    except FileNotFoundError:
+        raise FileNotFoundError(
+            f"Scaling factors not found at: {pickle_path}; please run compute_statistics.py to compute them."
         )
-    else:
-        vol_factors = None
-        vol_factors_tensor = None
-
-    scaling_factors = compute_scaling_factors(
-        cfg=cfg,
-        input_path=cfg.data.input_dir,
-        use_cache=cfg.data_processor.use_cache,
-    )
 
     model_type = cfg.model.model_type
 
@@ -315,6 +303,10 @@ def main(cfg: DictConfig) -> None:
     else:
         volume_variable_names = []
 
+    vol_factors = scaling_factors.mean["volume_fields"]
+    surf_factors = scaling_factors.mean["surface_fields"]
+    vol_factors_tensor = torch.from_numpy(vol_factors).to(dist.device)
+
     bounding_box = None
     if add_physics_loss:
         bounding_box = cfg.data.bounding_box
@@ -326,11 +318,6 @@ def main(cfg: DictConfig) -> None:
             .to(dist.device)
         )
 
-    if os.path.exists(surf_save_path):
-        surf_factors = np.load(surf_save_path)
-    else:
-        surf_factors = None
-
     train_dataset = create_domino_dataset(
         cfg,
         phase="train",
diff --git a/examples/cfd/external_aerodynamics/domino/src/utils.py b/examples/cfd/external_aerodynamics/domino/src/utils.py
index abfc4d7351..6befff00bb 100644
--- a/examples/cfd/external_aerodynamics/domino/src/utils.py
+++ b/examples/cfd/external_aerodynamics/domino/src/utils.py
@@ -14,6 +14,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from dataclasses import dataclass
+from typing import Dict, Optional, Any
+import numpy as np
+import torch
+import pickle
+from pathlib import Path
+from typing import Literal
+
 
 def get_num_vars(cfg: dict, model_type: Literal["volume", "surface", "combined"]):
     """Calculate the number of variables for volume, surface, and global features.
@@ -72,3 +80,83 @@ def get_num_vars(cfg: dict, model_type: Literal["volume", "surface", "combined"]
             raise ValueError(f"Unknown global parameter type")
 
     return num_vol_vars, num_surf_vars, num_global_features
+
+
+@dataclass
+class ScalingFactors:
+    """
+    Data structure for storing scaling factors computed for DoMINO datasets.
+
+    This class provides a clean, easily serializable format for storing
+    mean, std, min, and max values for different array keys in the dataset.
+    Uses numpy arrays for easy serialization and cross-platform compatibility.
+
+    Attributes:
+        mean: Dictionary mapping keys to mean numpy arrays
+        std: Dictionary mapping keys to standard deviation numpy arrays
+        min_val: Dictionary mapping keys to minimum value numpy arrays
+        max_val: Dictionary mapping keys to maximum value numpy arrays
+        field_keys: List of field keys for which statistics were computed
+    """
+
+    mean: Dict[str, np.ndarray]
+    std: Dict[str, np.ndarray]
+    min_val: Dict[str, np.ndarray]
+    max_val: Dict[str, np.ndarray]
+    field_keys: list[str]
+
+    def to_torch(
+        self, device: Optional[torch.device] = None
+    ) -> Dict[str, Dict[str, torch.Tensor]]:
+        """Convert numpy arrays to torch tensors for use in training/inference."""
+        device = device or torch.device("cpu")
+
+        return {
+            "mean": {k: torch.from_numpy(v).to(device) for k, v in self.mean.items()},
+            "std": {k: torch.from_numpy(v).to(device) for k, v in self.std.items()},
+            "min_val": {
+                k: torch.from_numpy(v).to(device) for k, v in self.min_val.items()
+            },
+            "max_val": {
+                k: torch.from_numpy(v).to(device) for k, v in self.max_val.items()
+            },
+        }
+
+    def save(self, filepath: str | Path) -> None:
+        """Save scaling factors to pickle file."""
+        filepath = Path(filepath)
+        filepath.parent.mkdir(parents=True, exist_ok=True)
+
+        with open(filepath, "wb") as f:
+            pickle.dump(self, f)
+
+    @classmethod
+    def load(cls, filepath: str | Path) -> "ScalingFactors":
+        """Load scaling factors from pickle file."""
+        with open(filepath, "rb") as f:
+            factors = pickle.load(f)
+        return factors
+
+    def get_field_shapes(self) -> Dict[str, tuple]:
+        """Get the shape of each field's statistics."""
+        return {key: self.mean[key].shape for key in self.field_keys}
+
+    def summary(self) -> str:
+        """Generate a human-readable summary of the scaling factors."""
+        summary = ["Scaling Factors Summary:"]
+        summary.append(f"Field Keys: {self.field_keys}")
+
+        for key in self.field_keys:
+            mean_val = self.mean[key]
+            std_val = self.std[key]
+            min_val = self.min_val[key]
+            max_val = self.max_val[key]
+
+            summary.append(f"\n{key}:")
+            summary.append(f"  Shape: {mean_val.shape}")
+            summary.append(f"  Mean: {mean_val}")
+            summary.append(f"  Std: {std_val}")
+            summary.append(f"  Min: {min_val}")
+            summary.append(f"  Max: {max_val}")
+
+        return "\n".join(summary)
diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py
index 7bf4abb6c4..f9e66b6135 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe2.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe2.py
@@ -217,6 +217,7 @@ def __init__(
         self,
         input_path,
         model_type: Literal["surface", "volume", "combined"],
+        pin_memory: bool = False,
         **data_config_overrides,
     ):
         # Perform config packaging and validation
@@ -227,13 +228,13 @@ def __init__(
             DistributedManager.initialize()
 
         dist = DistributedManager()
-        if self.config.gpu_preprocessing or self.config.gpu_output:
-            # Make sure we move data to the right device:
-            target_device = dist.device
-        else:
-            target_device = torch.device("cpu")
 
-        self.device = target_device
+        self.preproc_device = (
+            dist.device if self.config.gpu_preprocessing else torch.device("cpu")
+        )
+        self.output_device = (
+            dist.device if self.config.gpu_output else torch.device("cpu")
+        )
 
         self.model_type = model_type
 
@@ -244,12 +245,12 @@ def __init__(
             self.config.bounding_box_dims = [
                 torch.tensor(
                     self.config.bounding_box_dims.max,
-                    device=self.device,
+                    device=self.preproc_device,
                     dtype=torch.float32,
                 ),
                 torch.tensor(
                     self.config.bounding_box_dims.min,
-                    device=self.device,
+                    device=self.preproc_device,
                     dtype=torch.float32,
                 ),
             ]
@@ -265,12 +266,12 @@ def __init__(
             self.config.bounding_box_dims_surf = [
                 torch.tensor(
                     self.config.bounding_box_dims_surf.max,
-                    device=self.device,
+                    device=self.preproc_device,
                     dtype=torch.float32,
                 ),
                 torch.tensor(
                     self.config.bounding_box_dims_surf.min,
-                    device=self.device,
+                    device=self.preproc_device,
                     dtype=torch.float32,
                 ),
             ]
@@ -285,20 +286,26 @@ def __init__(
         # and on the right device:
         if self.config.volume_factors is not None:
             self.config.volume_factors = torch.tensor(
-                self.config.volume_factors, device=self.device, dtype=torch.float32
+                self.config.volume_factors,
+                device=self.preproc_device,
+                dtype=torch.float32,
             )
         if self.config.surface_factors is not None:
             self.config.surface_factors = torch.tensor(
-                self.config.surface_factors, device=self.device, dtype=torch.float32
+                self.config.surface_factors,
+                device=self.preproc_device,
+                dtype=torch.float32,
             )
 
         # Always read these keys:
         self.keys_to_read = ["stl_coordinates", "stl_centers", "stl_faces", "stl_areas"]
 
         self.keys_to_read_if_available = {
-            "global_params_values": torch.tensor([[30.0], [1.226]], device=self.device),
+            "global_params_values": torch.tensor(
+                [[30.0], [1.226]], device=self.preproc_device
+            ),
             "global_params_reference": torch.tensor(
-                [[30.0], [1.226]], device=self.device
+                [[30.0], [1.226]], device=self.preproc_device
             ),
         }
 
@@ -318,7 +325,8 @@ def __init__(
         self.dataset = DrivaerMLDataset(
             data_dir=self.config.data_path,
             keys_to_read=self.keys_to_read,
-            output_device=self.device,
+            output_device=self.preproc_device,
+            pin_memory=pin_memory,
             consumer_stream=torch.cuda.default_stream(),
         )
 
@@ -803,6 +811,11 @@ def __getitem__(self, idx):
             data_dict = self.dataset[index]
             data_dict = self.process_data(data_dict, idx)
 
+        # If the data is not on the target device, put it there:
+        for key, value in data_dict.items():
+            if value.device != self.output_device:
+                data_dict[key] = value.to(self.output_device)
+
         # Add a batch dimension to the data_dict
         data_dict = {k: v.unsqueeze(0) for k, v in data_dict.items()}
 
@@ -865,7 +878,8 @@ def __next__(self):
 
         # Start loading two ahead:
         N = len(self.indices) if hasattr(self, "indices") else len(self.dataset)
-        if N >= current_idx + 2:
+        print(f"N: {N}, current_idx: {current_idx}")
+        if N > current_idx + 2:
             self.dataset.preload(self.idx_to_index(current_idx + 1))
             self.dataset.preload(self.idx_to_index(current_idx + 2))
 
@@ -885,34 +899,38 @@ def __iter__(self):
         N = len(self.indices) if hasattr(self, "indices") else len(self.dataset)
 
         # Trigger the dataset to start loading index 0:
-        if N >= 1:
+        if N > 1:
             self.dataset.preload(self.idx_to_index(self.i))
-        if N >= 2:
+        if N > 2:
             self.dataset.preload(self.idx_to_index(self.i + 1))
 
         return self
 
 
-def compute_scaling_factors(cfg: DictConfig, input_path: str, use_cache: bool) -> None:
-    # Create a dataset for just the field keys:
+def compute_scaling_factors(
+    cfg: DictConfig, input_path: str, target_keys: list[str], use_cache=None
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+    """
+    Using the dataset at the path, compute the mean, std, min, and max of the target keys.
+
+    Args:
+        cfg: Hydra configuration object containing all parameters
+        input_path: Path to the dataset to load.
+        target_keys: List of keys to compute the mean, std, min, and max of.
+        use_cache: (deprecated) This argument has no effect.
+    """
 
-    norm_keys = [
-        "volume_fields",
-        "surface_fields",
-        "stl_centers",
-        "volume_mesh_centers",
-        "surface_mesh_centers",
-    ]
+    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
 
     dataset = DrivaerMLDataset(
         data_dir=input_path,
-        keys_to_read=norm_keys,
-        output_device=torch.device("cuda"),  # TODO - configure this more carefully here
+        keys_to_read=target_keys,
+        output_device=device,
     )
 
     mean, std, min_val, max_val = compute_mean_std_min_max(
         dataset,
-        field_keys=norm_keys,
+        field_keys=target_keys,
     )
 
     return mean, std, min_val, max_val
diff --git a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
index bb3c5b7a1a..c9871db8c0 100644
--- a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
+++ b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
@@ -128,6 +128,35 @@ def read_file_sharded(
         pass
 
 
+class NpzFileReader(BackendReader):
+    """
+    Reader for npz files.
+    """
+
+    def __init__(self, keys_to_read: list[str] | None) -> None:
+        super().__init__(keys_to_read)
+
+    def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]:
+        """
+        Read a file and return a dictionary of tensors.
+        """
+        in_data = np.load(filename)
+
+        keys_found = set(in_data.keys())
+        keys_missing = set(self.keys_to_read) - keys_found
+        if len(keys_missing) > 0:
+            raise ValueError(f"Keys {keys_missing} not found in file {filename}")
+
+        data = {key: torch.from_numpy(in_data[key][:]) for key in self.keys_to_read}
+
+        return data
+
+    def read_file_sharded(
+        self, filename: pathlib.Path, parallel_rank: int, parallel_size: int
+    ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]:
+        pass
+
+
 class ZarrFileReader(BackendReader):
     """
     Reader for zarr files.
@@ -265,7 +294,7 @@ def __init__(
         keys_to_read: list[str] | None,
         output_device: torch.device,
         preload_depth: int = 2,
-        pin_memory: bool = True,
+        pin_memory: bool = False,
         device_mesh: torch.distributed.DeviceMesh | None = None,
         placements: dict[str, torch.distributed.tensor.Placement] | None = None,
         consumer_stream: torch.cuda.Stream | None = None,
@@ -333,6 +362,9 @@ def _infer_file_type_and_filenames(
         if all(file.suffix == ".npy" for file in files):
             file_reader = NpyFileReader(self._keys_to_read)
             return file_reader, files
+        elif all(file.suffix == ".npz" for file in files):
+            file_reader = NpzFileReader(self._keys_to_read)
+            return file_reader, files
         elif all(file.suffix == ".zarr" and file.is_dir() for file in files):
             if TENSORSTORE_AVAILABLE:
                 file_reader = TensorStoreZarrReader(self._keys_to_read)
@@ -358,7 +390,6 @@ def _move_to_gpu(
         if self.output_device.type != "cuda":
             return data
 
-        # result = StreamDict()
         result = {}
 
         with torch.cuda.stream(self._data_loader_stream):

From ff185b3ef33b78cc4a3da24be96f7d78b2f1f1c4 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 8 Sep 2025 14:56:20 +0000
Subject: [PATCH 18/98] Update tests for the new pipeline (mostly fix indexing
 from batch size) and  fix a few details in the new pipeline.  Use new
 pipeline in training script

---
 .../external_aerodynamics/domino/src/train.py | 19 ++-----
 physicsnemo/datapipes/cae/domino_datapipe2.py | 21 ++++----
 test/datapipes/test_domino_datapipe.py        | 49 +++++++++----------
 3 files changed, 37 insertions(+), 52 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py
index f30964f3ed..2ff363e40a 100644
--- a/examples/cfd/external_aerodynamics/domino/src/train.py
+++ b/examples/cfd/external_aerodynamics/domino/src/train.py
@@ -102,7 +102,7 @@ def validation_step(
     with torch.no_grad():
         for i_batch, sample_batched in enumerate(dataloader):
             sampled_batched = dict_to_device(sample_batched, device)
-            print(f"validation i batch {i_batch}")
+
             with autocast("cuda", enabled=True):
                 if add_physics_loss:
                     prediction_vol, prediction_surf = model(
@@ -349,17 +349,6 @@ def main(cfg: DictConfig) -> None:
         **cfg.val.sampler,
     )
 
-    # train_dataloader = DataLoader(
-    #     train_dataset,
-    #     sampler=train_sampler,
-    #     **cfg.train.dataloader,
-    # )
-    # val_dataloader = DataLoader(
-    #     val_dataset,
-    #     sampler=val_sampler,
-    #     **cfg.val.dataloader,
-    # )
-
     model = DoMINO(
         input_features=3,
         output_features_vol=num_vol_vars,
@@ -449,8 +438,11 @@ def main(cfg: DictConfig) -> None:
                 "Physics loss enabled - mixed precision (autocast) will be disabled as physics loss computation is not supported with mixed precision"
             )
 
+        # This controls what indices to use for each epoch.
         train_sampler.set_epoch(epoch)
         val_sampler.set_epoch(epoch)
+        train_dataset.set_indices(list(train_sampler))
+        val_dataset.set_indices(list(val_sampler))
 
         initial_integral_factor = initial_integral_factor_orig
 
@@ -459,9 +451,6 @@ def main(cfg: DictConfig) -> None:
         else:
             surface_scaling_loss = cfg.model.surf_loss_scaling
 
-        train_dataset.set_indices(list(train_sampler))
-        print(f"train_dataset.indices: {train_dataset.indices}")
-
         model.train(True)
         epoch_start_time = time.perf_counter()
         avg_loss = train_epoch(
diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py
index f9e66b6135..faaeac8543 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe2.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe2.py
@@ -35,7 +35,6 @@
 import torch
 import torch.cuda.nvtx as nvtx
 from omegaconf import DictConfig
-from torch import Tensor
 from torch.utils.data import Dataset
 
 from physicsnemo.datapipes.cae.drivaer_ml_dataset import (
@@ -468,7 +467,7 @@ def process_surface(
                 (s_max[2] - s_min[2]) / nz,
             )
             pos_normals_com_surface = calculate_normal_positional_encoding(
-                surface_coordinates, center_of_mass, cell_length=[dx, dy, dz]
+                surface_coordinates, center_of_mass, cell_dimensions=[dx, dy, dz]
             )
         else:
             pos_normals_com_surface = surface_coordinates - center_of_mass
@@ -549,12 +548,10 @@ def process_surface(
                 if self.config.scaling_type == "mean_std_scaling":
                     surf_mean = self.config.surface_factors[0]
                     surf_std = self.config.surface_factors[1]
-                    # TODO - Are these array calls needed?
                     surface_fields = standardize(surface_fields, surf_mean, surf_std)
                 elif self.config.scaling_type == "min_max_scaling":
                     surf_min = self.config.surface_factors[1]
                     surf_max = self.config.surface_factors[0]
-                    # TODO - Are these array calls needed?
                     surface_fields = normalize(surface_fields, surf_max, surf_min)
 
         return_dict.update(
@@ -667,10 +664,10 @@ def process_volume(
             pos_normals_closest_vol = calculate_normal_positional_encoding(
                 volume_coordinates,
                 sdf_node_closest_point,
-                cell_length=[dx, dy, dz],
+                cell_dimensions=[dx, dy, dz],
             )
             pos_normals_com_vol = calculate_normal_positional_encoding(
-                volume_coordinates, center_of_mass, cell_length=[dx, dy, dz]
+                volume_coordinates, center_of_mass, cell_dimensions=[dx, dy, dz]
             )
         else:
             pos_normals_closest_vol = volume_coordinates - sdf_node_closest_point
@@ -679,6 +676,8 @@ def process_volume(
         if self.config.normalize_coordinates:
             volume_coordinates = normalize(volume_coordinates, c_max, c_min)
             grid = normalize(self.volume_grid, c_max, c_min)
+        else:
+            grid = self.volume_grid
 
         if self.config.scaling_type is not None:
             if self.config.volume_factors is not None:
@@ -870,15 +869,16 @@ def __next__(self):
         #   - the preprocessing pipe has to implicitly wait for idx +1 in the dataset
         # - wait for the preprocessing pipe at idx to finish
         # return the data.
-        if self.i >= len(self.dataset):
+        N = len(self.indices) if hasattr(self, "indices") else len(self.dataset)
+
+        if self.i >= N:
             self.i = 0
             raise StopIteration
 
         current_idx = self.i
 
         # Start loading two ahead:
-        N = len(self.indices) if hasattr(self, "indices") else len(self.dataset)
-        print(f"N: {N}, current_idx: {current_idx}")
+
         if N > current_idx + 2:
             self.dataset.preload(self.idx_to_index(current_idx + 1))
             self.dataset.preload(self.idx_to_index(current_idx + 2))
@@ -1004,7 +1004,8 @@ def __getitem__(self, idx):
         filepath = self.data_path / cfd_filename
         result = np.load(filepath, allow_pickle=True).item()
         result = {
-            k: v.numpy() if isinstance(v, Tensor) else v for k, v in result.items()
+            k: torch.from_numpy(v) if isinstance(v, np.ndarray) else v
+            for k, v in result.items()
         }
 
         nvtx.range_pop()
diff --git a/test/datapipes/test_domino_datapipe.py b/test/datapipes/test_domino_datapipe.py
index a2f5ad645a..8df540d9ef 100644
--- a/test/datapipes/test_domino_datapipe.py
+++ b/test/datapipes/test_domino_datapipe.py
@@ -27,6 +27,12 @@
 from pytest_utils import import_or_fail
 from scipy.spatial import ConvexHull
 
+from physicsnemo.datapipes.cae.domino_datapipe2 import (
+    CachedDoMINODataset,
+    DoMINODataConfig,
+    DoMINODataPipe,
+)
+
 Tensor = torch.Tensor
 
 # DEFINING GLOBAL VARIABLES HERE
@@ -91,7 +97,7 @@ def synthetic_domino_data(
     for i in range(n_examples):
         # We are generating a mesh on a random sphere.
         stl_points = random_sample_on_unit_sphere(N_mesh_points)
-        print(f"stl_points.shape: {stl_points.shape}")
+
         # Generate the triangles with ConvexHull:
         hull = ConvexHull(stl_points)
         faces = hull.simplices  # (M, 3)
@@ -238,7 +244,6 @@ def bounding_boxes():
 
 def create_basic_dataset(data_dir, model_type, **kwargs):
     """Helper function to create a basic DoMINODataPipe with default settings."""
-    from physicsnemo.datapipes.cae.domino_datapipe import DoMINODataPipe
 
     # assert model_type in ["volume", "surface", "combined"]
 
@@ -270,6 +275,8 @@ def create_basic_dataset(data_dir, model_type, **kwargs):
 
     default_kwargs.update(kwargs)
 
+    print(f"kwargs: {default_kwargs}")
+
     return DoMINODataPipe(
         input_path=input_path, model_type=model_type, **default_kwargs
     )
@@ -327,7 +334,6 @@ def test_domino_datapipe_core(
     """Core test for basic functionality with different device and model configurations."""
 
     data_dir = request.getfixturevalue(data_dir)
-    print(f"data_dir: {data_dir}")
     dataset = create_basic_dataset(
         data_dir, model_type, gpu_preprocessing=gpu_preprocessing, gpu_output=gpu_output
     )
@@ -360,13 +366,12 @@ def test_domino_datapipe_coordinate_normalization(
     v_coords = sample["volume_mesh_centers"]
     s_coords = sample["surface_mesh_centers"]
 
-    v_min = torch.min(v_coords, dim=0).values
-    v_max = torch.max(v_coords, dim=0).values
-    s_min = torch.min(s_coords, dim=0).values
-    s_max = torch.max(s_coords, dim=0).values
+    # Batch size is 1 here, but in principle this could be a loop:
+    v_min = torch.min(v_coords[0], dim=0).values
+    v_max = torch.max(v_coords[0], dim=0).values
+    s_min = torch.min(s_coords[0], dim=0).values
+    s_max = torch.max(s_coords[0], dim=0).values
 
-    print(f"{normalize_coordinates} v_coords: {v_min} to {v_max}")
-    print(f"{normalize_coordinates} s_coords: {s_min} to {s_max}")
     # If normalization is enabled, coordinates should be in [-2, 2] range
     if normalize_coordinates:
         if sample_in_bbox:
@@ -467,9 +472,9 @@ def test_domino_datapipe_sampling(zarr_dataset, model_type, sampling, pytestconf
     if model_type in ["volume", "combined"]:
         for key in ["volume_mesh_centers", "volume_fields"]:
             if sampling:
-                assert sample[key].shape[0] == sample_points
+                assert sample[key].shape[1] == sample_points
             else:
-                assert sample[key].shape[0] == sample["volume_mesh_centers"].shape[0]
+                assert sample[key].shape[1] == sample["volume_mesh_centers"].shape[1]
 
     # Model-specific keys
     if model_type in ["surface", "combined"]:
@@ -480,20 +485,20 @@ def test_domino_datapipe_sampling(zarr_dataset, model_type, sampling, pytestconf
             "surface_fields",
         ]:
             if sampling:
-                assert sample[key].shape[0] == sample_points
+                assert sample[key].shape[1] == sample_points
             else:
-                assert sample[key].shape[0] == sample["surface_mesh_centers"].shape[0]
+                assert sample[key].shape[1] == sample["surface_mesh_centers"].shape[1]
         for key in [
             "surface_mesh_neighbors",
             "surface_neighbors_normals",
             "surface_neighbors_areas",
         ]:
             if sampling:
-                assert sample[key].shape[0] == sample_points
-                assert sample[key].shape[1] == dataset.config.num_surface_neighbors - 1
+                assert sample[key].shape[1] == sample_points
+                assert sample[key].shape[2] == dataset.config.num_surface_neighbors - 1
             else:
-                assert sample[key].shape[0] == sample["surface_mesh_neighbors"].shape[0]
-                assert sample[key].shape[1] == dataset.config.num_surface_neighbors - 1
+                assert sample[key].shape[1] == sample["surface_mesh_neighbors"].shape[1]
+                assert sample[key].shape[2] == dataset.config.num_surface_neighbors - 1
 
 
 @import_or_fail(["warp", "cupy", "cuml"])
@@ -572,7 +577,6 @@ def test_domino_datapipe_caching_config(zarr_dataset, model_type, pytestconfig):
 @import_or_fail(["warp", "cupy", "cuml"])
 def test_cached_domino_dataset(zarr_dataset, tmp_path, pytestconfig):
     """Test CachedDoMINODataset functionality."""
-    from physicsnemo.datapipes.cae.domino_datapipe import CachedDoMINODataset
 
     # Create some mock cached data files
     for i in range(3):
@@ -637,7 +641,6 @@ def test_domino_datapipe_invalid_caching_config(zarr_dataset, pytestconfig):
 @import_or_fail(["warp", "cupy", "cuml"])
 def test_domino_datapipe_invalid_phase(pytestconfig):
     """Test that invalid phase values raise appropriate errors."""
-    from physicsnemo.datapipes.cae.domino_datapipe import DoMINODataConfig
 
     with pytest.raises(ValueError, match="phase should be one of"):
         DoMINODataConfig(data_path=tempfile.mkdtemp(), phase="invalid_phase")
@@ -646,7 +649,6 @@ def test_domino_datapipe_invalid_phase(pytestconfig):
 @import_or_fail(["warp", "cupy", "cuml"])
 def test_domino_datapipe_invalid_scaling_type(pytestconfig):
     """Test that invalid scaling_type values raise appropriate errors."""
-    from physicsnemo.datapipes.cae.domino_datapipe import DoMINODataConfig
 
     with pytest.raises(ValueError, match="scaling_type should be one of"):
         DoMINODataConfig(
@@ -684,10 +686,3 @@ def test_domino_datapipe_surface_sampling(
 
     sample = dataset[0]
     validate_sample_structure(sample, "surface", gpu_output=True)
-
-
-if __name__ == "__main__":
-    out_dir = synthetic_domino_data(
-        out_format="zarr",
-    )
-    print(out_dir)

From c7c94cb314c6bd5e99b4a20340b41aea7c29237c Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 10 Sep 2025 01:08:11 +0000
Subject: [PATCH 19/98] add a utility to sample on a mesh with torch.

some tweaks to enable the preprocess pipeline for inference.
---
 physicsnemo/datapipes/cae/domino_datapipe2.py | 138 ++++++++++-----
 .../datapipes/cae/drivaer_ml_dataset.py       | 167 ++++++++++++++++--
 physicsnemo/utils/domino/utils.py             |  70 +++++++-
 3 files changed, 308 insertions(+), 67 deletions(-)

diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py
index faaeac8543..18d198f457 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe2.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe2.py
@@ -128,7 +128,7 @@ class DoMINODataConfig:
             You might choose gpu_preprocessing=True and gpu_output=False if caching.
     """
 
-    data_path: Path
+    data_path: Path | None
     phase: Literal["train", "val", "test"]
 
     # Surface-specific variables:
@@ -161,16 +161,17 @@ class DoMINODataConfig:
     gpu_output: bool = True
 
     def __post_init__(self):
-        # Ensure data_path is a Path object:
-        if isinstance(self.data_path, str):
-            self.data_path = Path(self.data_path)
-        self.data_path = self.data_path.expanduser()
+        if self.data_path is not None:
+            # Ensure data_path is a Path object:
+            if isinstance(self.data_path, str):
+                self.data_path = Path(self.data_path)
+            self.data_path = self.data_path.expanduser()
 
-        if not self.data_path.exists():
-            raise ValueError(f"Path {self.data_path} does not exist")
+            if not self.data_path.exists():
+                raise ValueError(f"Path {self.data_path} does not exist")
 
-        if not self.data_path.is_dir():
-            raise ValueError(f"Path {self.data_path} is not a directory")
+            if not self.data_path.is_dir():
+                raise ValueError(f"Path {self.data_path} is not a directory")
 
         # Object if caching settings are impossible:
         if self.caching:
@@ -321,13 +322,16 @@ def __init__(
         if self.model_type == "surface" or self.model_type == "combined":
             self.keys_to_read.extend(self.surface_keys)
 
-        self.dataset = DrivaerMLDataset(
-            data_dir=self.config.data_path,
-            keys_to_read=self.keys_to_read,
-            output_device=self.preproc_device,
-            pin_memory=pin_memory,
-            consumer_stream=torch.cuda.default_stream(),
-        )
+        if self.config.data_path is not None:
+            self.dataset = DrivaerMLDataset(
+                data_dir=self.config.data_path,
+                keys_to_read=self.keys_to_read,
+                output_device=self.preproc_device,
+                pin_memory=pin_memory,
+                consumer_stream=torch.cuda.default_stream(),
+            )
+        else:
+            self.dataset = None
 
         # This is thread storage for data preprocessing:
         self._preprocess_queue = {}
@@ -345,7 +349,10 @@ def set_indices(self, indices: list[int]):
         self.indices = indices
 
     def __len__(self):
-        return len(self.dataset)
+        if self.dataset is not None:
+            return len(self.dataset)
+        else:
+            return 0
 
     def compute_stl_scaling(
         self, stl_vertices: torch.Tensor, bounding_box_dims_surf: torch.Tensor | None
@@ -358,8 +365,6 @@ def compute_stl_scaling(
         s_min = torch.amin(stl_vertices, 0)
         s_max = torch.amax(stl_vertices, 0)
 
-        length_scale = torch.amax(s_max - s_min)
-
         # if dynamic_bbox_scaling:
         # Check the bounding box is not unit length
 
@@ -373,7 +378,7 @@ def compute_stl_scaling(
 
         surf_grid_max_min = torch.stack([s_min, s_max])
 
-        return s_min, s_max, length_scale, surf_grid_max_min, surf_grid
+        return s_min, s_max, surf_grid_max_min, surf_grid
 
     @profile
     def process_combined(
@@ -418,7 +423,7 @@ def process_surface(
         surface_coordinates: torch.Tensor,
         surface_normals: torch.Tensor,
         surface_sizes: torch.Tensor,
-        surface_fields: torch.Tensor,
+        surface_fields: torch.Tensor | None,
     ) -> dict[str, torch.Tensor]:
         nx, ny, nz = self.config.grid_resolution
 
@@ -427,7 +432,8 @@ def process_surface(
         # Remove any sizes <= 0:
         idx = surface_sizes > 0
         surface_sizes = surface_sizes[idx]
-        surface_fields = surface_fields[idx]
+        if surface_fields is not None:
+            surface_fields = surface_fields[idx]
         surface_normals = surface_normals[idx]
         surface_coordinates = surface_coordinates[idx]
 
@@ -442,7 +448,8 @@ def process_surface(
             )
             surface_normals = surface_normals[idx_s]
             surface_sizes = surface_sizes[idx_s]
-            surface_fields = surface_fields[idx_s]
+            if surface_fields is not None:
+                surface_fields = surface_fields[idx_s]
 
         c_max = self.config.bounding_box_dims[0]
         c_min = self.config.bounding_box_dims[1]
@@ -457,7 +464,8 @@ def process_surface(
             surface_coordinates = surface_coordinates[ids_in_bbox]
             surface_normals = surface_normals[ids_in_bbox]
             surface_sizes = surface_sizes[ids_in_bbox]
-            surface_fields = surface_fields[ids_in_bbox]
+            if surface_fields is not None:
+                surface_fields = surface_fields[ids_in_bbox]
 
         # Compute the positional encoding before sampling
         if self.config.positional_encoding:
@@ -548,11 +556,15 @@ def process_surface(
                 if self.config.scaling_type == "mean_std_scaling":
                     surf_mean = self.config.surface_factors[0]
                     surf_std = self.config.surface_factors[1]
-                    surface_fields = standardize(surface_fields, surf_mean, surf_std)
+                    if surface_fields is not None:
+                        surface_fields = standardize(
+                            surface_fields, surf_mean, surf_std
+                        )
                 elif self.config.scaling_type == "min_max_scaling":
                     surf_min = self.config.surface_factors[1]
                     surf_max = self.config.surface_factors[0]
-                    surface_fields = normalize(surface_fields, surf_max, surf_min)
+                    if surface_fields is not None:
+                        surface_fields = normalize(surface_fields, surf_max, surf_min)
 
         return_dict.update(
             {
@@ -563,9 +575,10 @@ def process_surface(
                 "surface_neighbors_normals": surface_neighbors_normals,
                 "surface_areas": surface_sizes,
                 "surface_neighbors_areas": surface_neighbors_sizes,
-                "surface_fields": surface_fields,
             }
         )
+        if surface_fields is not None:
+            return_dict["surface_fields"] = surface_fields
 
         return return_dict
 
@@ -574,7 +587,7 @@ def process_volume(
         s_min: torch.Tensor,
         s_max: torch.Tensor,
         volume_coordinates: torch.Tensor,
-        volume_fields: torch.Tensor,
+        volume_fields: torch.Tensor | None,
         stl_vertices: torch.Tensor,
         mesh_indices_flattened: torch.Tensor,
         center_of_mass: torch.Tensor,
@@ -602,7 +615,8 @@ def process_volume(
             ids_in_bbox = ids_in_bbox.all(dim=1)
 
             volume_coordinates = volume_coordinates[ids_in_bbox]
-            volume_fields = volume_fields[ids_in_bbox]
+            if volume_fields is not None:
+                volume_fields = volume_fields[ids_in_bbox]
 
         dx, dy, dz = (
             (c_max[0] - c_min[0]) / nx,
@@ -646,8 +660,8 @@ def process_volume(
                     mode="constant",
                     value=-10.0,
                 )
-
-            volume_fields = volume_fields[idx_volume]
+            if volume_fields is not None:
+                volume_fields = volume_fields[idx_volume]
             volume_coordinates = volume_coordinates_sampled
 
         # Get the SDF of all the selected volume coordinates,
@@ -684,11 +698,13 @@ def process_volume(
                 if self.config.scaling_type == "mean_std_scaling":
                     vol_mean = self.config.volume_factors[0]
                     vol_std = self.config.volume_factors[1]
-                    volume_fields = standardize(volume_fields, vol_mean, vol_std)
+                    if volume_fields is not None:
+                        volume_fields = standardize(volume_fields, vol_mean, vol_std)
                 elif self.config.scaling_type == "min_max_scaling":
                     vol_min = self.config.volume_factors[1]
                     vol_max = self.config.volume_factors[0]
-                    volume_fields = normalize(volume_fields, vol_max, vol_min)
+                    if volume_fields is not None:
+                        volume_fields = normalize(volume_fields, vol_max, vol_min)
 
         vol_grid_max_min = torch.stack([c_min, c_max])
 
@@ -699,11 +715,12 @@ def process_volume(
                 "grid": grid,
                 "sdf_grid": sdf_grid,
                 "sdf_nodes": sdf_nodes,
-                "volume_fields": volume_fields,
                 "volume_mesh_centers": volume_coordinates,
                 "volume_min_max": vol_grid_max_min,
             }
         )
+        if volume_fields is not None:
+            return_dict["volume_fields"] = volume_fields
 
         return return_dict
 
@@ -724,10 +741,8 @@ def process_data(self, data_dict, idx: int):
 
         # This function gets information about the surface scale,
         # and decides what the surface grid will be:
-        (s_min, s_max, length_scale, surf_grid_max_min, surf_grid) = (
-            self.compute_stl_scaling(
-                data_dict["stl_coordinates"], self.config.bounding_box_dims_surf
-            )
+        (s_min, s_max, surf_grid_max_min, surf_grid) = self.compute_stl_scaling(
+            data_dict["stl_coordinates"], self.config.bounding_box_dims_surf
         )
 
         # This is a center of mass computation for the stl surface,
@@ -742,7 +757,6 @@ def process_data(self, data_dict, idx: int):
 
         return_dict.update(
             {
-                "length_scale": length_scale,
                 "surface_min_max": surf_grid_max_min,
             }
         )
@@ -767,7 +781,9 @@ def process_data(self, data_dict, idx: int):
                 s_min,
                 s_max,
                 volume_coordinates=data_dict["volume_mesh_centers"],
-                volume_fields=data_dict["volume_fields"],
+                volume_fields=data_dict["volume_fields"]
+                if "volume_fields" in data_dict
+                else None,
                 stl_vertices=data_dict["stl_coordinates"],
                 mesh_indices_flattened=mesh_indices_flattened,
                 center_of_mass=center_of_mass,
@@ -784,7 +800,9 @@ def process_data(self, data_dict, idx: int):
                 surface_coordinates=data_dict["surface_mesh_centers"],
                 surface_normals=data_dict["surface_normals"],
                 surface_sizes=data_dict["surface_areas"],
-                surface_fields=data_dict["surface_fields"],
+                surface_fields=data_dict["surface_fields"]
+                if "surface_fields" in data_dict
+                else None,
             )
             return_dict.update(surface_dict)
 
@@ -798,6 +816,9 @@ def __getitem__(self, idx):
         are relatively large due to the mesh size.
         """
 
+        if self.dataset is None:
+            raise ValueError("Dataset is not present")
+
         index = self.idx_to_index(idx)
 
         # Get the preprocessed data:
@@ -831,6 +852,9 @@ def preprocess(self, idx: int) -> None:
         Start preprocessing for the given index (1 step ahead).
         This processes preloaded data or loads it if not available.
         """
+        if self.dataset is None:
+            raise ValueError("Dataset is not present")
+
         if idx in self._preprocess_queue:
             # Skip items that are already being preprocessed
             return
@@ -869,6 +893,10 @@ def __next__(self):
         #   - the preprocessing pipe has to implicitly wait for idx +1 in the dataset
         # - wait for the preprocessing pipe at idx to finish
         # return the data.
+
+        if self.dataset is None:
+            raise ValueError("Dataset is not present")
+
         N = len(self.indices) if hasattr(self, "indices") else len(self.dataset)
 
         if self.i >= N:
@@ -894,6 +922,9 @@ def __iter__(self):
         # at idx = 0, idx = 1
         # Start preprocessing at idx = 0, when the load completes
 
+        if self.dataset is None:
+            raise ValueError("Dataset is not present")
+
         self.i = 0
 
         N = len(self.indices) if hasattr(self, "indices") else len(self.dataset)
@@ -1087,12 +1118,25 @@ def __getitem__(self, idx):
 
 
 def create_domino_dataset(
-    cfg, phase, volume_variable_names, surface_variable_names, vol_factors, surf_factors
+    cfg: DictConfig,
+    phase: Literal["train", "val", "test"],
+    volume_variable_names: list[str],
+    surface_variable_names: list[str],
+    vol_factors: list[float],
+    surf_factors: list[float],
+    normalize_coordinates: bool = True,
+    sample_in_bbox: bool = True,
+    sampling: bool = True,
 ):
     if phase == "train":
         input_path = cfg.data.input_dir
+        model_type = cfg.model.model_type
     elif phase == "val":
         input_path = cfg.data.input_dir_val
+        model_type = cfg.model.model_type
+    elif phase == "test":
+        input_path = cfg.eval.test_path
+        model_type = "inference"
     else:
         raise ValueError(f"Invalid phase {phase}")
 
@@ -1100,7 +1144,7 @@ def create_domino_dataset(
         return CachedDoMINODataset(
             input_path,
             phase=phase,
-            sampling=True,
+            sampling=sampling,
             volume_points_sample=cfg.model.volume_points_sample,
             surface_points_sample=cfg.model.surface_points_sample,
             geom_points_sample=cfg.model.geom_points_sample,
@@ -1121,9 +1165,9 @@ def create_domino_dataset(
             grid_resolution=cfg.model.interp_res,
             volume_variables=volume_variable_names,
             surface_variables=surface_variable_names,
-            normalize_coordinates=True,
-            sampling=True,
-            sample_in_bbox=True,
+            normalize_coordinates=normalize_coordinates,
+            sampling=sampling,
+            sample_in_bbox=sample_in_bbox,
             volume_points_sample=cfg.model.volume_points_sample,
             surface_points_sample=cfg.model.surface_points_sample,
             geom_points_sample=cfg.model.geom_points_sample,
@@ -1131,7 +1175,7 @@ def create_domino_dataset(
             volume_factors=vol_factors,
             surface_factors=surf_factors,
             scaling_type=cfg.model.normalization,
-            model_type=cfg.model.model_type,
+            model_type=model_type,
             bounding_box_dims=cfg.data.bounding_box,
             bounding_box_dims_surf=cfg.data.bounding_box_surface,
             num_surface_neighbors=cfg.model.num_neighbors_surface,
diff --git a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
index c9871db8c0..17f486fb6c 100644
--- a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
+++ b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
@@ -20,7 +20,6 @@
 from concurrent.futures import ThreadPoolExecutor
 
 import numpy as np
-import psutil
 import torch
 import zarr
 
@@ -31,25 +30,17 @@
 except ImportError:
     TENSORSTORE_AVAILABLE = False
 
-from physicsnemo.distributed import ShardTensor, ShardTensorSpec
-
-# from physicsnemo.distributed.utils import compute_split_shapes
-
-# For use on systems where cpu_affinity is not available:
-psutil_process = psutil.Process()
-
+try:
+    import pyvista as pv
 
-class FakeProcess:
-    """
-    Enable a fake cpu affinity setting if it's not available
-    """
+    PV_AVAILABLE = True
+except ImportError:
+    PV_AVAILABLE = False
 
-    def cpu_affinity(self, cpus: list[int] | None) -> None:
-        pass
+from physicsnemo.distributed import ShardTensor, ShardTensorSpec
 
+# from physicsnemo.distributed.utils import compute_split_shapes
 
-if not hasattr(psutil_process, "cpu_affinity"):
-    psutil_process = FakeProcess()
 
 # Abstractions:
 # - want to read npy/npz/.zarr/.stl/.vtp files
@@ -187,7 +178,128 @@ def read_file_sharded(
         """
         Read a file and return a dictionary of tensors.
         """
-        pass
+        raise NotImplementedError("Not implemented yet.")
+
+
+if PV_AVAILABLE:
+
+    class VTKFileReader(BackendReader):
+        """
+        Reader for vtk files.
+        """
+
+        def __init__(self, keys_to_read: list[str] | None) -> None:
+            super().__init__(keys_to_read)
+
+            self.stl_file_keys = [
+                "stl_coordinates",
+                "stl_centers",
+                "stl_faces",
+                "stl_areas",
+            ]
+            self.vtp_file_keys = [
+                "surface_mesh_centers",
+                "surface_normals",
+                "surface_mesh_sizes",
+                "CpMeanTrim",
+                "pMeanTrim",
+                "wallShearStressMeanTrim",
+            ]
+            self.vtu_file_keys = [
+                "volume_mesh_centers",
+                "volume_fields",
+            ]
+
+            self.exclude_patterns = [
+                "single_solid",
+            ]
+
+        def get_file_name(self, dir_name: pathlib.Path, extension: str) -> pathlib.Path:
+            """
+            Get the file name for a given directory and extension.
+            """
+            # >>> matches = [p for p in list(dir_name.iterdir()) if p.suffix == ".stl" and not any(pattern in p.name for pattern in exclude_patterns)]
+            matches = [
+                p
+                for p in dir_name.iterdir()
+                if p.suffix == extension
+                and not any(pattern in p.name for pattern in self.exclude_patterns)
+            ]
+            if len(matches) == 0:
+                raise FileNotFoundError(f"No {extension} files found in {dir_name}")
+            fname = matches[0]
+            return dir_name / fname
+
+        def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]:
+            """
+            Read a set of files and return a dictionary of tensors.
+            """
+
+            # This reader attempts to only read what's necessary, and not more.
+            # So, the functions that do the reading are each "one file" functions
+            # and we open them for processing only when necessary.
+
+            return_data = {}
+
+            # Note that this reader is, already, running in a background thread.
+            # It may or may not help to further thread these calls.
+            if any(key in self.stl_file_keys for key in self.keys_to_read):
+                stl_path = self.get_file_name(filename, ".stl")
+                stl_data = self.read_data_from_stl(stl_path)
+                return_data.update(stl_data)
+            if any(key in self.vtp_file_keys for key in self.keys_to_read):
+                vtp_path = self.get_file_name(filename, ".vtp")
+                vtp_data = self.read_data_from_vtp(vtp_path)
+                return_data.update(vtp_data)
+            if any(key in self.vtu_file_keys for key in self.keys_to_read):
+                raise NotImplementedError("VTU files are not supported yet.")
+
+            return return_data
+
+        def read_file_sharded(
+            self, filename: pathlib.Path, parallel_rank: int, parallel_size: int
+        ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]:
+            """
+            Read a file and return a dictionary of tensors.
+            """
+            raise NotImplementedError("Not implemented yet.")
+
+        def read_data_from_stl(
+            self,
+            stl_path: str,
+        ) -> dict:
+            """
+            Reads surface mesh data from an STL file and prepares a batch dictionary for inference.
+
+            Args:
+                stl_path (str): Path to the STL file.
+
+            Returns:
+                dict: Batch dictionary with mesh faces and coordinates as torch tensors.
+            """
+
+            mesh = pv.read(stl_path)
+
+            batch = {}
+
+            faces = mesh.faces.reshape(-1, 4)
+            faces = faces[:, 1:]
+
+            batch["stl_faces"] = faces.flatten()
+
+            batch["stl_coordinates"] = mesh.points
+            batch["surface_normals"] = mesh.cell_normals
+
+            batch = {k: torch.from_numpy(v) for k, v in batch.items()}
+
+            return batch
+
+        def read_data_from_vtp(self, vtp_path: str) -> dict:
+            """
+            Read vtp file from a file
+            """
+
+            raise NotImplementedError("Not implemented yet.")
 
 
 if TENSORSTORE_AVAILABLE:
@@ -253,7 +365,20 @@ class TensorStoreZarrReader(BackendReader):
         Null reader for tensorstore zarr files.
         """
 
-        pass
+        def __init__(self, keys_to_read: list[str] | None) -> None:
+            # Raise an exception on construction if we get here:
+            raise NotImplementedError(
+                "TensorStoreZarrReader is not available without tensorstore.  `pip install tensorstore`."
+            )
+
+
+def is_vtk_directory(file: pathlib.Path) -> bool:
+    """
+    Check if a file is a vtk directory.
+    """
+    return file.is_dir() and all(
+        [f.suffix in [".vtp", ".stl", ".vtu", ".vtk", ".csv"] for f in file.iterdir()]
+    )
 
 
 class DrivaerMLDataset:
@@ -371,9 +496,13 @@ def _infer_file_type_and_filenames(
             else:
                 file_reader = ZarrFileReader(self._keys_to_read)
             return file_reader, files
+        elif all(is_vtk_directory(file) for file in files):
+            file_reader = VTKFileReader(self._keys_to_read)
+            return file_reader, files
+            # Each "file" here is a directory of .vtp, stl, etc.
         else:
             # TODO - support folders of stl, vtp, vtu.
-            raise ValueError(f"Unsupported file type: {files}")
+            raise ValueError(f"Unsupported file type: {files[0]}")
 
     def _move_to_gpu(
         self, data: dict[str, torch.Tensor], idx: int
diff --git a/physicsnemo/utils/domino/utils.py b/physicsnemo/utils/domino/utils.py
index 3abb968c5a..336a411497 100644
--- a/physicsnemo/utils/domino/utils.py
+++ b/physicsnemo/utils/domino/utils.py
@@ -483,7 +483,7 @@ def shuffle_array(
         for w, p in zip(chunk_weights, points_per_chunk)
     ]
 
-    # Stich the chunks back together:
+    # Stitch the chunks back together:
     idx = torch.cat(idx_chunks)
 
     # Apply the selection:
@@ -870,3 +870,71 @@ def solution_weighted_shuffle_array(
     sampling_probabilities /= sampling_probabilities.sum()  # Normalize to sum to 1
 
     return shuffle_array(arr, n_points, sampling_probabilities)
+
+
+def sample_points_on_mesh(
+    mesh_coordinates: torch.Tensor,
+    mesh_faces: torch.Tensor,
+    n_points: int,
+    mesh_areas: torch.Tensor | None = None,
+    mesh_normals: torch.Tensor | None = None,
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+    """
+    Uniformly sample points on a mesh.
+
+    Will use area-weighted sampling to select mesh regions, then uniform
+    sampling within those triangles.
+    """
+
+    # First, if we don't have the areas, compute them:
+    faces_reshaped = mesh_faces.reshape(-1, 3)
+
+    if mesh_areas is None or mesh_normals is None:
+        # We have to do 90% of the work for both of these,
+        # to get either.  So check at the last minute:
+        faces_reshaped_p0 = faces_reshaped[:, 0]
+        faces_reshaped_p1 = faces_reshaped[:, 1]
+        faces_reshaped_p2 = faces_reshaped[:, 2]
+        d1 = mesh_coordinates[faces_reshaped_p1] - mesh_coordinates[faces_reshaped_p0]
+        d2 = mesh_coordinates[faces_reshaped_p2] - mesh_coordinates[faces_reshaped_p0]
+        inferred_mesh_normals = torch.linalg.cross(d1, d2, dim=1)
+        normals_norm = torch.linalg.norm(inferred_mesh_normals, dim=1)
+        inferred_mesh_normals = inferred_mesh_normals / normals_norm.unsqueeze(1)
+        if mesh_normals is None:
+            mesh_normals = inferred_mesh_normals
+        if mesh_areas is None:
+            mesh_areas = 0.5 * normals_norm
+
+    # Next, use the areas to compute a weighted sampling of the triangles:
+    target_triangles = torch.multinomial(
+        mesh_areas,
+        n_points,
+        replacement=True,
+    )
+
+    target_faces = faces_reshaped[target_triangles]
+
+    # Next, generate random points within each selected triangle.
+    # We'll map two uniform distributions to the points in the triangles.
+    # See https://stackoverflow.com/questions/47410054/generate-random-locations-within-a-triangular-domain
+    # and the original reference https://www.cs.princeton.edu/%7Efunk/tog02.pdf
+    # for more information
+    r1 = torch.rand((n_points, 1), device=mesh_coordinates.device)
+    r2 = torch.rand((n_points, 1), device=mesh_coordinates.device)
+
+    s1 = torch.sqrt(r1)
+
+    local_coords = torch.stack(
+        (1.0 - s1, (1.0 - r2) * s1, r2 * s1),
+        dim=1,
+    )
+
+    barycentric_coordinates = torch.sum(
+        mesh_coordinates[target_faces] * local_coords, dim=1
+    )
+
+    # Apply the selection to the other tensors, too:
+    target_areas = mesh_areas[target_triangles]
+    target_normals = mesh_normals[target_triangles]
+
+    return barycentric_coordinates, target_triangles, target_areas, target_normals

From 471aae99866d6eec401ac1d130a4e14e60075bae Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 10 Sep 2025 01:10:05 +0000
Subject: [PATCH 20/98] Add revamped inference script.  Doesn't write outputs
 yet.

---
 .../external_aerodynamics/domino/README.md    |  74 +++-
 .../domino/src/inference_on_stl2.py           | 378 ++++++++++++++++++
 .../external_aerodynamics/domino/src/train.py |  16 +-
 3 files changed, 460 insertions(+), 8 deletions(-)
 create mode 100644 examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py

diff --git a/examples/cfd/external_aerodynamics/domino/README.md b/examples/cfd/external_aerodynamics/domino/README.md
index d66d42f45a..470a6a5f46 100644
--- a/examples/cfd/external_aerodynamics/domino/README.md
+++ b/examples/cfd/external_aerodynamics/domino/README.md
@@ -77,19 +77,24 @@ please refer to their [paper](https://arxiv.org/pdf/2408.11969).
 
 #### Data Preprocessing
 
-`PhysicsNeMo` has a related project to help with data processing, called [PhysicsNeMo-Curator](https://github.com/NVIDIA/physicsnemo-curator).
+`PhysicsNeMo` has a related project to help with data processing, called
+[PhysicsNeMo-Curator](https://github.com/NVIDIA/physicsnemo-curator).
 Using `PhysicsNeMo-Curator`, the data needed to train a DoMINO model can be setup easily.
-Please refer to [these instructions on getting started](https://github.com/NVIDIA/physicsnemo-curator?tab=readme-ov-file#what-is-physicsnemo-curator)
+Please refer to
+[these instructions on getting started](https://github.com/NVIDIA/physicsnemo-curator?tab=readme-ov-file#what-is-physicsnemo-curator)
 with `PhysicsNeMo-Curator`.
 
-Download the DrivAer ML dataset using the [provided instructions in PhysicsNeMo-Curator](https://github.com/NVIDIA/physicsnemo-curator/blob/main/examples/external_aerodynamics/domino/README.md#download-drivaerml-dataset).
+Download the DrivAer ML dataset using the
+[provided instructions in PhysicsNeMo-Curator](https://github.com/NVIDIA/physicsnemo-curator/blob/main/examples/external_aerodynamics/domino/README.md#download-drivaerml-dataset).
 The first step for running the DoMINO pipeline requires processing the raw data
 (vtp, vtu and stl) into either Zarr or NumPy format for training.
 Each of the raw simulations files are downloaded in `vtp`, `vtu` and `stl` formats.
 For instructions on running data processing to produce a DoMINO training ready dataset,
-please refer to [How-to Curate data for DoMINO Model](https://github.com/NVIDIA/physicsnemo-curator/blob/main/examples/external_aerodynamics/domino/README.md).
+please refer to
+[How-to Curate data for DoMINO Model](https://github.com/NVIDIA/physicsnemo-curator/blob/main/examples/external_aerodynamics/domino/README.md).
 
-Caching is implemented in [`CachedDoMINODataset`](https://github.com/NVIDIA/physicsnemo/blob/main/physicsnemo/datapipes/cae/domino_datapipe.py#L1250).
+Caching is implemented in
+[`CachedDoMINODataset`](https://github.com/NVIDIA/physicsnemo/blob/main/physicsnemo/datapipes/cae/domino_datapipe.py#L1250).
 Optionally, users can run `cache_data.py` to save outputs
 of DoMINO datapipe in the `.npy` files. The DoMINO datapipe is set up to calculate
 Signed Distance Field and Nearest Neighbor interpolations on-the-fly during
@@ -101,6 +106,36 @@ processed files.
 The final processed dataset should be divided and saved into 2 directories,
 for training and validation.
 
+#### Data Scaling factors
+
+DoMINO has several data-specific configuration tools that rely on some
+knowledge of the dataset:
+
+- The output fields (the labels) are normalized during training to a mean
+  of zero and a standard deviation of one, averaged over the dataset.
+  The scaling is controlled by passing the `volume_factors` and
+  `surface_factors` values to the datapipe.
+- The input locations are scaled by, and optionally cropped to, used defined
+  bounding boxes for both surface and volume.  Whether cropping occurs, or not,
+  is controlled by the `sample_in_bbox` value of the datapipe.  Normalization
+  to the bounding box is enabled with `normalize_coordinates`.  By default,
+  both are set to true.  The value of the boxes are configured in the
+  `config.yaml` file, and are configured separately for surface and volume.
+
+> Note: The datapipe module has a helper function `create_domino_dataset`
+> with sensible defaults to help create a Domino Datapipe.
+
+To facilitate setting reasonable values of these, you can use the
+`compute_statistics.py` script.  This will load the core dataset as defined
+in your `config.yaml` file, loop over several events (20, by default), and
+both print and store the surface/volume field statistics as well as the
+coordinate statistics.  
+
+> Note that, for volumetric fields especially, the min/max found may be
+> significantly outside the surface region.  Many simulations extend volumetric
+> sampling to far field, and you may instead want to crop significant amounts
+> of volumetric distance.
+
 #### Training
 
 Specify the training and validation data paths, bounding box sizes etc. in the
@@ -189,6 +224,35 @@ launch overhead at the cost of more memory use.  For non-sharded
 training, the `two-loop` setting is more optimal. The difference in `one-loop`
 or `two-loop` is purely computational, not algorithmic.
 
+### Performance Optimizations
+
+The training and inference scripts for DoMINO contain several performance
+enhancements to accelerate the training and usage of the model. In this
+section we'll highlight several of them, as well as how to customize them
+if needed.
+
+#### Memory Pool Optimizations
+
+The preprocessor of DoMINO requires a computation of k Nearest Neighbors,
+which is accelerated via the `cuml` Neighbors tool.  By default, `cuml` and
+`torch` both use memory allocation pools to speed up allocating tensors, but
+they do not use the same pool.  This means that during preprocessing, it's
+possible for the kNN operation to spend a significant amount of time in
+memory allocations - and further, it limits the available memory to `torch`.
+
+To mitigate this, by default in DoMINO we use the Rapids Memory Manager
+([`rmm`](https://github.com/rapidsai/rmm)).  If, for some reason, you wish
+to disable this you can do so with an environment variable:
+
+```bash
+export DOMINO_DISABLE_RMM=True
+```
+
+> Note - why not make it configurable?  We have to set up the shared memory
+> pool allocation very early in the program, before the config has even
+> been read.  So, we enable by default and the opt-out path is via the
+> environment.
+
 ### Training with Physics Losses
 
 DoMINO supports enforcing of PDE residuals as soft constraints. This can be used
diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py
new file mode 100644
index 0000000000..7b4f71b507
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py
@@ -0,0 +1,378 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This code defines a distributed pipeline for training the DoMINO model on
+CFD datasets. It includes the computation of scaling factors, instantiating
+the DoMINO model and datapipe, automatically loading the most recent checkpoint,
+training the model in parallel using DistributedDataParallel across multiple
+GPUs, calculating the loss and updating model parameters using mixed precision.
+This is a common recipe that enables training of combined models for surface and
+volume as well either of them separately. Validation is also conducted every epoch,
+where predictions are compared against ground truth values. The code logs training
+and validation metrics to TensorBoard. The train tab in config.yaml can be used to
+specify batch size, number of epochs and other training parameters.
+"""
+
+import time
+import os
+import re
+from typing import Literal, Any
+
+import apex
+import numpy as np
+import hydra
+from hydra.utils import to_absolute_path
+from omegaconf import DictConfig, OmegaConf
+import torch
+
+DISABLE_RMM = os.environ.get("DISABLE_RMM", "False")
+if not DISABLE_RMM:
+    import rmm
+    from rmm.allocators.torch import rmm_torch_allocator
+
+    rmm.reinitialize(pool_allocator=True)
+    torch.cuda.memory.change_current_allocator(rmm_torch_allocator)
+
+import torchinfo
+import torch.distributed as dist
+from torch.amp import GradScaler, autocast
+from torch.nn.parallel import DistributedDataParallel
+from torch.utils.data import DataLoader
+from torch.utils.data.distributed import DistributedSampler
+from torch.utils.tensorboard import SummaryWriter
+from nvtx import annotate as nvtx_annotate
+import torch.cuda.nvtx as nvtx
+
+
+from physicsnemo.distributed import DistributedManager
+from physicsnemo.launch.utils import load_checkpoint, save_checkpoint
+from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper
+
+from physicsnemo.datapipes.cae.domino_datapipe2 import (
+    DoMINODataPipe,
+    create_domino_dataset,
+)
+from physicsnemo.datapipes.cae.drivaer_ml_dataset import (
+    DrivaerMLDataset,
+)
+
+from physicsnemo.models.domino.model import DoMINO
+from physicsnemo.utils.domino.utils import sample_points_on_mesh
+
+from utils import ScalingFactors
+
+# This is included for GPU memory tracking:
+from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo
+import time
+
+
+# Initialize NVML
+nvmlInit()
+
+
+from physicsnemo.utils.profiling import profile, Profiler
+
+
+# Profiler().enable("torch")
+# Profiler().initialize()
+
+from loss import compute_loss_dict
+from utils import get_num_vars
+
+
+def inference_epoch(
+    dataset: DrivaerMLDataset,
+    sampler: DistributedSampler,
+    datapipe: DoMINODataPipe,
+    model: DoMINO,
+    gpu_handle: int,
+    device: torch.device,
+    logger: PythonLogger,
+    batch_size: int = 1_024_000,
+    total_points: int = 1_024_000,
+) -> float:
+    epoch_indices = list(sampler)
+
+    # n_steps = total_points // batch_size
+    # if n_steps * batch_size < total_points:
+    #     n_steps += 1
+    #     last_batch_size = total_points - n_steps * batch_size
+
+    # Assuming here there are more than two target meshes:
+    dataset.preload(epoch_indices[0])
+    dataset.preload(epoch_indices[1])
+    for i_batch, epoch_index in enumerate(epoch_indices):
+        # Do some preloading of input data:
+
+        data_time_start = time.perf_counter()
+        if i_batch + 2 < len(epoch_indices):
+            # Preload next next
+            dataset.preload(epoch_indices[i_batch + 2])
+        # Get the data for this index:
+        sample_batched = dataset[epoch_index]
+        data_time_end = time.perf_counter()
+        print(f"Data {i_batch} time: {data_time_end - data_time_start:.3f} seconds")
+        procesing_time_start = time.perf_counter()
+        # We always need these keys, but are only reading the faces and coordinates
+        # which saves on IO speed.
+        # "stl_coordinates", "stl_centers", "stl_faces", "stl_areas"
+
+        # So, do the computation of the areas and centers:
+        # Center is a mean of the 3 vertices
+        triangle_vertices = sample_batched["stl_coordinates"][
+            sample_batched["stl_faces"].reshape((-1, 3))
+        ]
+        sample_batched["stl_centers"] = triangle_vertices.mean(dim=-1)
+        # Area we compute from the cross product of two sides:
+        d1 = triangle_vertices[:, 1] - triangle_vertices[:, 0]
+        d2 = triangle_vertices[:, 2] - triangle_vertices[:, 0]
+        inferred_mesh_normals = torch.linalg.cross(d1, d2, dim=1)
+        normals_norm = torch.linalg.norm(inferred_mesh_normals, dim=1)
+        sample_batched["stl_areas"] = 0.5 * normals_norm
+
+        for i in range(10):
+            batch_time_start = time.perf_counter()
+            # Now that we have the meshes, begin to build a batch of data up for preprocessing:
+            sampled_points, sampled_faces, sampled_areas, sampled_normals = (
+                sample_points_on_mesh(
+                    sample_batched["stl_coordinates"],
+                    sample_batched["stl_faces"],
+                    batch_size,
+                    mesh_normals=sample_batched["surface_normals"],
+                    mesh_areas=sample_batched["stl_areas"],
+                )
+            )
+
+            # Build up volume points too:
+            c_min = datapipe.config.bounding_box_dims[1]
+            c_max = datapipe.config.bounding_box_dims[0]
+
+            sampled_volume_points = (c_max - c_min) * torch.rand(
+                batch_size, 3, device=device, dtype=torch.float32
+            ) + c_min
+
+            inference_dict = {
+                "stl_coordinates": sample_batched["stl_coordinates"],
+                "stl_faces": sample_batched["stl_faces"],
+                "stl_centers": sample_batched["stl_centers"],
+                "stl_areas": sample_batched["stl_areas"],
+                "surface_mesh_centers": sampled_points,
+                "surface_normals": sampled_normals,
+                "surface_areas": sampled_areas,
+                "surface_faces": sampled_faces,
+                "volume_mesh_centers": sampled_volume_points,
+            }
+
+            preprocessed_data = datapipe.process_data(inference_dict, i_batch)
+
+            # Add a batch dimension to the data_dict
+            preprocessed_data = {
+                k: v.unsqueeze(0) for k, v in preprocessed_data.items()
+            }
+
+            with torch.no_grad():
+                output_data = model(preprocessed_data)
+
+            batch_time_end = time.perf_counter()
+            points_per_second = batch_size / (batch_time_end - batch_time_start)
+            print(
+                f"Batch {i} in {i_batch} time: {batch_time_end - batch_time_start:.3f} seconds, {points_per_second:.3f} points per second"
+            )
+        procesing_time_end = time.perf_counter()
+        print(
+            f"Processing {i_batch} time: {procesing_time_end - procesing_time_start:.3f} seconds"
+        )
+        if i_batch > 20:
+            break
+        print(sample_batched.keys())
+
+    return 0.0
+
+
+@hydra.main(version_base="1.3", config_path="conf", config_name="config")
+def main(cfg: DictConfig) -> None:
+    ################################
+    # initialize distributed manager
+    ################################
+    DistributedManager.initialize()
+    dist = DistributedManager()
+
+    ################################
+    # Initialize NVML
+    ################################
+    nvmlInit()
+    gpu_handle = nvmlDeviceGetHandleByIndex(dist.device.index)
+
+    ################################
+    # Initialize logger
+    ################################
+
+    logger = PythonLogger("Train")
+    logger = RankZeroLoggingWrapper(logger, dist)
+
+    logger.info(f"Config summary:\n{OmegaConf.to_yaml(cfg, sort_keys=True)}")
+
+    ################################
+    # Get scaling factors
+    ################################
+    pickle_path = os.path.join(cfg.output) + "/scaling_factors/scaling_factors.pkl"
+
+    try:
+        scaling_factors = ScalingFactors.load(pickle_path)
+        logger.info(f"Scaling factors loaded from: {pickle_path}")
+    except FileNotFoundError:
+        raise FileNotFoundError(
+            f"Scaling factors not found at: {pickle_path}; please run compute_statistics.py to compute them."
+        )
+
+    model_type = cfg.model.model_type
+
+    num_vol_vars, num_surf_vars, num_global_features = get_num_vars(cfg, model_type)
+
+    if model_type == "combined" or model_type == "surface":
+        surface_variable_names = list(cfg.variables.surface.solution.keys())
+    else:
+        surface_variable_names = []
+
+    if model_type == "combined" or model_type == "volume":
+        volume_variable_names = list(cfg.variables.volume.solution.keys())
+    else:
+        volume_variable_names = []
+
+    vol_factors = scaling_factors.mean["volume_fields"]
+    surf_factors = scaling_factors.mean["surface_fields"]
+    vol_factors_tensor = torch.from_numpy(vol_factors).to(dist.device)
+
+    bounding_box = None
+
+    # Override the model type
+    # For the inference pipeline, we adjust the tooling a little for the data.
+    # We use only a bare STL dataset that will read the mesh coordinates
+    # and triangle definitions.  We'll compute the centers and normals
+    # on the GPU (instead of on the CPU, as pyvista would do) and
+    # then we can sample from that mesh on the GPU.
+    test_dataset = DrivaerMLDataset(
+        data_dir=cfg.eval.test_path,
+        keys_to_read=[
+            "stl_coordinates",
+            "stl_faces",
+        ],
+        output_device=dist.device,
+    )
+
+    # Volumetric data will be generated on the fly on the GPU.
+
+    # We _won't_ iterate over the datapipe, however, we can use the
+    # datapipe processing tools on the sampled surface and
+    overrides = {}
+    if hasattr(cfg.data, "gpu_preprocessing"):
+        overrides["gpu_preprocessing"] = cfg.data.gpu_preprocessing
+
+    if hasattr(cfg.data, "gpu_output"):
+        overrides["gpu_output"] = cfg.data.gpu_output
+
+    test_datapipe = DoMINODataPipe(
+        None,
+        phase="test",
+        grid_resolution=cfg.model.interp_res,
+        volume_variables=volume_variable_names,
+        surface_variables=surface_variable_names,
+        normalize_coordinates=True,
+        sampling=False,
+        sample_in_bbox=True,
+        volume_points_sample=None,
+        surface_points_sample=None,
+        geom_points_sample=None,
+        positional_encoding=cfg.model.positional_encoding,
+        volume_factors=vol_factors,
+        surface_factors=surf_factors,
+        scaling_type=cfg.model.normalization,
+        model_type=model_type,
+        bounding_box_dims=cfg.data.bounding_box,
+        bounding_box_dims_surf=cfg.data.bounding_box_surface,
+        num_surface_neighbors=cfg.model.num_neighbors_surface,
+        resample_surfaces=cfg.model.resampling_surface_mesh.resample,
+        resampling_points=cfg.model.resampling_surface_mesh.points,
+        surface_sampling_algorithm=cfg.model.surface_sampling_algorithm,
+        **overrides,
+    )
+
+    test_sampler = DistributedSampler(
+        test_dataset,
+        num_replicas=dist.world_size,
+        rank=dist.rank,
+        **cfg.train.sampler,
+    )
+
+    model = DoMINO(
+        input_features=3,
+        output_features_vol=num_vol_vars,
+        output_features_surf=num_surf_vars,
+        global_features=num_global_features,
+        model_parameters=cfg.model,
+    ).to(dist.device)
+    # model = torch.compile(model, fullgraph=True, dynamic=True)  # TODO make this configurable
+
+    # Print model summary (structure and parmeter count).
+    logger.info(f"Model summary:\n{torchinfo.summary(model, verbose=0, depth=2)}\n")
+
+    writer = SummaryWriter(os.path.join(cfg.output, "tensorboard"))
+
+    model_save_path = os.path.join(cfg.output, "models")
+    param_save_path = os.path.join(cfg.output, "param")
+    best_model_path = os.path.join(model_save_path, "best_model")
+
+    if dist.world_size > 1:
+        torch.distributed.barrier()
+
+    load_checkpoint(
+        to_absolute_path(cfg.resume_dir),
+        models=model,
+        device=dist.device,
+    )
+
+    initial_integral_factor_orig = cfg.model.integral_loss_scaling_factor
+
+    start_time = time.perf_counter()
+
+    # This controls what indices to use for each epoch.
+    test_sampler.set_epoch(0)
+
+    initial_integral_factor = initial_integral_factor_orig
+
+    model.eval()
+    epoch_start_time = time.perf_counter()
+    inference_epoch(
+        dataset=test_dataset,
+        sampler=test_sampler,
+        datapipe=test_datapipe,
+        model=model,
+        logger=logger,
+        gpu_handle=gpu_handle,
+        device=dist.device,
+    )
+    epoch_end_time = time.perf_counter()
+    logger.info(
+        f"Device {dist.device}, Epoch took {epoch_end_time - epoch_start_time:.3f} seconds"
+    )
+
+
+if __name__ == "__main__":
+    # Profiler().enable("torch")
+    # Profiler().initialize()
+    main()
+    # Profiler().finalize()
diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py
index 2ff363e40a..5a155fc198 100644
--- a/examples/cfd/external_aerodynamics/domino/src/train.py
+++ b/examples/cfd/external_aerodynamics/domino/src/train.py
@@ -30,9 +30,6 @@
 import time
 import os
 import re
-import torch
-import torchinfo
-
 from typing import Literal, Any
 
 import apex
@@ -40,6 +37,18 @@
 import hydra
 from hydra.utils import to_absolute_path
 from omegaconf import DictConfig, OmegaConf
+
+
+DISABLE_RMM = os.environ.get("DOMINO_DISABLE_RMM", "False")
+if not DISABLE_RMM:
+    import rmm
+    from rmm.allocators.torch import rmm_torch_allocator
+    import torch
+
+    rmm.reinitialize(pool_allocator=True)
+    torch.cuda.memory.change_current_allocator(rmm_torch_allocator)
+
+import torchinfo
 import torch.distributed as dist
 from torch.amp import GradScaler, autocast
 from torch.nn.parallel import DistributedDataParallel
@@ -67,6 +76,7 @@
 from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo
 import time
 
+
 # Initialize NVML
 nvmlInit()
 

From 386483013a3d2856e28f4b4f10372306ff2a4a5d Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 10 Sep 2025 15:11:44 +0000
Subject: [PATCH 21/98] Update inference script for Domino STL inference.

---
 .../external_aerodynamics/domino/README.md    |   2 +-
 .../domino/src/inference_on_stl2.py           | 261 ++++++++++++++----
 physicsnemo/datapipes/cae/domino_datapipe2.py |  34 +++
 physicsnemo/utils/domino/utils.py             |   1 -
 4 files changed, 237 insertions(+), 61 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/README.md b/examples/cfd/external_aerodynamics/domino/README.md
index 470a6a5f46..a786772071 100644
--- a/examples/cfd/external_aerodynamics/domino/README.md
+++ b/examples/cfd/external_aerodynamics/domino/README.md
@@ -113,7 +113,7 @@ knowledge of the dataset:
 
 - The output fields (the labels) are normalized during training to a mean
   of zero and a standard deviation of one, averaged over the dataset.
-  The scaling is controlled by passing the `volume_factors` and
+  The scaling is controlled by passing the `volume_factors` andg
   `surface_factors` values to the datapipe.
 - The input locations are scaled by, and optionally cropped to, used defined
   bounding boxes for both surface and volume.  Whether cropping occurs, or not,
diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py
index 7b4f71b507..9a44f996e9 100644
--- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py
+++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py
@@ -57,7 +57,6 @@
 from nvtx import annotate as nvtx_annotate
 import torch.cuda.nvtx as nvtx
 
-
 from physicsnemo.distributed import DistributedManager
 from physicsnemo.launch.utils import load_checkpoint, save_checkpoint
 from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper
@@ -87,9 +86,6 @@
 from physicsnemo.utils.profiling import profile, Profiler
 
 
-# Profiler().enable("torch")
-# Profiler().initialize()
-
 from loss import compute_loss_dict
 from utils import get_num_vars
 
@@ -102,9 +98,15 @@ def inference_epoch(
     gpu_handle: int,
     device: torch.device,
     logger: PythonLogger,
-    batch_size: int = 1_024_000,
+    batch_size: int = 24_000,
     total_points: int = 1_024_000,
 ) -> float:
+    ######################################################
+    # Inference can run in a distributed way by coordinating
+    # the indices for each rank, which the sampler does
+    ######################################################
+
+    # Convert the indices right to a list:
     epoch_indices = list(sampler)
 
     # n_steps = total_points // batch_size
@@ -112,41 +114,73 @@ def inference_epoch(
     #     n_steps += 1
     #     last_batch_size = total_points - n_steps * batch_size
 
-    # Assuming here there are more than two target meshes:
+    ######################################################
+    # Assuming here there are more than two target meshes
+    # This will get the IO pipe running in the background
+    # While we process a dataset.
+    ######################################################
     dataset.preload(epoch_indices[0])
     dataset.preload(epoch_indices[1])
-    for i_batch, epoch_index in enumerate(epoch_indices):
-        # Do some preloading of input data:
 
-        data_time_start = time.perf_counter()
+    for i_batch, epoch_index in enumerate(epoch_indices):
+        batch_start_time = time.perf_counter()
+        ######################################################
+        # Put another example in the preload queue while this
+        # batch is processed
+        ######################################################
+        data_loading_start = time.perf_counter()
         if i_batch + 2 < len(epoch_indices):
             # Preload next next
             dataset.preload(epoch_indices[i_batch + 2])
+
+        ######################################################
         # Get the data for this index:
+        ######################################################
         sample_batched = dataset[epoch_index]
-        data_time_end = time.perf_counter()
-        print(f"Data {i_batch} time: {data_time_end - data_time_start:.3f} seconds")
+        dataloading_time = time.perf_counter() - data_loading_start
+
+        logger.info(
+            f"Batch {i_batch} data loading time: {dataloading_time:.3f} seconds"
+        )
+
         procesing_time_start = time.perf_counter()
-        # We always need these keys, but are only reading the faces and coordinates
-        # which saves on IO speed.
-        # "stl_coordinates", "stl_centers", "stl_faces", "stl_areas"
 
-        # So, do the computation of the areas and centers:
+        ######################################################
+        # The IO only reads in "stl_faces" and "stl_coordinates".
+        # "stl_areas" and "stl_centers" would be computed by
+        # pyvista on CPU - instead, we do it on the GPU
+        # right here.
+        ######################################################
+
         # Center is a mean of the 3 vertices
         triangle_vertices = sample_batched["stl_coordinates"][
             sample_batched["stl_faces"].reshape((-1, 3))
         ]
         sample_batched["stl_centers"] = triangle_vertices.mean(dim=-1)
+        ######################################################
         # Area we compute from the cross product of two sides:
+        ######################################################
         d1 = triangle_vertices[:, 1] - triangle_vertices[:, 0]
         d2 = triangle_vertices[:, 2] - triangle_vertices[:, 0]
         inferred_mesh_normals = torch.linalg.cross(d1, d2, dim=1)
         normals_norm = torch.linalg.norm(inferred_mesh_normals, dim=1)
         sample_batched["stl_areas"] = 0.5 * normals_norm
 
-        for i in range(10):
-            batch_time_start = time.perf_counter()
-            # Now that we have the meshes, begin to build a batch of data up for preprocessing:
+        ######################################################
+        # For computing the points, we take those stl objects,
+        # sample in chunks of `batch_size` until we've
+        # accumulated `total_points` predictions.
+        ######################################################
+
+        batch_output_dict = {}
+        N = 2
+        total_points_processed = 0
+        while total_points_processed < total_points:
+            inner_loop_start_time = time.perf_counter()
+
+            ######################################################
+            # This function will sample points on the STL surface
+            ######################################################
             sampled_points, sampled_faces, sampled_areas, sampled_normals = (
                 sample_points_on_mesh(
                     sample_batched["stl_coordinates"],
@@ -157,7 +191,11 @@ def inference_epoch(
                 )
             )
 
-            # Build up volume points too:
+            ######################################################
+            # Build up volume points too with uniform sampling
+            # TODO - this doesn't filter points that are
+            # internal to the mesh
+            ######################################################
             c_min = datapipe.config.bounding_box_dims[1]
             c_max = datapipe.config.bounding_box_dims[0]
 
@@ -165,6 +203,9 @@ def inference_epoch(
                 batch_size, 3, device=device, dtype=torch.float32
             ) + c_min
 
+            ######################################################
+            # Create the dictionary as the preprocessing expects:
+            ######################################################
             inference_dict = {
                 "stl_coordinates": sample_batched["stl_coordinates"],
                 "stl_faces": sample_batched["stl_faces"],
@@ -177,58 +218,113 @@ def inference_epoch(
                 "volume_mesh_centers": sampled_volume_points,
             }
 
+            ######################################################
+            # Pre-process the data with the datapipe:
+            ######################################################
             preprocessed_data = datapipe.process_data(inference_dict, i_batch)
 
+            ######################################################
             # Add a batch dimension to the data_dict
+            # (normally this is added in __getitem__ of the datapipe)
+            ######################################################
             preprocessed_data = {
                 k: v.unsqueeze(0) for k, v in preprocessed_data.items()
             }
 
+            ######################################################
+            # Forward pass through the model:
+            ######################################################
             with torch.no_grad():
-                output_data = model(preprocessed_data)
-
-            batch_time_end = time.perf_counter()
-            points_per_second = batch_size / (batch_time_end - batch_time_start)
-            print(
-                f"Batch {i} in {i_batch} time: {batch_time_end - batch_time_start:.3f} seconds, {points_per_second:.3f} points per second"
+                output_vol, output_surf = model(preprocessed_data)
+
+            ######################################################
+            # unnormalize the outputs with the datapipe
+            # Whatever settings are configured for normalizing the
+            # output fields - even though we don't have ground
+            # truth here - are reused to undo that for the predictions
+            ######################################################
+            output_vol, output_surf = datapipe.unscale_model_outputs(
+                output_vol, output_surf
             )
+
+            ######################################################
+            # Peel off pressure, velocity, nut, shear, etc.
+            # Also compute drag, lift forces.
+            ######################################################
+            # TODO
+            # TODO
+            # TODO
+            # TODO
+            # TODO
+            # TODO
+            # TODO
+
+            total_points_processed += batch_size
+
+            current_loop_time = time.perf_counter()
+
+            gpu_info = nvmlDeviceGetMemoryInfo(gpu_handle)
+            gpu_memory_used = gpu_info.used / (1024**3)
+
+            logging_string = f"Device {device}, batch {i_batch} processed {total_points_processed} points of {total_points}\n"
+            logging_string += f"  GPU memory used: {gpu_memory_used:.3f} Gb\n"
+            logging_string += f"  Time taken since batch start: {current_loop_time - batch_start_time:.2f} seconds\n"
+            logging_string += f"  iteration throughput: {batch_size / (current_loop_time - inner_loop_start_time):.1f} points per second\n"
+            logging_string += f"  Batch mean throughput: {total_points_processed / (current_loop_time - batch_start_time):.1f} points per second (includes IO)\n"
+
+            logger.info(logging_string)
+
         procesing_time_end = time.perf_counter()
-        print(
-            f"Processing {i_batch} time: {procesing_time_end - procesing_time_start:.3f} seconds"
+        logger.info(
+            f"Batch {i_batch} GPU processing time: {procesing_time_end - procesing_time_start:.3f} seconds"
+        )
+
+        output_start_time = time.perf_counter()
+        ######################################################
+        # Save the outputs to file:
+        ######################################################
+        # TODO
+        # TODO
+        # TODO
+        # TODO
+        # TODO
+        # TODO
+        output_end_time = time.perf_counter()
+        logger.info(
+            f"Batch {i_batch} output time: {output_end_time - output_start_time:.3f} seconds"
         )
-        if i_batch > 20:
-            break
-        print(sample_batched.keys())
 
-    return 0.0
+        if i_batch > 5:
+            break
 
 
 @hydra.main(version_base="1.3", config_path="conf", config_name="config")
 def main(cfg: DictConfig) -> None:
-    ################################
+    ######################################################
     # initialize distributed manager
-    ################################
+    ######################################################
     DistributedManager.initialize()
     dist = DistributedManager()
 
-    ################################
+    ######################################################
     # Initialize NVML
-    ################################
+    ######################################################
     nvmlInit()
     gpu_handle = nvmlDeviceGetHandleByIndex(dist.device.index)
 
-    ################################
+    ######################################################
     # Initialize logger
-    ################################
+    ######################################################
 
     logger = PythonLogger("Train")
     logger = RankZeroLoggingWrapper(logger, dist)
 
     logger.info(f"Config summary:\n{OmegaConf.to_yaml(cfg, sort_keys=True)}")
 
-    ################################
+    ######################################################
     # Get scaling factors
-    ################################
+    # Likely, you want to reuse the scaling factors from training.
+    ######################################################
     pickle_path = os.path.join(cfg.output) + "/scaling_factors/scaling_factors.pkl"
 
     try:
@@ -239,8 +335,13 @@ def main(cfg: DictConfig) -> None:
             f"Scaling factors not found at: {pickle_path}; please run compute_statistics.py to compute them."
         )
 
-    model_type = cfg.model.model_type
+    vol_factors = scaling_factors.mean["volume_fields"]
+    surf_factors = scaling_factors.mean["surface_fields"]
 
+    ######################################################
+    # Configure the model
+    ######################################################
+    model_type = cfg.model.model_type
     num_vol_vars, num_surf_vars, num_global_features = get_num_vars(cfg, model_type)
 
     if model_type == "combined" or model_type == "surface":
@@ -253,11 +354,41 @@ def main(cfg: DictConfig) -> None:
     else:
         volume_variable_names = []
 
-    vol_factors = scaling_factors.mean["volume_fields"]
-    surf_factors = scaling_factors.mean["surface_fields"]
-    vol_factors_tensor = torch.from_numpy(vol_factors).to(dist.device)
+    ######################################################
+    # Check that the sample size is equal.
+    # unequal samples could be done but they aren't, here.s
+    ######################################################
+    if cfg.model.model_type == "combined":
+        if cfg.model.volume_points_sample != cfg.model.surface_points_sample:
+            raise ValueError(
+                "Volume and surface points sample must be equal for combined model"
+            )
+
+    # Get the number of sample points:
+    sample_points = (
+        cfg.model.surface_points_sample
+        if cfg.model.model_type == "surface"
+        else cfg.model.volume_points_sample
+    )
 
-    bounding_box = None
+    ######################################################
+    # If the batch size doesn't evenly divide
+    # the num points, that's ok.  But print a warning
+    # that the total points will get tweaked.
+    ######################################################
+    if cfg.eval.num_points % sample_points != 0:
+        logger.warning(
+            f"Batch size {sample_points} doesn't evenly divide num points {cfg.eval.num_points}."
+        )
+        logger.warning(
+            f"Total points will be rounded up to {((cfg.eval.num_points // sample_points) + 1) * sample_points}."
+        )
+
+    ######################################################
+    # Configure the dataset
+    # We are applying preprocessing in a separate step
+    # for this - so the dataset and datapipe are separate
+    ######################################################
 
     # Override the model type
     # For the inference pipeline, we adjust the tooling a little for the data.
@@ -276,8 +407,13 @@ def main(cfg: DictConfig) -> None:
 
     # Volumetric data will be generated on the fly on the GPU.
 
+    ######################################################
+    # Configure the datapipe
     # We _won't_ iterate over the datapipe, however, we can use the
     # datapipe processing tools on the sampled surface and
+    # volume points with the same preprocessing.
+    # It also is used to un-normalize the model outputs.
+    ######################################################
     overrides = {}
     if hasattr(cfg.data, "gpu_preprocessing"):
         overrides["gpu_preprocessing"] = cfg.data.gpu_preprocessing
@@ -311,6 +447,10 @@ def main(cfg: DictConfig) -> None:
         **overrides,
     )
 
+    ######################################################
+    # The sampler is used in multi-gpu inference to
+    # coordinate the batches used for each rank.
+    ######################################################
     test_sampler = DistributedSampler(
         test_dataset,
         num_replicas=dist.world_size,
@@ -318,6 +458,10 @@ def main(cfg: DictConfig) -> None:
         **cfg.train.sampler,
     )
 
+    ######################################################
+    # Configure the model
+    # and move it to the device.
+    ######################################################
     model = DoMINO(
         input_features=3,
         output_features_vol=num_vol_vars,
@@ -330,12 +474,6 @@ def main(cfg: DictConfig) -> None:
     # Print model summary (structure and parmeter count).
     logger.info(f"Model summary:\n{torchinfo.summary(model, verbose=0, depth=2)}\n")
 
-    writer = SummaryWriter(os.path.join(cfg.output, "tensorboard"))
-
-    model_save_path = os.path.join(cfg.output, "models")
-    param_save_path = os.path.join(cfg.output, "param")
-    best_model_path = os.path.join(model_save_path, "best_model")
-
     if dist.world_size > 1:
         torch.distributed.barrier()
 
@@ -354,17 +492,22 @@ def main(cfg: DictConfig) -> None:
 
     initial_integral_factor = initial_integral_factor_orig
 
+    prof = Profiler()
+
     model.eval()
     epoch_start_time = time.perf_counter()
-    inference_epoch(
-        dataset=test_dataset,
-        sampler=test_sampler,
-        datapipe=test_datapipe,
-        model=model,
-        logger=logger,
-        gpu_handle=gpu_handle,
-        device=dist.device,
-    )
+    with prof:
+        inference_epoch(
+            dataset=test_dataset,
+            sampler=test_sampler,
+            datapipe=test_datapipe,
+            model=model,
+            logger=logger,
+            gpu_handle=gpu_handle,
+            device=dist.device,
+            batch_size=sample_points,
+            total_points=cfg.eval.num_points,
+        )
     epoch_end_time = time.perf_counter()
     logger.info(
         f"Device {dist.device}, Epoch took {epoch_end_time - epoch_start_time:.3f} seconds"
diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py
index 18d198f457..f5ed3693ac 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe2.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe2.py
@@ -51,6 +51,8 @@
     pad,
     shuffle_array,
     standardize,
+    unnormalize,
+    unstandardize,
 )
 from physicsnemo.utils.neighbors import knn
 from physicsnemo.utils.profiling import profile
@@ -808,6 +810,38 @@ def process_data(self, data_dict, idx: int):
 
         return return_dict
 
+    def unscale_model_outputs(
+        self, volume_fields: torch.Tensor | None, surface_fields: torch.Tensor | None
+    ):
+        """
+        Unscale the model outputs based on the configured scaling factors.
+
+        The unscaling is included here to make it a consistent interface regardless
+        of the scaling factors and type used.
+
+        """
+
+        if volume_fields is not None:
+            if self.config.scaling_type == "mean_std_scaling":
+                vol_mean = self.config.volume_factors[0]
+                vol_std = self.config.volume_factors[1]
+                volume_fields = unstandardize(volume_fields, vol_mean, vol_std)
+            elif self.config.scaling_type == "min_max_scaling":
+                vol_min = self.config.volume_factors[1]
+                vol_max = self.config.volume_factors[0]
+                volume_fields = unnormalize(volume_fields, vol_max, vol_min)
+        if surface_fields is not None:
+            if self.config.scaling_type == "mean_std_scaling":
+                surf_mean = self.config.surface_factors[0]
+                surf_std = self.config.surface_factors[1]
+                surface_fields = unstandardize(surface_fields, surf_mean, surf_std)
+            elif self.config.scaling_type == "min_max_scaling":
+                surf_min = self.config.surface_factors[1]
+                surf_max = self.config.surface_factors[0]
+                surface_fields = unnormalize(surface_fields, surf_max, surf_min)
+
+        return volume_fields, surface_fields
+
     def __getitem__(self, idx):
         """
         Function for fetching and processing a single file's data.
diff --git a/physicsnemo/utils/domino/utils.py b/physicsnemo/utils/domino/utils.py
index 336a411497..95a7011976 100644
--- a/physicsnemo/utils/domino/utils.py
+++ b/physicsnemo/utils/domino/utils.py
@@ -256,7 +256,6 @@ def calculate_normal_positional_encoding(
         pos_x = torch.cat(calculate_pos_encoding(normals[:, 0] / dx, d=4), dim=-1)
         pos_y = torch.cat(calculate_pos_encoding(normals[:, 1] / dy, d=4), dim=-1)
         pos_z = torch.cat(calculate_pos_encoding(normals[:, 2] / dz, d=4), dim=-1)
-        print(pos_x.shape, pos_y.shape, pos_z.shape)
         pos_normals = torch.cat((pos_x, pos_y, pos_z), dim=0).reshape(-1, 12)
 
     return pos_normals

From 0635e4d5d94967ac428b636cdca8ea3aaa8ca8db Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 10 Sep 2025 15:20:32 +0000
Subject: [PATCH 22/98] Minor tweaks to the inference script.

---
 .../domino/src/inference_on_stl2.py               | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py
index 9a44f996e9..1f4c2a6305 100644
--- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py
+++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py
@@ -223,6 +223,17 @@ def inference_epoch(
             ######################################################
             preprocessed_data = datapipe.process_data(inference_dict, i_batch)
 
+            ######################################################
+            # Use the sign of the volume SDF to filter out points
+            # That are inside the STL mesh
+            ######################################################
+            # TODO
+            # TODO
+            # TODO
+            # TODO
+            # TODO
+            # TODO
+
             ######################################################
             # Add a batch dimension to the data_dict
             # (normally this is added in __getitem__ of the datapipe)
@@ -483,15 +494,11 @@ def main(cfg: DictConfig) -> None:
         device=dist.device,
     )
 
-    initial_integral_factor_orig = cfg.model.integral_loss_scaling_factor
-
     start_time = time.perf_counter()
 
     # This controls what indices to use for each epoch.
     test_sampler.set_epoch(0)
 
-    initial_integral_factor = initial_integral_factor_orig
-
     prof = Profiler()
 
     model.eval()

From db8cc984c409a65577b94eba0b14d87ac0d2fe4a Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 10 Sep 2025 15:26:56 +0000
Subject: [PATCH 23/98] Mark the docstring for updating.

---
 .../external_aerodynamics/domino/src/inference_on_stl2.py  | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py
index 1f4c2a6305..48e4f1ebc2 100644
--- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py
+++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py
@@ -14,6 +14,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# TODO
+# TODO
+# TODO
+# Update this
+# TODO
+# TODO
+# TODO
 """
 This code defines a distributed pipeline for training the DoMINO model on
 CFD datasets. It includes the computation of scaling factors, instantiating

From 2a190eb0efc9520e159bd98ecf1a75b153d0969b Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 10 Sep 2025 16:32:53 +0000
Subject: [PATCH 24/98] Spin off the stl sampling and inference loop into it's
 own function, for ease in downstream apps with only one stl.

---
 .../domino/src/inference_on_stl2.py           | 308 ++++++++++--------
 physicsnemo/datapipes/cae/domino_datapipe2.py |   5 +-
 2 files changed, 166 insertions(+), 147 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py
index 48e4f1ebc2..676f146b89 100644
--- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py
+++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py
@@ -97,17 +97,170 @@
 from utils import get_num_vars
 
 
+def inference_on_single_stl(
+    stl_coordinates: torch.Tensor,
+    stl_faces: torch.Tensor,
+    model: DoMINO,
+    datapipe: DoMINODataPipe,
+    batch_size: int,
+    total_points: int,
+    gpu_handle: int | None = None,
+    logger: PythonLogger | None = None,
+):
+    device = stl_coordinates.device
+    batch_start_time = time.perf_counter()
+    ######################################################
+    # The IO only reads in "stl_faces" and "stl_coordinates".
+    # "stl_areas" and "stl_centers" would be computed by
+    # pyvista on CPU - instead, we do it on the GPU
+    # right here.
+    ######################################################
+
+    # Center is a mean of the 3 vertices
+    triangle_vertices = stl_coordinates[stl_faces.reshape((-1, 3))]
+    stl_centers = triangle_vertices.mean(dim=-1)
+    ######################################################
+    # Area we compute from the cross product of two sides:
+    ######################################################
+    d1 = triangle_vertices[:, 1] - triangle_vertices[:, 0]
+    d2 = triangle_vertices[:, 2] - triangle_vertices[:, 0]
+    inferred_mesh_normals = torch.linalg.cross(d1, d2, dim=1)
+    normals_norm = torch.linalg.norm(inferred_mesh_normals, dim=1)
+    inferred_mesh_normals = inferred_mesh_normals / normals_norm.unsqueeze(1)
+    stl_areas = 0.5 * normals_norm
+
+    ######################################################
+    # For computing the points, we take those stl objects,
+    # sample in chunks of `batch_size` until we've
+    # accumulated `total_points` predictions.
+    ######################################################
+
+    batch_output_dict = {}
+    N = 2
+    total_points_processed = 0
+    while total_points_processed < total_points:
+        inner_loop_start_time = time.perf_counter()
+
+        ######################################################
+        # This function will sample points on the STL surface
+        ######################################################
+        sampled_points, sampled_faces, sampled_areas, sampled_normals = (
+            sample_points_on_mesh(
+                stl_coordinates,
+                stl_faces,
+                batch_size,
+                mesh_normals=inferred_mesh_normals,
+                mesh_areas=stl_areas,
+            )
+        )
+
+        ######################################################
+        # Build up volume points too with uniform sampling
+        # TODO - this doesn't filter points that are
+        # internal to the mesh
+        ######################################################
+        c_min = datapipe.config.bounding_box_dims[1]
+        c_max = datapipe.config.bounding_box_dims[0]
+
+        sampled_volume_points = (c_max - c_min) * torch.rand(
+            batch_size, 3, device=device, dtype=torch.float32
+        ) + c_min
+
+        ######################################################
+        # Create the dictionary as the preprocessing expects:
+        ######################################################
+        inference_dict = {
+            "stl_coordinates": stl_coordinates,
+            "stl_faces": stl_faces,
+            "stl_centers": stl_centers,
+            "stl_areas": stl_areas,
+            "surface_mesh_centers": sampled_points,
+            "surface_normals": sampled_normals,
+            "surface_areas": sampled_areas,
+            "surface_faces": sampled_faces,
+            "volume_mesh_centers": sampled_volume_points,
+        }
+
+        ######################################################
+        # Pre-process the data with the datapipe:
+        ######################################################
+        preprocessed_data = datapipe.process_data(inference_dict)
+
+        ######################################################
+        # Use the sign of the volume SDF to filter out points
+        # That are inside the STL mesh
+        ######################################################
+        # TODO
+        # TODO
+        # TODO
+        # TODO
+        # TODO
+        # TODO
+
+        ######################################################
+        # Add a batch dimension to the data_dict
+        # (normally this is added in __getitem__ of the datapipe)
+        ######################################################
+        preprocessed_data = {k: v.unsqueeze(0) for k, v in preprocessed_data.items()}
+
+        ######################################################
+        # Forward pass through the model:
+        ######################################################
+        with torch.no_grad():
+            output_vol, output_surf = model(preprocessed_data)
+
+        ######################################################
+        # unnormalize the outputs with the datapipe
+        # Whatever settings are configured for normalizing the
+        # output fields - even though we don't have ground
+        # truth here - are reused to undo that for the predictions
+        ######################################################
+        output_vol, output_surf = datapipe.unscale_model_outputs(
+            output_vol, output_surf
+        )
+
+        ######################################################
+        # Peel off pressure, velocity, nut, shear, etc.
+        # Also compute drag, lift forces.
+        ######################################################
+        # TODO
+        # TODO
+        # TODO
+        # TODO
+        # TODO
+        # TODO
+        # TODO
+
+        total_points_processed += batch_size
+
+        current_loop_time = time.perf_counter()
+
+        logging_string = f"Device {device} processed {total_points_processed} points of {total_points}\n"
+        if gpu_handle is not None:
+            gpu_info = nvmlDeviceGetMemoryInfo(gpu_handle)
+            gpu_memory_used = gpu_info.used / (1024**3)
+            logging_string += f"  GPU memory used: {gpu_memory_used:.3f} Gb\n"
+
+        logging_string += f"  Time taken since batch start: {current_loop_time - batch_start_time:.2f} seconds\n"
+        logging_string += f"  iteration throughput: {batch_size / (current_loop_time - inner_loop_start_time):.1f} points per second\n"
+        logging_string += f"  Batch mean throughput: {total_points_processed / (current_loop_time - batch_start_time):.1f} points per second.\n"
+
+        if logger is not None:
+            logger.info(logging_string)
+        else:
+            print(logging_string)
+
+
 def inference_epoch(
     dataset: DrivaerMLDataset,
     sampler: DistributedSampler,
     datapipe: DoMINODataPipe,
     model: DoMINO,
     gpu_handle: int,
-    device: torch.device,
     logger: PythonLogger,
     batch_size: int = 24_000,
     total_points: int = 1_024_000,
-) -> float:
+):
     ######################################################
     # Inference can run in a distributed way by coordinating
     # the indices for each rank, which the sampler does
@@ -151,146 +304,16 @@ def inference_epoch(
         )
 
         procesing_time_start = time.perf_counter()
-
-        ######################################################
-        # The IO only reads in "stl_faces" and "stl_coordinates".
-        # "stl_areas" and "stl_centers" would be computed by
-        # pyvista on CPU - instead, we do it on the GPU
-        # right here.
-        ######################################################
-
-        # Center is a mean of the 3 vertices
-        triangle_vertices = sample_batched["stl_coordinates"][
-            sample_batched["stl_faces"].reshape((-1, 3))
-        ]
-        sample_batched["stl_centers"] = triangle_vertices.mean(dim=-1)
-        ######################################################
-        # Area we compute from the cross product of two sides:
-        ######################################################
-        d1 = triangle_vertices[:, 1] - triangle_vertices[:, 0]
-        d2 = triangle_vertices[:, 2] - triangle_vertices[:, 0]
-        inferred_mesh_normals = torch.linalg.cross(d1, d2, dim=1)
-        normals_norm = torch.linalg.norm(inferred_mesh_normals, dim=1)
-        sample_batched["stl_areas"] = 0.5 * normals_norm
-
-        ######################################################
-        # For computing the points, we take those stl objects,
-        # sample in chunks of `batch_size` until we've
-        # accumulated `total_points` predictions.
-        ######################################################
-
-        batch_output_dict = {}
-        N = 2
-        total_points_processed = 0
-        while total_points_processed < total_points:
-            inner_loop_start_time = time.perf_counter()
-
-            ######################################################
-            # This function will sample points on the STL surface
-            ######################################################
-            sampled_points, sampled_faces, sampled_areas, sampled_normals = (
-                sample_points_on_mesh(
-                    sample_batched["stl_coordinates"],
-                    sample_batched["stl_faces"],
-                    batch_size,
-                    mesh_normals=sample_batched["surface_normals"],
-                    mesh_areas=sample_batched["stl_areas"],
-                )
-            )
-
-            ######################################################
-            # Build up volume points too with uniform sampling
-            # TODO - this doesn't filter points that are
-            # internal to the mesh
-            ######################################################
-            c_min = datapipe.config.bounding_box_dims[1]
-            c_max = datapipe.config.bounding_box_dims[0]
-
-            sampled_volume_points = (c_max - c_min) * torch.rand(
-                batch_size, 3, device=device, dtype=torch.float32
-            ) + c_min
-
-            ######################################################
-            # Create the dictionary as the preprocessing expects:
-            ######################################################
-            inference_dict = {
-                "stl_coordinates": sample_batched["stl_coordinates"],
-                "stl_faces": sample_batched["stl_faces"],
-                "stl_centers": sample_batched["stl_centers"],
-                "stl_areas": sample_batched["stl_areas"],
-                "surface_mesh_centers": sampled_points,
-                "surface_normals": sampled_normals,
-                "surface_areas": sampled_areas,
-                "surface_faces": sampled_faces,
-                "volume_mesh_centers": sampled_volume_points,
-            }
-
-            ######################################################
-            # Pre-process the data with the datapipe:
-            ######################################################
-            preprocessed_data = datapipe.process_data(inference_dict, i_batch)
-
-            ######################################################
-            # Use the sign of the volume SDF to filter out points
-            # That are inside the STL mesh
-            ######################################################
-            # TODO
-            # TODO
-            # TODO
-            # TODO
-            # TODO
-            # TODO
-
-            ######################################################
-            # Add a batch dimension to the data_dict
-            # (normally this is added in __getitem__ of the datapipe)
-            ######################################################
-            preprocessed_data = {
-                k: v.unsqueeze(0) for k, v in preprocessed_data.items()
-            }
-
-            ######################################################
-            # Forward pass through the model:
-            ######################################################
-            with torch.no_grad():
-                output_vol, output_surf = model(preprocessed_data)
-
-            ######################################################
-            # unnormalize the outputs with the datapipe
-            # Whatever settings are configured for normalizing the
-            # output fields - even though we don't have ground
-            # truth here - are reused to undo that for the predictions
-            ######################################################
-            output_vol, output_surf = datapipe.unscale_model_outputs(
-                output_vol, output_surf
-            )
-
-            ######################################################
-            # Peel off pressure, velocity, nut, shear, etc.
-            # Also compute drag, lift forces.
-            ######################################################
-            # TODO
-            # TODO
-            # TODO
-            # TODO
-            # TODO
-            # TODO
-            # TODO
-
-            total_points_processed += batch_size
-
-            current_loop_time = time.perf_counter()
-
-            gpu_info = nvmlDeviceGetMemoryInfo(gpu_handle)
-            gpu_memory_used = gpu_info.used / (1024**3)
-
-            logging_string = f"Device {device}, batch {i_batch} processed {total_points_processed} points of {total_points}\n"
-            logging_string += f"  GPU memory used: {gpu_memory_used:.3f} Gb\n"
-            logging_string += f"  Time taken since batch start: {current_loop_time - batch_start_time:.2f} seconds\n"
-            logging_string += f"  iteration throughput: {batch_size / (current_loop_time - inner_loop_start_time):.1f} points per second\n"
-            logging_string += f"  Batch mean throughput: {total_points_processed / (current_loop_time - batch_start_time):.1f} points per second (includes IO)\n"
-
-            logger.info(logging_string)
+        inference_on_single_stl(
+            sample_batched["stl_coordinates"],
+            sample_batched["stl_faces"],
+            model,
+            datapipe,
+            batch_size,
+            total_points,
+            gpu_handle,
+            logger,
+        )
 
         procesing_time_end = time.perf_counter()
         logger.info(
@@ -518,7 +541,6 @@ def main(cfg: DictConfig) -> None:
             model=model,
             logger=logger,
             gpu_handle=gpu_handle,
-            device=dist.device,
             batch_size=sample_points,
             total_points=cfg.eval.num_points,
         )
diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py
index f5ed3693ac..01ed70f6d8 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe2.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe2.py
@@ -727,14 +727,11 @@ def process_volume(
         return return_dict
 
     @torch.no_grad()
-    def process_data(self, data_dict, idx: int):
+    def process_data(self, data_dict):
         for key in self.keys_to_read_if_available.keys():
             if key not in data_dict:
                 data_dict[key] = self.keys_to_read_if_available[key]
 
-        if self.config.deterministic:
-            torch.manual_seed(idx)
-
         # Start building the preprocessed return dict:
         return_dict = {
             "global_params_values": data_dict["global_params_values"],

From 6393b5616dd8dc5d220d3f34798e56ba009fcd3d Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 10 Sep 2025 17:05:10 +0000
Subject: [PATCH 25/98] Ensure stl mesh itself gets processed too

---
 .../domino/src/inference_on_stl2.py           | 199 +++++++++++++-----
 1 file changed, 144 insertions(+), 55 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py
index 676f146b89..91374b63e2 100644
--- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py
+++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py
@@ -107,6 +107,25 @@ def inference_on_single_stl(
     gpu_handle: int | None = None,
     logger: PythonLogger | None = None,
 ):
+    """
+    Perform model inference on a single STL mesh.
+
+    This function will take the input mesh + faces and
+    then sample the surface and volume to produce the model outputs
+    at `total_points` locations in batches of `batch_size`.
+
+
+
+    Args:
+        stl_coordinates: The coordinates of the STL mesh.
+        stl_faces: The faces of the STL mesh.
+        model: The model to use for inference.
+        datapipe: The datapipe to use for preprocessing.
+        batch_size: The batch size to use for inference.
+        total_points: The total number of points to process.
+        gpu_handle: The GPU handle to use for inference.
+        logger: The logger to use for logging.
+    """
     device = stl_coordinates.device
     batch_start_time = time.perf_counter()
     ######################################################
@@ -124,9 +143,9 @@ def inference_on_single_stl(
     ######################################################
     d1 = triangle_vertices[:, 1] - triangle_vertices[:, 0]
     d2 = triangle_vertices[:, 2] - triangle_vertices[:, 0]
-    inferred_mesh_normals = torch.linalg.cross(d1, d2, dim=1)
-    normals_norm = torch.linalg.norm(inferred_mesh_normals, dim=1)
-    inferred_mesh_normals = inferred_mesh_normals / normals_norm.unsqueeze(1)
+    stl_mesh_normals = torch.linalg.cross(d1, d2, dim=1)
+    normals_norm = torch.linalg.norm(stl_mesh_normals, dim=1)
+    stl_mesh_normals = stl_mesh_normals / normals_norm.unsqueeze(1)
     stl_areas = 0.5 * normals_norm
 
     ######################################################
@@ -138,33 +157,13 @@ def inference_on_single_stl(
     batch_output_dict = {}
     N = 2
     total_points_processed = 0
-    while total_points_processed < total_points:
-        inner_loop_start_time = time.perf_counter()
-
-        ######################################################
-        # This function will sample points on the STL surface
-        ######################################################
-        sampled_points, sampled_faces, sampled_areas, sampled_normals = (
-            sample_points_on_mesh(
-                stl_coordinates,
-                stl_faces,
-                batch_size,
-                mesh_normals=inferred_mesh_normals,
-                mesh_areas=stl_areas,
-            )
-        )
 
-        ######################################################
-        # Build up volume points too with uniform sampling
-        # TODO - this doesn't filter points that are
-        # internal to the mesh
-        ######################################################
-        c_min = datapipe.config.bounding_box_dims[1]
-        c_max = datapipe.config.bounding_box_dims[0]
+    # Use these lists to build up the output tensors:
+    surface_results = []
+    volume_results = []
 
-        sampled_volume_points = (c_max - c_min) * torch.rand(
-            batch_size, 3, device=device, dtype=torch.float32
-        ) + c_min
+    while total_points_processed < total_points:
+        inner_loop_start_time = time.perf_counter()
 
         ######################################################
         # Create the dictionary as the preprocessing expects:
@@ -174,28 +173,60 @@ def inference_on_single_stl(
             "stl_faces": stl_faces,
             "stl_centers": stl_centers,
             "stl_areas": stl_areas,
-            "surface_mesh_centers": sampled_points,
-            "surface_normals": sampled_normals,
-            "surface_areas": sampled_areas,
-            "surface_faces": sampled_faces,
-            "volume_mesh_centers": sampled_volume_points,
         }
 
+        # If the surface data is part of the model, sample the surface:
+
+        if datapipe.model_type == "surface" or datapipe.model_type == "combined":
+            ######################################################
+            # This function will sample points on the STL surface
+            ######################################################
+            sampled_points, sampled_faces, sampled_areas, sampled_normals = (
+                sample_points_on_mesh(
+                    stl_coordinates,
+                    stl_faces,
+                    batch_size,
+                    mesh_normals=stl_mesh_normals,
+                    mesh_areas=stl_areas,
+                )
+            )
+
+            inference_dict["surface_mesh_centers"] = sampled_points
+            inference_dict["surface_normals"] = sampled_normals
+            inference_dict["surface_areas"] = sampled_areas
+            inference_dict["surface_faces"] = sampled_faces
+
+        # If the volume data is part of the model, sample the volume:
+        if datapipe.model_type == "volume" or datapipe.model_type == "combined":
+            ######################################################
+            # Build up volume points too with uniform sampling
+            # TODO - this doesn't filter points that are
+            # internal to the mesh
+            ######################################################
+            c_min = datapipe.config.bounding_box_dims[1]
+            c_max = datapipe.config.bounding_box_dims[0]
+
+            sampled_volume_points = (c_max - c_min) * torch.rand(
+                batch_size, 3, device=device, dtype=torch.float32
+            ) + c_min
+
+            inference_dict["volume_mesh_centers"] = (sampled_volume_points,)
+
         ######################################################
         # Pre-process the data with the datapipe:
         ######################################################
         preprocessed_data = datapipe.process_data(inference_dict)
 
-        ######################################################
-        # Use the sign of the volume SDF to filter out points
-        # That are inside the STL mesh
-        ######################################################
-        # TODO
-        # TODO
-        # TODO
-        # TODO
-        # TODO
-        # TODO
+        if datapipe.model_type == "volume" or datapipe.model_type == "combined":
+            ######################################################
+            # Use the sign of the volume SDF to filter out points
+            # That are inside the STL mesh
+            ######################################################
+            sdf_nodes = preprocessed_data["sdf_nodes"]
+            valid_volume_idx = sdf_nodes > 0
+            preprocessed_data["volume_mesh_centers"] = preprocessed_data[
+                "volume_mesh_centers"
+            ][valid_volume_idx]
 
         ######################################################
         # Add a batch dimension to the data_dict
@@ -219,17 +250,8 @@ def inference_on_single_stl(
             output_vol, output_surf
         )
 
-        ######################################################
-        # Peel off pressure, velocity, nut, shear, etc.
-        # Also compute drag, lift forces.
-        ######################################################
-        # TODO
-        # TODO
-        # TODO
-        # TODO
-        # TODO
-        # TODO
-        # TODO
+        surface_results.append(output_surf)
+        volume_results.append(output_vol)
 
         total_points_processed += batch_size
 
@@ -250,6 +272,61 @@ def inference_on_single_stl(
         else:
             print(logging_string)
 
+    ######################################################
+    # Here at the end, get the values for the stl centers
+    # by updating the previous inference dict
+    # Only do this if the surface is part of the computation
+    # Comments are shorter here - it's a condensed version
+    # of the above logic.
+    ######################################################
+    if datapipe.model_type == "surface" or datapipe.model_type == "combined":
+        stl_inference_dict = {
+            "stl_coordinates": stl_coordinates,
+            "stl_faces": stl_faces,
+            "stl_centers": stl_centers,
+            "stl_areas": stl_areas,
+        }
+        inference_dict["surface_mesh_centers"] = stl_centers
+        inference_dict["surface_normals"] = stl_mesh_normals
+        inference_dict["surface_areas"] = stl_areas
+        inference_dict["surface_faces"] = stl_faces
+
+        # Just reuse the previous volume samples here if needed:
+        if datapipe.model_type == "combined":
+            inference_dict["volume_mesh_centers"] = sampled_volume_points
+
+        # Preprocess:
+        preprocessed_data = datapipe.process_data(inference_dict)
+
+        # Pull out the invalid volume points again, if needed:
+        if datapipe.model_type == "combined":
+            sdf_nodes = preprocessed_data["sdf_nodes"]
+            valid_volume_idx = sdf_nodes > 0
+            preprocessed_data["volume_mesh_centers"] = preprocessed_data[
+                "volume_mesh_centers"
+            ][valid_volume_idx]
+
+        # Run the model forward:
+        with torch.no_grad():
+            preprocessed_data = {
+                k: v.unsqueeze(0) for k, v in preprocessed_data.items()
+            }
+            _, output_surf = model(preprocessed_data)
+
+        # Unnormalize the outputs:
+        _, stl_center_results = datapipe.unscale_model_outputs(None, output_surf)
+
+    else:
+        stl_center_results = None
+
+    # Stack up the results into one big tensor for surface and volume:
+    if all([s is not None for s in surface_results]):
+        surface_results = torch.cat(surface_results, dim=1)
+    if all([v is not None for v in volume_results]):
+        volume_results = torch.cat(volume_results, dim=0)
+
+    return stl_center_results, surface_results, volume_results
+
 
 def inference_epoch(
     dataset: DrivaerMLDataset,
@@ -304,7 +381,7 @@ def inference_epoch(
         )
 
         procesing_time_start = time.perf_counter()
-        inference_on_single_stl(
+        stl_center_resulst, surface_results, volume_results = inference_on_single_stl(
             sample_batched["stl_coordinates"],
             sample_batched["stl_faces"],
             model,
@@ -315,6 +392,18 @@ def inference_epoch(
             logger,
         )
 
+        ######################################################
+        # Peel off pressure, velocity, nut, shear, etc.
+        # Also compute drag, lift forces.
+        ######################################################
+        # TODO
+        # TODO
+        # TODO
+        # TODO
+        # TODO
+        # TODO
+        # TODO
+
         procesing_time_end = time.perf_counter()
         logger.info(
             f"Batch {i_batch} GPU processing time: {procesing_time_end - procesing_time_start:.3f} seconds"

From f7e9ea24739e7e293419bd081a28228c92dca2c3 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 10 Sep 2025 17:10:48 +0000
Subject: [PATCH 26/98] Update docstring for inference file.

---
 .../domino/src/inference_on_stl2.py           | 38 +++++++------------
 1 file changed, 13 insertions(+), 25 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py
index 91374b63e2..9ff5b62d0d 100644
--- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py
+++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py
@@ -14,24 +14,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# TODO
-# TODO
-# TODO
-# Update this
-# TODO
-# TODO
-# TODO
 """
-This code defines a distributed pipeline for training the DoMINO model on
-CFD datasets. It includes the computation of scaling factors, instantiating
-the DoMINO model and datapipe, automatically loading the most recent checkpoint,
-training the model in parallel using DistributedDataParallel across multiple
-GPUs, calculating the loss and updating model parameters using mixed precision.
-This is a common recipe that enables training of combined models for surface and
-volume as well either of them separately. Validation is also conducted every epoch,
-where predictions are compared against ground truth values. The code logs training
-and validation metrics to TensorBoard. The train tab in config.yaml can be used to
-specify batch size, number of epochs and other training parameters.
+This code shows how to use a trained DoMINO model, with it's corresponding
+preprocessing pipeline, to infer values on and around an STL mesh file.
+
+This script uses the meshes from the DrivaerML dataset, however, the logic
+is largely the same.  As an overview:
+- Load the model
+- Set up the preprocessor
+- Loop over meshes
+- In each mesh, sample random points on the surface, volume, or both
+- Preprocess the points and run them through the model
+- Process the STL mesh centers, too
+- Collect the results and return
+- Save the results to file.
 """
 
 import time
@@ -346,11 +342,6 @@ def inference_epoch(
     # Convert the indices right to a list:
     epoch_indices = list(sampler)
 
-    # n_steps = total_points // batch_size
-    # if n_steps * batch_size < total_points:
-    #     n_steps += 1
-    #     last_batch_size = total_points - n_steps * batch_size
-
     ######################################################
     # Assuming here there are more than two target meshes
     # This will get the IO pipe running in the background
@@ -424,9 +415,6 @@ def inference_epoch(
             f"Batch {i_batch} output time: {output_end_time - output_start_time:.3f} seconds"
         )
 
-        if i_batch > 5:
-            break
-
 
 @hydra.main(version_base="1.3", config_path="conf", config_name="config")
 def main(cfg: DictConfig) -> None:

From d784a801a110f26ff1c1ae679c44451d94ea7845 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 15 Sep 2025 19:37:49 +0000
Subject: [PATCH 27/98] Enable shard tensor for zarr datasets, both with or
 without tensorstore

---
 physicsnemo/datapipes/cae/domino_datapipe2.py | 244 ++++-------
 .../datapipes/cae/drivaer_ml_dataset.py       | 403 +++++++++++++++---
 2 files changed, 432 insertions(+), 215 deletions(-)

diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py
index 01ed70f6d8..472c09be24 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe2.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe2.py
@@ -29,7 +29,7 @@
 from concurrent.futures import ThreadPoolExecutor
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Literal, Optional, Protocol, Sequence, Union
+from typing import Iterable, Literal, Optional, Protocol, Sequence, Union
 
 import numpy as np
 import torch
@@ -299,41 +299,7 @@ def __init__(
                 dtype=torch.float32,
             )
 
-        # Always read these keys:
-        self.keys_to_read = ["stl_coordinates", "stl_centers", "stl_faces", "stl_areas"]
-
-        self.keys_to_read_if_available = {
-            "global_params_values": torch.tensor(
-                [[30.0], [1.226]], device=self.preproc_device
-            ),
-            "global_params_reference": torch.tensor(
-                [[30.0], [1.226]], device=self.preproc_device
-            ),
-        }
-
-        self.volume_keys = ["volume_mesh_centers", "volume_fields"]
-        self.surface_keys = [
-            "surface_mesh_centers",
-            "surface_normals",
-            "surface_areas",
-            "surface_fields",
-        ]
-
-        if self.model_type == "volume" or self.model_type == "combined":
-            self.keys_to_read.extend(self.volume_keys)
-        if self.model_type == "surface" or self.model_type == "combined":
-            self.keys_to_read.extend(self.surface_keys)
-
-        if self.config.data_path is not None:
-            self.dataset = DrivaerMLDataset(
-                data_dir=self.config.data_path,
-                keys_to_read=self.keys_to_read,
-                output_device=self.preproc_device,
-                pin_memory=pin_memory,
-                consumer_stream=torch.cuda.default_stream(),
-            )
-        else:
-            self.dataset = None
+        self.dataset = None
 
         # This is thread storage for data preprocessing:
         self._preprocess_queue = {}
@@ -341,21 +307,6 @@ def __init__(
         self.preprocess_depth = 2
         self.preprocess_executor = ThreadPoolExecutor(max_workers=1)
 
-    def set_indices(self, indices: list[int]):
-        """
-        Set the indices for the dataset for this epoch.
-        """
-
-        # TODO - this needs to block while anything is in the preprocess queue.
-
-        self.indices = indices
-
-    def __len__(self):
-        if self.dataset is not None:
-            return len(self.dataset)
-        else:
-            return 0
-
     def compute_stl_scaling(
         self, stl_vertices: torch.Tensor, bounding_box_dims_surf: torch.Tensor | None
     ):
@@ -728,10 +679,6 @@ def process_volume(
 
     @torch.no_grad()
     def process_data(self, data_dict):
-        for key in self.keys_to_read_if_available.keys():
-            if key not in data_dict:
-                data_dict[key] = self.keys_to_read_if_available[key]
-
         # Start building the preprocessed return dict:
         return_dict = {
             "global_params_values": data_dict["global_params_values"],
@@ -839,6 +786,15 @@ def unscale_model_outputs(
 
         return volume_fields, surface_fields
 
+    def set_dataset(self, dataset: Iterable) -> None:
+        self.dataset = dataset
+
+    def __len__(self):
+        if self.dataset is not None:
+            return len(self.dataset)
+        else:
+            return 0
+
     def __getitem__(self, idx):
         """
         Function for fetching and processing a single file's data.
@@ -850,17 +806,27 @@ def __getitem__(self, idx):
         if self.dataset is None:
             raise ValueError("Dataset is not present")
 
-        index = self.idx_to_index(idx)
+        # Get the data from the dataset.
+        # Under the hood, this may be fetching preloaded data.
+        data_dict = self.dataset[idx]
+
+        return self.__call__(data_dict)
+
+    def __call__(self, data_dict: dict) -> dict:
+        """
+        Process the incoming data dictionary.
+        - Processes the data
+        - moves it to GPU
+        - adds a batch dimension
 
-        # Get the preprocessed data:
-        data_dict = self.get_preprocessed(idx)
-        if data_dict is None:
-            # If no preprocessing was done for this index, process it now
+        Args:
+            data_dict: Dictionary containing the data to process as torch.Tensors.
 
-            # Get the data from the dataset.
-            # Under the hood, this may be fetching preloaded data.
-            data_dict = self.dataset[index]
-            data_dict = self.process_data(data_dict, idx)
+        Returns:
+            Dictionary containing the processed data as torch.Tensors.
+
+        """
+        data_dict = self.process_data(data_dict)
 
         # If the data is not on the target device, put it there:
         for key, value in data_dict.items():
@@ -872,101 +838,9 @@ def __getitem__(self, idx):
 
         return data_dict
 
-    def idx_to_index(self, idx):
-        if hasattr(self, "indices"):
-            return self.indices[idx]
-
-        return idx
-
-    def preprocess(self, idx: int) -> None:
-        """
-        Start preprocessing for the given index (1 step ahead).
-        This processes preloaded data or loads it if not available.
-        """
-        if self.dataset is None:
-            raise ValueError("Dataset is not present")
-
-        if idx in self._preprocess_queue:
-            # Skip items that are already being preprocessed
-            return
-
-        def _preprocess_worker():
-            index = self.idx_to_index(idx)
-            # Try to get preloaded data first
-            data_dict = self.dataset[index]
-            # Process the data
-            return self.process_data(data_dict, idx)
-
-        # Submit preprocessing task to thread pool
-        self._preprocess_queue[idx] = self.preprocess_executor.submit(
-            _preprocess_worker
-        )
-
-    def get_preprocessed(self, idx: int) -> dict | None:
-        """
-        Retrieve preprocessed data (blocking if not ready).
-        Returns None if no preprocessing is in progress for this index.
-        """
-        if idx not in self._preprocess_queue:
-            return None
-
-        result = self._preprocess_queue[idx].result()  # Block until ready
-        self._preprocess_queue.pop(idx)  # Clear after getting result
-
-        return result
-
-    def __next__(self):
-        # To iterate through the data efficiently, he have to implement the
-        # following, assuming a steady state
-
-        # - start the dataset loading at idx + 2
-        # - start the preprocessing pipe at idx + 1
-        #   - the preprocessing pipe has to implicitly wait for idx +1 in the dataset
-        # - wait for the preprocessing pipe at idx to finish
-        # return the data.
-
-        if self.dataset is None:
-            raise ValueError("Dataset is not present")
-
-        N = len(self.indices) if hasattr(self, "indices") else len(self.dataset)
-
-        if self.i >= N:
-            self.i = 0
-            raise StopIteration
-
-        current_idx = self.i
-
-        # Start loading two ahead:
-
-        if N > current_idx + 2:
-            self.dataset.preload(self.idx_to_index(current_idx + 1))
-            self.dataset.preload(self.idx_to_index(current_idx + 2))
-
-        # If no preprocessing was done for this index, process it now
-        data = self.__getitem__(current_idx)
-
-        self.i += 1
-        return data
-
     def __iter__(self):
-        # When starting the iterator method, start loading the data
-        # at idx = 0, idx = 1
-        # Start preprocessing at idx = 0, when the load completes
-
-        if self.dataset is None:
-            raise ValueError("Dataset is not present")
-
-        self.i = 0
-
-        N = len(self.indices) if hasattr(self, "indices") else len(self.dataset)
-
-        # Trigger the dataset to start loading index 0:
-        if N > 1:
-            self.dataset.preload(self.idx_to_index(self.i))
-        if N > 2:
-            self.dataset.preload(self.idx_to_index(self.i + 1))
-
-        return self
+        for i, batch in enumerate(self.dataset):
+            yield self.__call__(batch)
 
 
 def compute_scaling_factors(
@@ -1151,23 +1025,28 @@ def __getitem__(self, idx):
 def create_domino_dataset(
     cfg: DictConfig,
     phase: Literal["train", "val", "test"],
-    volume_variable_names: list[str],
-    surface_variable_names: list[str],
+    keys_to_read: list[str],
+    keys_to_read_if_available: dict[str, torch.Tensor],
     vol_factors: list[float],
     surf_factors: list[float],
     normalize_coordinates: bool = True,
     sample_in_bbox: bool = True,
     sampling: bool = True,
+    device_mesh: torch.distributed.DeviceMesh | None = None,
+    placements: dict[str, torch.distributed.tensor.Placement] | None = None,
 ):
     if phase == "train":
         input_path = cfg.data.input_dir
         model_type = cfg.model.model_type
+        dataloader_cfg = cfg.train.dataloader
     elif phase == "val":
         input_path = cfg.data.input_dir_val
         model_type = cfg.model.model_type
+        dataloader_cfg = cfg.val.dataloader
     elif phase == "test":
         input_path = cfg.eval.test_path
         model_type = "inference"
+        dataloader_cfg = None
     else:
         raise ValueError(f"Invalid phase {phase}")
 
@@ -1183,6 +1062,15 @@ def create_domino_dataset(
             surface_sampling_algorithm=cfg.model.surface_sampling_algorithm,
         )
     else:
+        # The dataset path works in two pieces:
+        # There is a core "dataset" which is loading data and moving to GPU
+        # And there is the preprocess step, here.
+
+        # Optionally, and for backwards compatibility, the preprocess
+        # object can accept a dataset which will enable it as an iterator.
+        # The iteration function will loop over the dataset, preprocess the
+        # output, and return it.
+
         overrides = {}
         if hasattr(cfg.data, "gpu_preprocessing"):
             overrides["gpu_preprocessing"] = cfg.data.gpu_preprocessing
@@ -1190,12 +1078,38 @@ def create_domino_dataset(
         if hasattr(cfg.data, "gpu_output"):
             overrides["gpu_output"] = cfg.data.gpu_output
 
-        return DoMINODataPipe(
+        dm = DistributedManager()
+
+        if cfg.data.gpu_preprocessing:
+            device = dm.device
+            consumer_stream = torch.cuda.default_stream()
+        else:
+            device = torch.device("cpu")
+            consumer_stream = None
+
+        if dataloader_cfg is not None:
+            preload_depth = dataloader_cfg.preload_depth
+            pin_memory = dataloader_cfg.pin_memory
+        else:
+            preload_depth = 2
+            pin_memory = False
+
+        dataset = DrivaerMLDataset(
+            data_dir=input_path,
+            keys_to_read=keys_to_read,
+            keys_to_read_if_available=keys_to_read_if_available,
+            output_device=device,
+            preload_depth=preload_depth,
+            pin_memory=pin_memory,
+            device_mesh=device_mesh,
+            placements=placements,
+            consumer_stream=consumer_stream,
+        )
+
+        datapipe = DoMINODataPipe(
             input_path,
             phase=phase,
             grid_resolution=cfg.model.interp_res,
-            volume_variables=volume_variable_names,
-            surface_variables=surface_variable_names,
             normalize_coordinates=normalize_coordinates,
             sampling=sampling,
             sample_in_bbox=sample_in_bbox,
@@ -1216,6 +1130,10 @@ def create_domino_dataset(
             **overrides,
         )
 
+        datapipe.set_dataset(dataset)
+
+        return datapipe
+
 
 if __name__ == "__main__":
     fm_data = DoMINODataPipe(
diff --git a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
index 17f486fb6c..0acec3b7a5 100644
--- a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
+++ b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
@@ -21,7 +21,9 @@
 
 import numpy as np
 import torch
+import torch.distributed as dist
 import zarr
+from torch.distributed.tensor import Replicate, Shard
 
 try:
     import tensorstore as ts
@@ -38,9 +40,7 @@
     PV_AVAILABLE = False
 
 from physicsnemo.distributed import ShardTensor, ShardTensorSpec
-
-# from physicsnemo.distributed.utils import compute_split_shapes
-
+from physicsnemo.distributed.utils import compute_split_shapes
 
 # Abstractions:
 # - want to read npy/npz/.zarr/.stl/.vtp files
@@ -67,11 +67,16 @@ class BackendReader(ABC):
     Abstract base class for backend readers.
     """
 
-    def __init__(self, keys_to_read: list[str] | None) -> None:
+    def __init__(
+        self,
+        keys_to_read: list[str] | None,
+        keys_to_read_if_available: dict[str, torch.Tensor] | None,
+    ) -> None:
         """
         Initialize the backend reader.
         """
         self.keys_to_read = keys_to_read
+        self.keys_to_read_if_available = keys_to_read_if_available
 
     @abstractmethod
     def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]:
@@ -82,21 +87,76 @@ def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]:
 
     @abstractmethod
     def read_file_sharded(
-        self, filename: pathlib.Path, parallel_rank: int, parallel_size: int
-    ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]:
+        self, filename: pathlib.Path, device_mesh: torch.distributed.DeviceMesh
+    ) -> tuple[dict[str, torch.Tensor], dict[str, dict]]:
         """
-        Read a file and return a dictionary of tensors.
+        Read a file and return a dictionary of tensors ready to convert to ShardTensors.
+
+        NOTE: this function does not actually convert torch tensors to ShardTensors.
+        It's possible that the conversion, in some cases, can be a collective function.
+        Due to the async nature of the loader, we don't rely on any ordering of
+        collectives and defer them to the last possible minute.
+
+        Additionally, these functions return CPU tensors and we don't actually
+        define shard tensors on cpu.
+
+        So, the dataset itself will convert a local tensor + shard info to shard tensor
+        after the cpu-> gpu movement.
         """
         pass
 
+    def fill_optional_keys(
+        self, data: dict[str, torch.Tensor]
+    ) -> dict[str, torch.Tensor]:
+        """
+        Fill missing keys with the keys from the keys_to_read_if_available dictionary.
+        """
+        for key in self.keys_to_read_if_available:
+            if key not in data.keys():
+                data[key] = self.keys_to_read_if_available[key]
+        return data
+
+    def _get_slice_boundaries(
+        self, array_shape: tuple[int], this_rank: int, n_splits: int, split_dim: int = 0
+    ) -> tuple[int, int, tuple | None]:
+        """
+        For an array, determine the slice boundaries for parallel reading.
+
+        Args:
+            array_shape: The total shape of the target array.
+            this_rank: The rank of the distributed process.
+            n_splits: The size of the distributed process.
+            split_dim: The dimension to split, default is 0.
+
+        Returns:
+            The slice boundaries for parallel reading.
+        """
+        # Determine what slice this rank should read
+
+        sections = compute_split_shapes(array_shape[split_dim], n_splits)
+
+        global_chunk_start = sum(sections[:this_rank])
+        global_chunk_stop = global_chunk_start + sections[this_rank]
+
+        chunk_sizes = tuple(
+            array_shape[:split_dim] + (section,) + array_shape[split_dim + 1 :]
+            for section in sections
+        )
+
+        return global_chunk_start, global_chunk_stop, chunk_sizes
+
 
 class NpyFileReader(BackendReader):
     """
     Reader for numpy files.
     """
 
-    def __init__(self, keys_to_read: list[str] | None) -> None:
-        super().__init__(keys_to_read)
+    def __init__(
+        self,
+        keys_to_read: list[str] | None,
+        keys_to_read_if_available: dict[str, torch.Tensor] | None,
+    ) -> None:
+        super().__init__(keys_to_read, keys_to_read_if_available)
 
     def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]:
         """
@@ -111,11 +171,11 @@ def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]:
 
         data = {key: torch.from_numpy(data[key]) for key in self.keys_to_read}
 
-        return data
+        return self.fill_optional_keys(data)
 
     def read_file_sharded(
-        self, filename: pathlib.Path, parallel_rank: int, parallel_size: int
-    ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]:
+        self, filename: pathlib.Path, device_mesh: torch.distributed.DeviceMesh
+    ) -> dict[str, ShardTensor]:
         pass
 
 
@@ -124,8 +184,12 @@ class NpzFileReader(BackendReader):
     Reader for npz files.
     """
 
-    def __init__(self, keys_to_read: list[str] | None) -> None:
-        super().__init__(keys_to_read)
+    def __init__(
+        self,
+        keys_to_read: list[str] | None,
+        keys_to_read_if_available: dict[str, torch.Tensor] | None,
+    ) -> None:
+        super().__init__(keys_to_read, keys_to_read_if_available)
 
     def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]:
         """
@@ -140,11 +204,11 @@ def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]:
 
         data = {key: torch.from_numpy(in_data[key][:]) for key in self.keys_to_read}
 
-        return data
+        return self.fill_optional_keys(data)
 
     def read_file_sharded(
-        self, filename: pathlib.Path, parallel_rank: int, parallel_size: int
-    ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]:
+        self, filename: pathlib.Path, device_mesh: torch.distributed.DeviceMesh
+    ) -> dict[str, ShardTensor]:
         pass
 
 
@@ -153,8 +217,12 @@ class ZarrFileReader(BackendReader):
     Reader for zarr files.
     """
 
-    def __init__(self, keys_to_read: list[str] | None) -> None:
-        super().__init__(keys_to_read)
+    def __init__(
+        self,
+        keys_to_read: list[str] | None,
+        keys_to_read_if_available: dict[str, torch.Tensor] | None,
+    ) -> None:
+        super().__init__(keys_to_read, keys_to_read_if_available)
 
     def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]:
         """
@@ -170,15 +238,78 @@ def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]:
         # This is a slower basic way to do this, to be improved:
         data = {key: torch.from_numpy(group[key][:]) for key in self.keys_to_read}
 
-        return data
+        return self.fill_optional_keys(data)
 
     def read_file_sharded(
-        self, filename: pathlib.Path, parallel_rank: int, parallel_size: int
-    ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]:
+        self, filename: pathlib.Path, device_mesh: torch.distributed.DeviceMesh
+    ) -> tuple[dict[str, torch.Tensor], dict[str, dict]]:
         """
         Read a file and return a dictionary of tensors.
         """
-        raise NotImplementedError("Not implemented yet.")
+
+        # We need the coordinates of this GPU:
+        this_rank = device_mesh.get_local_rank()
+        domain_size = dist.get_world_size(group=device_mesh.get_group())
+
+        group = zarr.open_group(filename, mode="r")
+
+        missing_keys = set(self.keys_to_read) - set(group.keys())
+
+        if len(missing_keys) > 0:
+            raise ValueError(f"Keys {missing_keys} not found in file {filename}")
+
+        data = {}
+        specs = {}
+        for key in self.keys_to_read:
+            # Open the array in zarr without reading it and get info:
+            zarr_array = group[key]
+            array_shape = zarr_array.shape
+            if array_shape == ():
+                # Read scalars from every rank and use replicate sharding
+                raw_data = torch.from_numpy(zarr_array[:])
+                placement = [
+                    Replicate(),
+                ]
+                chunk_sizes = None
+            else:
+                target_dim = 0
+                if array_shape[target_dim] < domain_size:
+                    # If the array is smaller than the number of ranks,
+                    # again read and use replicate sharding:
+                    raw_data = torch.from_numpy(zarr_array[:])
+                    placement = [
+                        Replicate(),
+                    ]
+                    chunk_sizes = None
+                else:
+                    # Read partially from the data and use Shard(target_dim) sharding
+                    chunk_start, chunk_stop, chunk_sizes = self._get_slice_boundaries(
+                        zarr_array.shape, this_rank, domain_size
+                    )
+                    raw_data = torch.from_numpy(zarr_array[chunk_start:chunk_stop])
+                    placement = [
+                        Shard(target_dim),
+                    ]
+
+                    # Turn chunk sizes into a dict over mesh dim 0:
+                    chunk_sizes = {0: chunk_sizes}
+
+            #
+            data[key] = raw_data
+            specs[key] = (placement, chunk_sizes)
+
+        # Patch in the optional keys:
+        data = self.fill_optional_keys(data)
+        for key in data.keys():
+            if key not in specs:
+                specs[key] = (
+                    [
+                        Replicate(),
+                    ],
+                    {},
+                )
+
+        return data, specs
 
 
 if PV_AVAILABLE:
@@ -188,8 +319,12 @@ class VTKFileReader(BackendReader):
         Reader for vtk files.
         """
 
-        def __init__(self, keys_to_read: list[str] | None) -> None:
-            super().__init__(keys_to_read)
+        def __init__(
+            self,
+            keys_to_read: list[str] | None,
+            keys_to_read_if_available: dict[str, torch.Tensor] | None,
+        ) -> None:
+            super().__init__(keys_to_read, keys_to_read_if_available)
 
             self.stl_file_keys = [
                 "stl_coordinates",
@@ -254,7 +389,7 @@ def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]:
             if any(key in self.vtu_file_keys for key in self.keys_to_read):
                 raise NotImplementedError("VTU files are not supported yet.")
 
-            return return_data
+            return self.fill_optional_keys(return_data)
 
         def read_file_sharded(
             self, filename: pathlib.Path, parallel_rank: int, parallel_size: int
@@ -309,8 +444,12 @@ class TensorStoreZarrReader(BackendReader):
         Reader for tensorstore zarr files.
         """
 
-        def __init__(self, keys_to_read: list[str] | None) -> None:
-            super().__init__(keys_to_read)
+        def __init__(
+            self,
+            keys_to_read: list[str] | None,
+            keys_to_read_if_available: dict[str, torch.Tensor] | None,
+        ) -> None:
+            super().__init__(keys_to_read, keys_to_read_if_available)
 
             self.spec_template = {
                 "driver": "zarr2",
@@ -331,6 +470,8 @@ def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]:
             """
             Read a file and return a dictionary of tensors.
             """
+
+            # Trigger an async open of each data item:
             read_futures = {}
             for key in self.keys_to_read:
                 spec = self.spec_template.copy()
@@ -340,24 +481,111 @@ def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]:
                     spec, create=False, open=True, context=self.context
                 )
 
-            results = {
-                key: np.array(read_futures[key].result()) for key in self.keys_to_read
+            # Wait for all the opens to conclude:
+            read_futures = {
+                key: read_futures[key].result() for key in read_futures.keys()
+            }
+
+            # Trigger an async read of each data item:
+            # (Each item will be a numpy ndarray after this:)
+            read_futures = {
+                key: read_futures[key].read() for key in read_futures.keys()
             }
 
+            # Convert them to torch tensors:
+            # (make sure to block for the result)
             data = {
-                key: torch.as_tensor(results[key], dtype=torch.float32)
+                key: torch.as_tensor(read_futures[key].result(), dtype=torch.float32)
                 for key in self.keys_to_read
             }
 
-            return data
+            return self.fill_optional_keys(data)
 
         def read_file_sharded(
-            self, filename: pathlib.Path, parallel_rank: int, parallel_size: int
-        ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]:
+            self, filename: pathlib.Path, device_mesh: torch.distributed.DeviceMesh
+        ) -> tuple[dict[str, torch.Tensor], dict[str, dict]]:
             """
             Read a file and return a dictionary of tensors.
             """
-            pass
+
+            # We need the coordinates of this GPU:
+            this_rank = device_mesh.get_local_rank()
+            domain_size = dist.get_world_size(group=device_mesh.get_group())
+
+            # This pulls a list of store objects in tensorstore:
+            stores = {}
+            for key in self.keys_to_read:
+                spec = self.spec_template.copy()
+                spec["kvstore"]["path"] = str(filename) + "/" + str(key)
+
+                stores[key] = ts.open(
+                    spec, create=False, open=True, context=self.context
+                )
+
+            stores = {key: stores[key].result() for key in stores.keys()}
+
+            data = {}
+            specs = {}
+            for key in self.keys_to_read:
+                # Open the array in zarr without reading it and get info:
+                store = stores[key]
+                array_shape = store.shape
+                if array_shape == ():
+                    # Read scalars from every rank and use replicate sharding
+                    _slice = np.s_[:]
+                    # raw_data = torch.from_numpy(store[:])
+                    placement = [
+                        Replicate(),
+                    ]
+                    chunk_sizes = None
+                else:
+                    target_dim = 0
+                    if array_shape[target_dim] < domain_size:
+                        # If the array is smaller than the number of ranks,
+                        # again read and use replicate sharding:
+                        _slice = np.s_[:]
+                        # raw_data = torch.from_numpy(store[:])
+                        placement = [
+                            Replicate(),
+                        ]
+                        chunk_sizes = None
+                    else:
+                        # Read partially from the data and use Shard(target_dim) sharding
+                        chunk_start, chunk_stop, chunk_sizes = (
+                            self._get_slice_boundaries(
+                                store.shape, this_rank, domain_size
+                            )
+                        )
+                        _slice = np.s_[chunk_start:chunk_stop]
+                        # raw_data = torch.from_numpy(zarr_array[chunk_start:chunk_stop])
+                        placement = [
+                            Shard(target_dim),
+                        ]
+
+                        # Turn chunk sizes into a dict over mesh dim 0:
+                        chunk_sizes = {0: chunk_sizes}
+
+                # Trigger the reads as async:
+                data[key] = store[_slice].read()
+                specs[key] = (placement, chunk_sizes)
+
+            # Finally, await the full data read:
+            for key in self.keys_to_read:
+                data[key] = torch.as_tensor(data[key].result())
+
+            # Patch in the optional keys:
+            data = self.fill_optional_keys(data)
+            for key in data.keys():
+                if key not in specs:
+                    specs[key] = (
+                        [
+                            Replicate(),
+                        ],
+                        {},
+                    )
+
+            return data, specs
+
 else:
 
     class TensorStoreZarrReader(BackendReader):
@@ -365,7 +593,11 @@ class TensorStoreZarrReader(BackendReader):
         Null reader for tensorstore zarr files.
         """
 
-        def __init__(self, keys_to_read: list[str] | None) -> None:
+        def __init__(
+            self,
+            keys_to_read: list[str] | None,
+            keys_to_read_if_available: dict[str, torch.Tensor] | None,
+        ) -> None:
             # Raise an exception on construction if we get here:
             raise NotImplementedError(
                 "TensorStoreZarrReader is not available without tensorstore.  `pip install tensorstore`."
@@ -417,6 +649,7 @@ def __init__(
         self,
         data_dir: str | pathlib.Path,
         keys_to_read: list[str] | None,
+        keys_to_read_if_available: dict[str, torch.Tensor] | None,
         output_device: torch.device,
         preload_depth: int = 2,
         pin_memory: bool = False,
@@ -436,6 +669,8 @@ def __init__(
             raise NotADirectoryError(f"Data directory {data_dir} is not a directory")
 
         self._keys_to_read = keys_to_read
+        self._keys_to_read_if_available = keys_to_read_if_available
+
         self.file_reader, self._filenames = self._infer_file_type_and_filenames(
             data_dir
         )
@@ -449,25 +684,46 @@ def __init__(
 
         self.output_device = output_device
         if output_device.type == "cuda":
-            # self._data_loader_stream = torch.cuda.default_stream()
             self._data_loader_stream = torch.cuda.Stream()
         else:
             self._data_loader_stream = None
 
         self.device_mesh = device_mesh
         self.placements = placements
+        # This tracks global tensor info
+        # so we can convert to ShardTensor at the right time.
+        self.shard_spec = {}
+
+        if self.device_mesh is not None:
+            if self.device_mesh.ndim != 1:
+                raise ValueError("Device mesh must be one dimensional")
 
         # This is thread storage for data preloading:
         self._preload_queue = {}
         self._transfer_events = {}
         self.preload_depth = preload_depth
-        self.preload_executor = ThreadPoolExecutor(max_workers=preload_depth)
+        self.preload_executor = ThreadPoolExecutor(max_workers=max(1, preload_depth))
 
         if consumer_stream is None and self.output_device.type == "cuda":
             consumer_stream = torch.cuda.current_stream()
 
         self.consumer_stream = consumer_stream
 
+    def set_indices(self, indices: list[int]):
+        """
+        Set the indices for the dataset for this epoch.
+        """
+
+        # TODO - this needs to block while anything is in the preprocess queue.
+
+        self.indices = indices
+
+    def idx_to_index(self, idx):
+        if hasattr(self, "indices"):
+            return self.indices[idx]
+
+        return idx
+
     def _infer_file_type_and_filenames(
         self, data_dir: pathlib.Path
     ) -> tuple[str, list[str]]:
@@ -485,19 +741,29 @@ def _infer_file_type_and_filenames(
         # But others benefit from having a state, so we use classes:
 
         if all(file.suffix == ".npy" for file in files):
-            file_reader = NpyFileReader(self._keys_to_read)
+            file_reader = NpyFileReader(
+                self._keys_to_read, self._keys_to_read_if_available
+            )
             return file_reader, files
         elif all(file.suffix == ".npz" for file in files):
-            file_reader = NpzFileReader(self._keys_to_read)
+            file_reader = NpzFileReader(
+                self._keys_to_read, self._keys_to_read_if_available
+            )
             return file_reader, files
         elif all(file.suffix == ".zarr" and file.is_dir() for file in files):
             if TENSORSTORE_AVAILABLE:
-                file_reader = TensorStoreZarrReader(self._keys_to_read)
+                file_reader = TensorStoreZarrReader(
+                    self._keys_to_read, self._keys_to_read_if_available
+                )
             else:
-                file_reader = ZarrFileReader(self._keys_to_read)
+                file_reader = ZarrFileReader(
+                    self._keys_to_read, self._keys_to_read_if_available
+                )
             return file_reader, files
         elif all(is_vtk_directory(file) for file in files):
-            file_reader = VTKFileReader(self._keys_to_read)
+            file_reader = VTKFileReader(
+                self._keys_to_read, self._keys_to_read_if_available
+            )
             return file_reader, files
             # Each "file" here is a directory of .vtp, stl, etc.
         else:
@@ -541,7 +807,9 @@ def _move_to_gpu(
         return result
 
     def _convert_to_shard_tensors(
-        self, tensors: dict[str, torch.Tensor]
+        self,
+        tensors: dict[str, torch.Tensor],
+        filename: str,
     ) -> dict[str, ShardTensor]:
         """Convert tensors to ShardTensor objects for distributed training.
 
@@ -555,7 +823,19 @@ def _convert_to_shard_tensors(
         if self.device_mesh is None:
             return tensors
 
-        raise NotImplementedError("Converting to ShardTensor here not implemented yet.")
+        spec_dict = self.shard_spec.pop(filename)
+        result = {}
+        for key in tensors.keys():
+            placement, chunk_sizes = spec_dict[key]
+
+            result[key] = ShardTensor.from_local(
+                local_tensor=tensors[key],
+                device_mesh=self.device_mesh,
+                placements=placement,
+                sharding_shapes=chunk_sizes,
+            )
+
+        return result
 
         # result = {}
 
@@ -628,18 +908,30 @@ def get_preloaded(self, idx: int) -> dict[str, torch.Tensor] | None:
         return result
 
     def __iter__(self):
+        # When starting the iterator method, start loading the data
+        # at idx = 0, idx = 1
+        # Start preprocessing at idx = 0, when the load completes
+
         self.i = 0
+
+        N = len(self.indices) if hasattr(self, "indices") else len(self)
+        for i in range(self.preload_depth):
+            # Trigger the dataset to start loading index 0:
+            if N > i + 1:
+                self.preload(self.idx_to_index(self.i + i))
+
         return self
 
     def __next__(self):
-        if self.i >= len(self._filenames):
+        N = len(self.indices) if hasattr(self, "indices") else len(self._filenames)
+
+        if self.i >= N:
             self.i = 0
             raise StopIteration
 
-        if self.preload_depth > 0 and self.i + 1 < len(self._filenames):
-            self.preload(self.i + 1)
-        if self.preload_depth > 1 and self.i + 2 < len(self._filenames):
-            self.preload(self.i + 2)
+        for i in range(self.preload_depth):
+            if N > i + 1:
+                self.preload(self.i + i)
 
         data = self.__getitem__(self.i)
 
@@ -654,7 +946,14 @@ def _read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]:
         """
         Read a file and return a dictionary of tensors.
         """
-        return self.file_reader.read_file(filename)
+        if self.device_mesh is not None:
+            tensor_dict, spec_dict = self.file_reader.read_file_sharded(
+                filename, self.device_mesh
+            )
+            self.shard_spec[filename] = spec_dict
+            return tensor_dict
+        else:
+            return self.file_reader.read_file(filename)
 
     def __getitem__(self, idx: int) -> dict[str, torch.Tensor | ShardTensor]:
         """
@@ -693,7 +992,7 @@ def __getitem__(self, idx: int) -> dict[str, torch.Tensor | ShardTensor]:
 
         # Convert to ShardTensors if using domain parallelism
         if self.device_mesh is not None:
-            data = self._convert_to_shard_tensors(data)
+            data = self._convert_to_shard_tensors(data, self._filenames[idx])
 
         return data
 

From 7c27a8eb8c99a1f6731ad0f4f401feb0d73d7a6c Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 15 Sep 2025 20:26:31 +0000
Subject: [PATCH 28/98] Updating and further documenting scripts

---
 .../domino/src/benchmark_dataloader.py        | 139 +++++--------
 .../external_aerodynamics/domino/src/train.py | 191 +++++++++++++-----
 .../domino/src/train_sharded.py               |   2 +-
 .../external_aerodynamics/domino/src/utils.py | 122 +++++++++++
 4 files changed, 307 insertions(+), 147 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
index b1f5184fc6..80c4b9e3cf 100644
--- a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
+++ b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
@@ -35,11 +35,21 @@
 
 from typing import Literal, Any
 
-import apex
-import numpy as np
+
 import hydra
 from hydra.utils import to_absolute_path
 from omegaconf import DictConfig, OmegaConf
+
+DISABLE_RMM = os.environ.get("DOMINO_DISABLE_RMM", "False")
+if not DISABLE_RMM:
+    import rmm
+    from rmm.allocators.torch import rmm_torch_allocator
+    import torch
+
+    rmm.reinitialize(pool_allocator=True)
+    torch.cuda.memory.change_current_allocator(rmm_torch_allocator)
+
+
 import torch.distributed as dist
 from torch.cuda.amp import GradScaler, autocast
 from torch.nn.parallel import DistributedDataParallel
@@ -66,6 +76,8 @@
 from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo
 import time
 
+from utils import ScalingFactors, get_keys_to_read, coordinate_distributed_environment
+
 # Initialize NVML
 nvmlInit()
 
@@ -73,10 +85,8 @@
 from physicsnemo.utils.profiling import profile, Profiler
 
 
-@profile
-def train_epoch(
+def benchmark_io_epoch(
     dataloader,
-    sampler,
     logger,
     gpu_handle,
     epoch_index,
@@ -84,8 +94,6 @@ def train_epoch(
 ):
     dist = DistributedManager()
 
-    indices = list(iter(sampler))
-    print(f"indices: {indices}")
     # If you tell the dataloader the indices in advance, it will preload
     # and pre-preprocess data
     # dataloader.set_indices(indices)
@@ -93,11 +101,8 @@ def train_epoch(
     gpu_start_info = nvmlDeviceGetMemoryInfo(gpu_handle)
     start_time = time.perf_counter()
     for i_batch, sample_batched in enumerate(dataloader):
-        # sampled_batched = dict_to_device(sample_batched, device)
-        # if i_batch == 7:
-        # break
-        # for key in sampled_batched.keys():
-        #     print(f"{key}: {sampled_batched[key].shape}")
+        # for key in sample_batched.keys():
+        #     print(f"{key}: {sample_batched[key].shape}")
 
         # Gather data and report
         elapsed_time = time.perf_counter() - start_time
@@ -116,80 +121,6 @@ def train_epoch(
     return
 
 
-def get_or_compute_scaling_factors(
-    cfg: DictConfig,
-) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
-    """Get or compute scaling factors for volume and surface fields normalization.
-
-    This function either loads pre-computed scaling factors from disk or computes them
-    if they don't exist. The scaling factors are used for normalizing volume and surface
-    fields data based on the specified normalization method in the config.
-
-    Args:
-        cfg (DictConfig): Configuration object containing:
-            - project.name: Project name for saving/loading scaling factors
-            - model.normalization: Type of normalization ("min_max_scaling" or "mean_std_scaling")
-            - data.input_dir: Input directory path
-            - data_processor.use_cache: Whether to use cached data
-
-    Returns:
-        tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: A tuple containing:
-            - vol_factors: Scaling factors for volume fields (max/min or mean/std)
-            - surf_factors: Scaling factors for surface fields (max/min or mean/std)
-            Each factor is a numpy array containing the respective scaling values.
-
-    Raises:
-        ValueError: If an invalid normalization type is specified in the config.
-    """
-    # Compute or load the scaling factors:
-    vol_save_path = os.path.join(
-        "outputs", cfg.project.name, "volume_scaling_factors.npy"
-    )
-    surf_save_path = os.path.join(
-        "outputs", cfg.project.name, "surface_scaling_factors.npy"
-    )
-
-    if not os.path.exists(vol_save_path) or not os.path.exists(surf_save_path):
-        # Save the scaling factors if needed:
-        mean, std, min_val, max_val = compute_scaling_factors(
-            cfg=cfg,
-            input_path=cfg.data.input_dir,
-            use_cache=cfg.data_processor.use_cache,
-        )
-
-        v_mean = mean["volume_fields"].cpu().numpy()
-        v_std = std["volume_fields"].cpu().numpy()
-        v_min = min_val["volume_fields"].cpu().numpy()
-        v_max = max_val["volume_fields"].cpu().numpy()
-
-        s_mean = mean["surface_fields"].cpu().numpy()
-        s_std = std["surface_fields"].cpu().numpy()
-        s_min = min_val["surface_fields"].cpu().numpy()
-        s_max = max_val["surface_fields"].cpu().numpy()
-
-        np.save(vol_save_path, [v_mean, v_std, v_min, v_max])
-        np.save(surf_save_path, [s_mean, s_std, s_min, s_max])
-    else:
-        v_mean, v_std, v_min, v_max = np.load(vol_save_path)
-        s_mean, s_std, s_min, s_max = np.load(surf_save_path)
-
-    if cfg.model.normalization == "min_max_scaling":
-        vol_factors = [v_max, v_min]
-    elif cfg.model.normalization == "mean_std_scaling":
-        vol_factors = [v_mean, v_std]
-    else:
-        raise ValueError(f"Invalid normalization type: {cfg.model.normalization}")
-
-    if cfg.model.normalization == "min_max_scaling":
-        surf_factors = [s_max, s_min]
-    elif cfg.model.normalization == "mean_std_scaling":
-        surf_factors = [s_mean, s_std]
-    else:
-        raise ValueError(f"Invalid normalization type: {cfg.model.normalization}")
-
-    return vol_factors, surf_factors
-
-
 @hydra.main(version_base="1.3", config_path="conf", config_name="config")
 def main(cfg: DictConfig) -> None:
     # initialize distributed manager
@@ -208,15 +139,38 @@ def main(cfg: DictConfig) -> None:
 
     logger.info(f"Config summary:\n{OmegaConf.to_yaml(cfg, sort_keys=True)}")
 
-    vol_factors, surf_factors = get_or_compute_scaling_factors(cfg)
+    ################################
+    # Get scaling factors
+    ################################
+    pickle_path = os.path.join(cfg.output) + "/scaling_factors/scaling_factors.pkl"
+
+    try:
+        scaling_factors = ScalingFactors.load(pickle_path)
+        logger.info(f"Scaling factors loaded from: {pickle_path}")
+    except FileNotFoundError:
+        raise FileNotFoundError(
+            f"Scaling factors not found at: {pickle_path}; please run compute_statistics.py to compute them."
+        )
+
+    vol_factors = scaling_factors.mean["volume_fields"]
+    surf_factors = scaling_factors.mean["surface_fields"]
+    vol_factors_tensor = torch.from_numpy(vol_factors).to(dist.device)
+
+    keys_to_read, keys_to_read_if_available = get_keys_to_read(
+        cfg, model_type, get_ground_truth=True
+    )
+
+    domain_mesh, data_mesh, placements = coordinate_distributed_environment(cfg)
 
     train_dataset = create_domino_dataset(
         cfg,
         phase="train",
-        volume_variable_names="volume_fields",
-        surface_variable_names="surface_fields",
+        keys_to_read=keys_to_read,
+        keys_to_read_if_available=keys_to_read_if_available,
         vol_factors=vol_factors,
         surf_factors=surf_factors,
+        device_mesh=domain_mesh,
+        placements=placements,
     )
     train_sampler = DistributedSampler(
         train_dataset, num_replicas=dist.world_size, rank=dist.rank
@@ -232,11 +186,14 @@ def main(cfg: DictConfig) -> None:
         start_time = time.perf_counter()
         logger.info(f"Device {dist.device}, epoch {epoch}:")
 
+        train_sampler.set_epoch(epoch)
+        print(f"indices: {list(train_sampler)}")
+        train_dataset.dataset.set_indices(list(train_sampler))
+
         epoch_start_time = time.perf_counter()
         with Profiler():
-            train_epoch(
+            benchmark_io_epoch(
                 dataloader=train_dataset,
-                sampler=train_sampler,
                 logger=logger,
                 gpu_handle=gpu_handle,
                 epoch_index=epoch,
diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py
index 5a155fc198..1e0ad5d80c 100644
--- a/examples/cfd/external_aerodynamics/domino/src/train.py
+++ b/examples/cfd/external_aerodynamics/domino/src/train.py
@@ -70,7 +70,7 @@
 from physicsnemo.models.domino.model import DoMINO
 from physicsnemo.utils.domino.utils import *
 
-from utils import ScalingFactors
+from utils import ScalingFactors, get_keys_to_read, coordinate_distributed_environment
 
 # This is included for GPU memory tracking:
 from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo
@@ -84,9 +84,6 @@
 from physicsnemo.utils.profiling import profile, Profiler
 
 
-# Profiler().enable("torch")
-# Profiler().initialize()
-
 from loss import compute_loss_dict
 from utils import get_num_vars
 
@@ -255,30 +252,34 @@ def train_epoch(
 
 @hydra.main(version_base="1.3", config_path="conf", config_name="config")
 def main(cfg: DictConfig) -> None:
-    ################################
+    ######################################################
     # initialize distributed manager
-    ################################
+    ######################################################
     DistributedManager.initialize()
     dist = DistributedManager()
 
+    # DoMINO supports domain parallel training.  This function helps coordinate
+    # how to set that up, if needed.
+    domain_mesh, data_mesh, placements = coordinate_distributed_environment(cfg)
+
     ################################
     # Initialize NVML
     ################################
     nvmlInit()
     gpu_handle = nvmlDeviceGetHandleByIndex(dist.device.index)
 
-    ################################
+    ######################################################
     # Initialize logger
-    ################################
+    ######################################################
 
     logger = PythonLogger("Train")
     logger = RankZeroLoggingWrapper(logger, dist)
 
     logger.info(f"Config summary:\n{OmegaConf.to_yaml(cfg, sort_keys=True)}")
 
-    ################################
-    # Get scaling factors
-    ################################
+    ######################################################
+    # Get scaling factors - precompute them if this fails!
+    ######################################################
     pickle_path = os.path.join(cfg.output) + "/scaling_factors/scaling_factors.pkl"
 
     try:
@@ -289,18 +290,14 @@ def main(cfg: DictConfig) -> None:
             f"Scaling factors not found at: {pickle_path}; please run compute_statistics.py to compute them."
         )
 
-    model_type = cfg.model.model_type
-
-    # Get physics imports conditionally
-    add_physics_loss = getattr(cfg.train, "add_physics_loss", False)
-
-    if add_physics_loss:
-        from physicsnemo.sym.eq.pde import PDE
-        from physicsnemo.sym.eq.ls.grads import FirstDeriv
-        from physicsnemo.sym.eq.pdes.navier_stokes import IncompressibleNavierStokes
-    else:
-        PDE = FirstDeriv = IncompressibleNavierStokes = None
+    vol_factors = scaling_factors.mean["volume_fields"]
+    surf_factors = scaling_factors.mean["surface_fields"]
+    vol_factors_tensor = torch.from_numpy(vol_factors).to(dist.device)
 
+    ######################################################
+    # Configure the model
+    ######################################################
+    model_type = cfg.model.model_type
     num_vol_vars, num_surf_vars, num_global_features = get_num_vars(cfg, model_type)
 
     if model_type == "combined" or model_type == "surface":
@@ -313,10 +310,28 @@ def main(cfg: DictConfig) -> None:
     else:
         volume_variable_names = []
 
-    vol_factors = scaling_factors.mean["volume_fields"]
-    surf_factors = scaling_factors.mean["surface_fields"]
-    vol_factors_tensor = torch.from_numpy(vol_factors).to(dist.device)
+    ######################################################
+    # Configure physics loss
+    # Unless enabled, these are null-ops
+    ######################################################
+    add_physics_loss = getattr(cfg.train, "add_physics_loss", False)
+
+    if add_physics_loss:
+        from physicsnemo.sym.eq.pde import PDE
+        from physicsnemo.sym.eq.ls.grads import FirstDeriv
+        from physicsnemo.sym.eq.pdes.navier_stokes import IncompressibleNavierStokes
+    else:
+        PDE = FirstDeriv = IncompressibleNavierStokes = None
 
+    # Initialize physics components conditionally
+    first_deriv = None
+    eqn = None
+    if add_physics_loss:
+        first_deriv = FirstDeriv(dim=3, direct_input=True)
+        eqn = IncompressibleNavierStokes(rho=1.226, nu="nu", dim=3, time=False)
+        eqn = eqn.make_nodes(return_as_dict=True)
+
+    # The bounding box is used in calculating the physics loss:
     bounding_box = None
     if add_physics_loss:
         bounding_box = cfg.data.bounding_box
@@ -328,37 +343,95 @@ def main(cfg: DictConfig) -> None:
             .to(dist.device)
         )
 
-    train_dataset = create_domino_dataset(
+    ######################################################
+    # Configure the dataset
+    ######################################################
+
+    # This helper function is to determine which keys to read from the data
+    # (and which to use default values for, if they aren't present - like
+    # air_density, for example)
+    keys_to_read, keys_to_read_if_available = get_keys_to_read(
+        cfg, model_type, get_ground_truth=True
+    )
+
+    # The dataset actually works in two pieces
+    # The core dataset just reads data from disk, and puts it on the GPU if needed.
+    # The data processesing pipeline will preprocess that data and prepare it for the model.
+    # Obviously, you need both, so this function will return the datapipeline in
+    # a way that can be iterated over.
+    #
+    # To properly shuffle the data, we use a distributed sampler too.
+    # It's configured properly for optional domain parallelism, and you have
+    # to make sure to call set_epoch below.
+
+    train_dataloader = create_domino_dataset(
         cfg,
         phase="train",
-        volume_variable_names=volume_variable_names,
-        surface_variable_names=surface_variable_names,
+        keys_to_read=keys_to_read,
+        keys_to_read_if_available=keys_to_read_if_available,
         vol_factors=vol_factors,
         surf_factors=surf_factors,
+        device_mesh=domain_mesh,
+        placements=placements,
+    )
+    train_sampler = DistributedSampler(
+        train_dataloader,
+        num_replicas=data_mesh.size(),
+        rank=data_mesh.get_local_rank(),
+        **cfg.train.sampler,
     )
-    val_dataset = create_domino_dataset(
+
+    val_dataloader = create_domino_dataset(
         cfg,
         phase="val",
-        volume_variable_names=volume_variable_names,
-        surface_variable_names=surface_variable_names,
+        keys_to_read=keys_to_read,
+        keys_to_read_if_available=keys_to_read_if_available,
         vol_factors=vol_factors,
         surf_factors=surf_factors,
+        device_mesh=domain_mesh,
+        placements=placements,
     )
-
-    train_sampler = DistributedSampler(
-        train_dataset,
-        num_replicas=dist.world_size,
-        rank=dist.rank,
-        **cfg.train.sampler,
-    )
-
     val_sampler = DistributedSampler(
-        val_dataset,
-        num_replicas=dist.world_size,
-        rank=dist.rank,
+        val_dataloader,
+        num_replicas=data_mesh.size(),
+        rank=data_mesh.get_local_rank(),
         **cfg.val.sampler,
     )
 
+    # train_dataloader = create_domino_dataset(
+    #     cfg,
+    #     phase="train",
+    #     volume_variable_names=volume_variable_names,
+    #     surface_variable_names=surface_variable_names,
+    #     vol_factors=vol_factors,
+    #     surf_factors=surf_factors,
+    # )
+    # val_dataloader = create_domino_dataset(
+    #     cfg,
+    #     phase="val",
+    #     volume_variable_names=volume_variable_names,
+    #     surface_variable_names=surface_variable_names,
+    #     vol_factors=vol_factors,
+    #     surf_factors=surf_factors,
+    # )
+
+    # train_sampler = DistributedSampler(
+    #     train_dataloader,
+    #     num_replicas=dist.world_size,
+    #     rank=dist.rank,
+    #     **cfg.train.sampler,
+    # )
+
+    # val_sampler = DistributedSampler(
+    #     val_dataloader,
+    #     num_replicas=dist.world_size,
+    #     rank=dist.rank,
+    #     **cfg.val.sampler,
+    # )
+
+    ######################################################
+    # Configure the model
+    ######################################################
     model = DoMINO(
         input_features=3,
         output_features_vol=num_vol_vars,
@@ -382,23 +455,23 @@ def main(cfg: DictConfig) -> None:
             static_graph=True,
         )
 
-    # optimizer = apex.optimizers.FusedAdam(model.parameters(), lr=0.001)
+    ######################################################
+    # Initialize optimzer and gradient scaler
+    ######################################################
+
     optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
     scheduler = torch.optim.lr_scheduler.MultiStepLR(
         optimizer, milestones=[50, 100, 200, 250, 300, 350, 400, 450], gamma=0.5
     )
 
-    # Initialize physics components conditionally
-    first_deriv = None
-    eqn = None
-    if add_physics_loss:
-        first_deriv = FirstDeriv(dim=3, direct_input=True)
-        eqn = IncompressibleNavierStokes(rho=1.226, nu="nu", dim=3, time=False)
-        eqn = eqn.make_nodes(return_as_dict=True)
-
     # Initialize the scaler for mixed precision
     scaler = GradScaler()
 
+    ######################################################
+    # Initialize output tools
+    ######################################################
+
+    # Tensorboard Writer to track training.
     writer = SummaryWriter(os.path.join(cfg.output, "tensorboard"))
 
     epoch_number = 0
@@ -414,6 +487,10 @@ def main(cfg: DictConfig) -> None:
     if dist.world_size > 1:
         torch.distributed.barrier()
 
+    ######################################################
+    # Load checkpoint if available
+    ######################################################
+
     init_epoch = load_checkpoint(
         to_absolute_path(cfg.resume_dir),
         models=model,
@@ -439,6 +516,10 @@ def main(cfg: DictConfig) -> None:
 
     initial_integral_factor_orig = cfg.model.integral_loss_scaling_factor
 
+    ######################################################
+    # Begin Training loop over epochs
+    ######################################################
+
     for epoch in range(init_epoch, cfg.train.epochs):
         start_time = time.perf_counter()
         logger.info(f"Device {dist.device}, epoch {epoch_number}:")
@@ -451,8 +532,8 @@ def main(cfg: DictConfig) -> None:
         # This controls what indices to use for each epoch.
         train_sampler.set_epoch(epoch)
         val_sampler.set_epoch(epoch)
-        train_dataset.set_indices(list(train_sampler))
-        val_dataset.set_indices(list(val_sampler))
+        train_dataloader.dataset.set_indices(list(train_sampler))
+        val_dataloader.dataset.set_indices(list(val_sampler))
 
         initial_integral_factor = initial_integral_factor_orig
 
@@ -464,7 +545,7 @@ def main(cfg: DictConfig) -> None:
         model.train(True)
         epoch_start_time = time.perf_counter()
         avg_loss = train_epoch(
-            dataloader=train_dataset,
+            dataloader=train_dataloader,
             model=model,
             optimizer=optimizer,
             scaler=scaler,
@@ -491,7 +572,7 @@ def main(cfg: DictConfig) -> None:
 
         model.eval()
         avg_vloss = validation_step(
-            dataloader=val_dataset,
+            dataloader=val_dataloader,
             model=model,
             device=dist.device,
             logger=logger,
diff --git a/examples/cfd/external_aerodynamics/domino/src/train_sharded.py b/examples/cfd/external_aerodynamics/domino/src/train_sharded.py
index f321f50b12..3b1c818cc2 100644
--- a/examples/cfd/external_aerodynamics/domino/src/train_sharded.py
+++ b/examples/cfd/external_aerodynamics/domino/src/train_sharded.py
@@ -79,7 +79,7 @@
 from physicsnemo.launch.utils import load_checkpoint, save_checkpoint
 from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper
 
-from physicsnemo.datapipes.cae.domino_datapipe import (
+from physicsnemo.datapipes.cae.domino_datapipe2 import (
     compute_scaling_factors,
     create_domino_dataset,
 )
diff --git a/examples/cfd/external_aerodynamics/domino/src/utils.py b/examples/cfd/external_aerodynamics/domino/src/utils.py
index 6befff00bb..6d05c90bfc 100644
--- a/examples/cfd/external_aerodynamics/domino/src/utils.py
+++ b/examples/cfd/external_aerodynamics/domino/src/utils.py
@@ -21,6 +21,13 @@
 import pickle
 from pathlib import Path
 from typing import Literal
+from omegaconf import DictConfig
+from physicsnemo.distributed import DistributedManager
+
+from torch.distributed.tensor.placement_types import (
+    Shard,
+    Replicate,
+)
 
 
 def get_num_vars(cfg: dict, model_type: Literal["volume", "surface", "combined"]):
@@ -82,6 +89,121 @@ def get_num_vars(cfg: dict, model_type: Literal["volume", "surface", "combined"]
     return num_vol_vars, num_surf_vars, num_global_features
 
 
+def get_keys_to_read(
+    cfg: dict,
+    model_type: Literal["volume", "surface", "combined"],
+    get_ground_truth: bool = True,
+):
+    """
+    This function helps configure the keys to read from the dataset.
+
+    And, if some global parameter values are provided in the config,
+    they are also read here and passed to the dataset.
+
+    """
+
+    # Always read these keys:
+    keys_to_read = ["stl_coordinates", "stl_centers", "stl_faces", "stl_areas"]
+
+    # If these keys are in the config, use them, else provide defaults in
+    # case they aren't in the dataset:
+    # TODO
+    keys_to_read_if_available = {
+        "global_params_values": torch.tensor([[30.0], [1.226]]),
+        "global_params_reference": torch.tensor([[30.0], [1.226]]),
+    }
+
+    # Volume keys:
+    volume_keys = [
+        "volume_mesh_centers",
+    ]
+    if get_ground_truth:
+        volume_keys.append("volume_fields")
+
+    # Surface keys:
+    surface_keys = [
+        "surface_mesh_centers",
+        "surface_normals",
+        "surface_areas",
+    ]
+    if get_ground_truth:
+        surface_keys.append("surface_fields")
+
+    if model_type == "volume" or model_type == "combined":
+        keys_to_read.extend(volume_keys)
+    if model_type == "surface" or model_type == "combined":
+        keys_to_read.extend(surface_keys)
+
+    return keys_to_read, keys_to_read_if_available
+
+
+def coordinate_distributed_environment(cfg: DictConfig):
+    """
+    Initialize the distributed env for DoMINO.  This is actually always a 2D Mesh:
+    one dimension is the data-parallel dimension (DDP), and the other is the
+    domain dimension.
+
+    For the training scripts, we need to know the rank, size of each dimension,
+    and return the domain_mesh and placements for the loader.
+
+    Args:
+        cfg: Configuration object containing the domain parallelism configuration.
+
+    Returns:
+        domain_mesh: torch.distributed.DeviceMesh: The domain mesh for the domain parallel dimension.
+        data_mesh: torch.distributed.DeviceMesh: The data mesh for the data parallel dimension.
+        placements: dict[str, torch.distributed.tensor.Placement]: The placements for the data set
+    """
+
+    DistributedManager.initialize()
+    dist = DistributedManager()
+
+    # Default to no domain parallelism:
+    domain_size = cfg.get("domain_parallelism", {}).get("domain_size", 1)
+
+    # Initialize the device mesh:
+    mesh = dist.initialize_mesh(
+        mesh_shape=(-1, domain_size), mesh_dim_names=("ddp", "domain")
+    )
+    domain_mesh = mesh["domain"]
+    data_mesh = mesh["ddp"]
+
+    if domain_size > 1:
+        # Define the default placements for each tensor that might show up in
+        # the data.  Note that we'll define placements for all keys, even if
+        # they aren't actually used.
+
+        # Note that placements are defined for pre-batched data, no batch index!
+
+        grid_like_placement = [
+            Shard(0),
+        ]
+        point_like_placement = [
+            Shard(0),
+        ]
+        replicate_placement = [
+            Replicate(),
+        ]
+        placements = {
+            "stl_coordinates": point_like_placement,
+            "stl_centers": point_like_placement,
+            "stl_faces": point_like_placement,
+            "stl_areas": point_like_placement,
+            "surface_fields": point_like_placement,
+            "volume_mesh_centers": point_like_placement,
+            "volume_fields": point_like_placement,
+            "surface_mesh_centers": point_like_placement,
+            "surface_normals": point_like_placement,
+            "surface_areas": point_like_placement,
+            "surface_fields": point_like_placement,
+        }
+    else:
+        domain_mesh = None
+        placements = None
+
+    return domain_mesh, data_mesh, placements
+
+
 @dataclass
 class ScalingFactors:
     """

From 7f01ddc9f5f3e578641f2595e8888fbbc2c8c42a Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 15 Sep 2025 21:12:13 +0000
Subject: [PATCH 29/98] Remove bug in sdf fake function

---
 physicsnemo/utils/sdf.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/physicsnemo/utils/sdf.py b/physicsnemo/utils/sdf.py
index 08f9c8c4c2..9abbafa460 100644
--- a/physicsnemo/utils/sdf.py
+++ b/physicsnemo/utils/sdf.py
@@ -195,11 +195,6 @@ def _(
     if mesh_vertices.device != mesh_indices.device:
         raise RuntimeError("mesh_vertices and mesh_indices must be on the same device")
 
-    if mesh_vertices.shape[0] != mesh_indices.shape[0]:
-        raise RuntimeError(
-            "mesh_vertices and mesh_indices must have the same number of points"
-        )
-
     N = input_points.shape[0]
 
     sdf_output = torch.empty(N, 1, device=input_points.device, dtype=input_points.dtype)

From 1d03ab754309549a4007ecf8d1e3d1c9d2fb5018 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 17 Sep 2025 21:07:15 +0000
Subject: [PATCH 30/98] Restructure datapipe to make the logical flow simpler
 and clearer.

Focus is on readability and maintainabiltiy without performance loss.
---
 physicsnemo/datapipes/cae/domino_datapipe2.py | 487 +++++++++++-------
 physicsnemo/models/domino/model.py            |  33 +-
 2 files changed, 308 insertions(+), 212 deletions(-)

diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py
index 472c09be24..781402a5b3 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe2.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe2.py
@@ -26,7 +26,6 @@
 variable names, domain resolution, sampling size etc. are configurable in config.yaml.
 """
 
-from concurrent.futures import ThreadPoolExecutor
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Iterable, Literal, Optional, Protocol, Sequence, Union
@@ -231,13 +230,17 @@ def __init__(
 
         dist = DistributedManager()
 
+        # Set devices for the preprocessing and IO target
         self.preproc_device = (
             dist.device if self.config.gpu_preprocessing else torch.device("cpu")
         )
+        # The drivaer_ml_dataset will automatically target this device
+        # In an async transfer.
         self.output_device = (
             dist.device if self.config.gpu_output else torch.device("cpu")
         )
 
+        # Model type determines whether we process surface, volume, or both.
         self.model_type = model_type
 
         # Update the arrays for bounding boxes:
@@ -256,12 +259,13 @@ def __init__(
                     dtype=torch.float32,
                 ),
             ]
-            self.volume_grid = create_grid(
+            self.default_volume_grid = create_grid(
                 self.config.bounding_box_dims[0],
                 self.config.bounding_box_dims[1],
                 self.config.grid_resolution,
             )
 
+        # And, do the surface bounding box if supplied:
         if hasattr(self.config.bounding_box_dims_surf, "max") and hasattr(
             self.config.bounding_box_dims_surf, "min"
         ):
@@ -278,7 +282,7 @@ def __init__(
                 ),
             ]
 
-            self.surf_grid = create_grid(
+            self.default_surface_grid = create_grid(
                 self.config.bounding_box_dims_surf[0],
                 self.config.bounding_box_dims_surf[1],
                 self.config.grid_resolution,
@@ -301,56 +305,71 @@ def __init__(
 
         self.dataset = None
 
-        # This is thread storage for data preprocessing:
-        self._preprocess_queue = {}
-        self._preprocess_events = {}
-        self.preprocess_depth = 2
-        self.preprocess_executor = ThreadPoolExecutor(max_workers=1)
-
-    def compute_stl_scaling(
-        self, stl_vertices: torch.Tensor, bounding_box_dims_surf: torch.Tensor | None
-    ):
+    def compute_stl_scaling_and_surface_grids(
+        self,
+        stl_vertices: torch.Tensor,
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
         """
         Compute the min and max for the defining mesh.
 
-        """
+        If the user supplies a bounding box, we use that.  Otherwise,
+        it's created dynamically from the min/max of the stl vertices.
 
-        s_min = torch.amin(stl_vertices, 0)
-        s_max = torch.amax(stl_vertices, 0)
+        The returned min/max and grid are used for surface data.
+        """
 
-        # if dynamic_bbox_scaling:
         # Check the bounding box is not unit length
 
-        if bounding_box_dims_surf is not None:
-            s_max = bounding_box_dims_surf[0]
-            s_min = bounding_box_dims_surf[1]
-            surf_grid = self.surf_grid
+        if self.config.bounding_box_dims_surf is not None:
+            s_max = self.config.bounding_box_dims_surf[0]
+            s_min = self.config.bounding_box_dims_surf[1]
+            surf_grid = self.default_surface_grid
         else:
-            # Create the grid:
-            surf_grid = create_grid(s_max, s_min, self.grid_resolution)
+            # Create the grid dynamically
+            s_min = torch.amin(stl_vertices, 0)
+            s_max = torch.amax(stl_vertices, 0)
+            surf_grid = create_grid(s_max, s_min, self.config.grid_resolution)
+
+        return s_min, s_max, surf_grid
+
+    def compute_volume_scaling_and_grids(
+        self, s_min: torch.Tensor, s_max: torch.Tensor
+    ):
+        """
+        Compute the min and max and grid for volume data.
+
+        If the user supplies a bounding box, we use that.  Otherwise,
+        it's created dynamically from the surface min/max.
 
-        surf_grid_max_min = torch.stack([s_min, s_max])
+        This will be 2x longer in x and y and the same in z as the surface bounding box.
+        """
 
-        return s_min, s_max, surf_grid_max_min, surf_grid
+        # Determine the volume min / max locations
+        if self.config.bounding_box_dims is not None:
+            c_max = self.config.bounding_box_dims[0]
+            c_min = self.config.bounding_box_dims[1]
+            volume_grid = self.default_volume_grid
+
+        else:
+            # Create the grid based on the surface grid
+            c_max = s_max + (s_max - s_min) / 2
+            c_min = s_min - (s_max - s_min) / 2
+            c_min[2] = s_min[2]
+            volume_grid = create_grid(c_max, c_min, self.config.grid_resolution)
+
+        return c_min, c_max, volume_grid
 
     @profile
-    def process_combined(
+    def downsample_geometry(
         self,
-        s_min,
-        s_max,
-        surf_grid,
         stl_vertices,
-        mesh_indices_flattened,
-    ):
-        # SDF calculation on the grid using WARP
-        nx, ny, nz = self.config.grid_resolution
+    ) -> torch.Tensor:
+        """
+        Downsample the geometry to the desired number of points.
 
-        sdf_surf_grid, _ = signed_distance_field(
-            stl_vertices,
-            mesh_indices_flattened,
-            surf_grid,
-            use_sign_winding_number=True,
-        )
+        Args:
+            stl_vertices: The vertices of the surface.
+        """
 
         if self.config.sampling:
             geometry_points = self.config.geom_points_sample
@@ -365,31 +384,41 @@ def process_combined(
         else:
             geom_centers = stl_vertices
 
-        return (sdf_surf_grid, geom_centers)
+        return geom_centers
 
     def process_surface(
         self,
         s_min: torch.Tensor,
         s_max: torch.Tensor,
+        c_min: torch.Tensor,
+        c_max: torch.Tensor,
+        *,  # Forcing the rest by keyword only since it's a long list ...
         center_of_mass: torch.Tensor,
         surf_grid: torch.Tensor,
         surface_coordinates: torch.Tensor,
         surface_normals: torch.Tensor,
         surface_sizes: torch.Tensor,
+        stl_vertices: torch.Tensor,
+        stl_indices: torch.Tensor,
         surface_fields: torch.Tensor | None,
     ) -> dict[str, torch.Tensor]:
         nx, ny, nz = self.config.grid_resolution
 
         return_dict = {}
 
+        ########################################################################
         # Remove any sizes <= 0:
+        ########################################################################
         idx = surface_sizes > 0
         surface_sizes = surface_sizes[idx]
-        if surface_fields is not None:
-            surface_fields = surface_fields[idx]
         surface_normals = surface_normals[idx]
         surface_coordinates = surface_coordinates[idx]
+        if surface_fields is not None:
+            surface_fields = surface_fields[idx]
 
+        ########################################################################
+        # Surface resampling ...
+        ########################################################################
         if self.config.resample_surfaces:
             if self.config.resampling_points > surface_coordinates.shape[0]:
                 resampling_points = surface_coordinates.shape[0]
@@ -404,9 +433,10 @@ def process_surface(
             if surface_fields is not None:
                 surface_fields = surface_fields[idx_s]
 
-        c_max = self.config.bounding_box_dims[0]
-        c_min = self.config.bounding_box_dims[1]
-
+        ########################################################################
+        # Reject surface points outside of the Bounding Box
+        # NOTE - this is using the VOLUME bounding box!
+        ########################################################################
         if self.config.sample_in_bbox:
             ids_min = surface_coordinates[:] > c_min
             ids_max = surface_coordinates[:] < c_max
@@ -433,12 +463,20 @@ def process_surface(
         else:
             pos_normals_com_surface = surface_coordinates - center_of_mass
 
+        ########################################################################
+        # Perform Down sampling of the surface fields.
+        # Note that we snapshot the full surface coordinates for
+        # use in the kNN in the next step.
+        ########################################################################
+
+        full_surface_coordinates = surface_coordinates
+        full_surface_normals = surface_normals
+        full_surface_sizes = surface_sizes
+
         if self.config.sampling:
             # Perform the down sampling:
-
             if self.config.surface_sampling_algorithm == "area_weighted":
                 weights = surface_sizes
-
             else:
                 weights = None
 
@@ -458,66 +496,56 @@ def process_surface(
             # Select out the sampled points for non-neighbor arrays:
             surface_fields = surface_fields[idx_surface]
             pos_normals_com_surface = pos_normals_com_surface[idx_surface]
-
-            # Now, perform the kNN on the sampled points:
-            if self.config.num_surface_neighbors > 1:
-                neighbor_indices, neighbor_distances = knn(
-                    points=surface_coordinates,
-                    queries=surface_coordinates_sampled,
-                    k=self.config.num_surface_neighbors,
-                )
-
-                # Pull out the neighbor elements.  Note that ii is the index into the original
-                # points - but only exists for the sampled points
-                # In other words, a point from `surface_coordinates_sampled` has neighbors
-                # from the full `surface_coordinates` array.
-                surface_neighbors = surface_coordinates[neighbor_indices][:, 1:]
-                surface_neighbors_normals = surface_normals[neighbor_indices][:, 1:]
-                surface_neighbors_sizes = surface_sizes[neighbor_indices][:, 1:]
-            else:
-                surface_neighbors = surface_coordinates
-                surface_neighbors_normals = surface_normals
-                surface_neighbors_sizes = surface_sizes
-
             # Subsample the normals and sizes:
             surface_normals = surface_normals[idx_surface]
             surface_sizes = surface_sizes[idx_surface]
-
             # Update the coordinates to the sampled points:
             surface_coordinates = surface_coordinates_sampled
 
-        else:
-            neighbor_indices, _ = knn(
-                points=surface_coordinates,
+        ########################################################################
+        # Perform a kNN on the surface to find the neighbor information
+        ########################################################################
+        if self.config.num_surface_neighbors > 1:
+            # Perform the kNN:
+            neighbor_indices, neighbor_distances = knn(
+                points=full_surface_coordinates,
                 queries=surface_coordinates,
                 k=self.config.num_surface_neighbors,
             )
 
-            # Construct the neighbors arrays:
-            surface_neighbors = surface_coordinates[neighbor_indices][:, 1:]
-            surface_neighbors_normals = surface_normals[neighbor_indices][:, 1:]
-            surface_neighbors_sizes = surface_sizes[neighbor_indices][:, 1:]
+            # Pull out the neighbor elements.
+            # Note that `neighbor_indices` is the index into the original,
+            # full sized tensors (full_surface_coordinates, etc).
+            surface_neighbors = full_surface_coordinates[neighbor_indices][:, 1:]
+            surface_neighbors_normals = full_surface_normals[neighbor_indices][:, 1:]
+            surface_neighbors_sizes = full_surface_sizes[neighbor_indices][:, 1:]
 
-        # Have to normalize neighbors after the kNN and sampling
+        # Better to normalize everything after the kNN and sampling
         if self.config.normalize_coordinates:
             surf_grid = normalize(surf_grid, s_max, s_min)
             surface_coordinates = normalize(surface_coordinates, s_max, s_min)
             surface_neighbors = normalize(surface_neighbors, s_max, s_min)
+            # This is for the SDF Later:
+            normed_vertices = normalize(stl_vertices, s_max, s_min)
+        else:
+            normed_vertices = stl_vertices
+
+        ########################################################################
+        # Apply scaling to the targets, if desired:
+        ########################################################################
+        if self.config.scaling_type is not None and surface_fields is not None:
+            surface_fields = self.scale_model_targets(
+                surface_fields, self.config.surface_factors
+            )
 
-        if self.config.scaling_type is not None:
-            if self.config.surface_factors is not None:
-                if self.config.scaling_type == "mean_std_scaling":
-                    surf_mean = self.config.surface_factors[0]
-                    surf_std = self.config.surface_factors[1]
-                    if surface_fields is not None:
-                        surface_fields = standardize(
-                            surface_fields, surf_mean, surf_std
-                        )
-                elif self.config.scaling_type == "min_max_scaling":
-                    surf_min = self.config.surface_factors[1]
-                    surf_max = self.config.surface_factors[0]
-                    if surface_fields is not None:
-                        surface_fields = normalize(surface_fields, surf_max, surf_min)
+        # Compute signed distance function for the surface grid:
+        sdf_surf_grid, _ = signed_distance_field(
+            mesh_vertices=normed_vertices,
+            mesh_indices=stl_indices,
+            input_points=surf_grid,
+            use_sign_winding_number=True,
+        )
+        return_dict["sdf_surf_grid"] = sdf_surf_grid
 
         return_dict.update(
             {
@@ -537,27 +565,27 @@ def process_surface(
 
     def process_volume(
         self,
-        s_min: torch.Tensor,
-        s_max: torch.Tensor,
+        c_min: torch.Tensor,
+        c_max: torch.Tensor,
         volume_coordinates: torch.Tensor,
-        volume_fields: torch.Tensor | None,
-        stl_vertices: torch.Tensor,
-        mesh_indices_flattened: torch.Tensor,
+        volume_grid: torch.Tensor,
         center_of_mass: torch.Tensor,
+        stl_vertices: torch.Tensor,
+        stl_indices: torch.Tensor,
+        volume_fields: torch.Tensor | None,
     ) -> dict[str, torch.Tensor]:
-        return_dict = {}
+        """
+        Preprocess the volume data.
 
-        nx, ny, nz = self.config.grid_resolution
+        First, if configured, we reject points not in the volume bounding box.
 
-        # Determine the volume min / max locations
-        if self.config.bounding_box_dims is None:
-            c_max = s_max + (s_max - s_min) / 2
-            c_min = s_min - (s_max - s_min) / 2
-            c_min[2] = s_min[2]
-        else:
-            c_max = self.config.bounding_box_dims[0]
-            c_min = self.config.bounding_box_dims[1]
+        Next, if sampling is enabled, we sample the volume points and apply that
+        sampling to the ground truth too, if it's present.
 
+        """
+        ########################################################################
+        # Reject points outside the volumetric BBox
+        ########################################################################
         if self.config.sample_in_bbox:
             # Remove points in the volume that are outside
             # of the bbox area.
@@ -571,27 +599,9 @@ def process_volume(
             if volume_fields is not None:
                 volume_fields = volume_fields[ids_in_bbox]
 
-        dx, dy, dz = (
-            (c_max[0] - c_min[0]) / nx,
-            (c_max[1] - c_min[1]) / ny,
-            (c_max[2] - c_min[2]) / nz,
-        )
-
-        # TODO - we need to make sure if the bbox is dynamic,
-        # the bounds on the grid are correct
-
-        # # Generate a grid of specified resolution to map the bounding box
-        # # The grid is used for capturing structured geometry features and SDF representation of geometry
-        # grid = create_grid(c_max, c_min, [nx, ny, nz])
-        # grid_reshaped = grid.reshape(nx * ny * nz, 3)
-
-        # SDF calculation on the volume grid using WARP
-        sdf_grid, _ = signed_distance_field(
-            stl_vertices,
-            mesh_indices_flattened,
-            self.volume_grid,
-            use_sign_winding_number=True,
-        )
+        ########################################################################
+        # Apply sampling to the volume coordinates and fields
+        ########################################################################
 
         if self.config.sampling:
             # Generate a series of idx to sample the volume
@@ -602,6 +612,8 @@ def process_volume(
             )
             volume_coordinates_sampled = volume_coordinates[idx_volume]
 
+            # In case too few points are in the sampled data (because the
+            # inputs were too few), pad the outputs:
             if volume_coordinates_sampled.shape[0] < self.config.volume_points_sample:
                 padding_size = (
                     self.config.volume_points_sample
@@ -613,20 +625,93 @@ def process_volume(
                     mode="constant",
                     value=-10.0,
                 )
+
+            # Apply the same sampling to the targets, too:
             if volume_fields is not None:
                 volume_fields = volume_fields[idx_volume]
+
             volume_coordinates = volume_coordinates_sampled
 
+        ########################################################################
+        # Apply normalization to the coordinates, if desired:
+        ########################################################################
+        if self.config.normalize_coordinates:
+            volume_coordinates = normalize(volume_coordinates, c_max, c_min)
+            grid = normalize(volume_grid, c_max, c_min)
+            # This is used later in the SDF, apply the same scaling to the mesh
+            # coordinates:
+            normed_vertices = normalize(stl_vertices, c_max, c_min)
+        else:
+            grid = volume_grid
+            normed_vertices = stl_vertices
+
+        ########################################################################
+        # Apply scaling to the targets, if desired:
+        ########################################################################
+        if self.config.scaling_type is not None and volume_fields is not None:
+            volume_fields = self.scale_model_targets(
+                volume_fields, self.config.volume_factors
+            )
+
+        ########################################################################
+        # Compute Signed Distance Function for volumetric quantities
+        # Note - the SDF happens here, after volume data processing finishes,
+        # because we need to use the (maybe) normalized volume coordinates and grid
+        ########################################################################
+
+        # SDF calculation on the volume grid using WARP
+        sdf_grid, _ = signed_distance_field(
+            normed_vertices,
+            stl_indices,
+            grid,
+            use_sign_winding_number=True,
+        )
+
         # Get the SDF of all the selected volume coordinates,
         # And keep the closest point to each one.
         sdf_nodes, sdf_node_closest_point = signed_distance_field(
-            stl_vertices,
-            mesh_indices_flattened,
+            normed_vertices,
+            stl_indices,
             volume_coordinates,
             use_sign_winding_number=True,
         )
         sdf_nodes = sdf_nodes.reshape((-1, 1))
 
+        # Use the closest point from the mesh to compute the volume encodings:
+        pos_normals_closest_vol, pos_normals_com_vol = self.calculate_volume_encoding(
+            c_min, c_max, volume_coordinates, sdf_node_closest_point, center_of_mass
+        )
+
+        return_dict = {
+            "volume_mesh_centers": volume_coordinates,
+            "sdf_nodes": sdf_nodes,
+            "grid": grid,
+            "sdf_grid": sdf_grid,
+            "pos_volume_closest": pos_normals_closest_vol,
+            "pos_volume_center_of_mass": pos_normals_com_vol,
+        }
+
+        if volume_fields is not None:
+            return_dict["volume_fields"] = volume_fields
+
+        return return_dict
+
+    def calculate_volume_encoding(
+        self,
+        c_min: torch.Tensor,
+        c_max: torch.Tensor,
+        volume_coordinates: torch.Tensor,
+        sdf_node_closest_point: torch.Tensor,
+        center_of_mass: torch.Tensor,
+    ):
+        nx, ny, nz = self.config.grid_resolution
+
+        dx, dy, dz = (
+            (c_max[0] - c_min[0]) / nx,
+            (c_max[1] - c_min[1]) / ny,
+            (c_max[2] - c_min[2]) / nz,
+        )
+
         if self.config.positional_encoding:
             pos_normals_closest_vol = calculate_normal_positional_encoding(
                 volume_coordinates,
@@ -640,42 +725,7 @@ def process_volume(
             pos_normals_closest_vol = volume_coordinates - sdf_node_closest_point
             pos_normals_com_vol = volume_coordinates - center_of_mass
 
-        if self.config.normalize_coordinates:
-            volume_coordinates = normalize(volume_coordinates, c_max, c_min)
-            grid = normalize(self.volume_grid, c_max, c_min)
-        else:
-            grid = self.volume_grid
-
-        if self.config.scaling_type is not None:
-            if self.config.volume_factors is not None:
-                if self.config.scaling_type == "mean_std_scaling":
-                    vol_mean = self.config.volume_factors[0]
-                    vol_std = self.config.volume_factors[1]
-                    if volume_fields is not None:
-                        volume_fields = standardize(volume_fields, vol_mean, vol_std)
-                elif self.config.scaling_type == "min_max_scaling":
-                    vol_min = self.config.volume_factors[1]
-                    vol_max = self.config.volume_factors[0]
-                    if volume_fields is not None:
-                        volume_fields = normalize(volume_fields, vol_max, vol_min)
-
-        vol_grid_max_min = torch.stack([c_min, c_max])
-
-        return_dict.update(
-            {
-                "pos_volume_closest": pos_normals_closest_vol,
-                "pos_volume_center_of_mass": pos_normals_com_vol,
-                "grid": grid,
-                "sdf_grid": sdf_grid,
-                "sdf_nodes": sdf_nodes,
-                "volume_mesh_centers": volume_coordinates,
-                "volume_min_max": vol_grid_max_min,
-            }
-        )
-        if volume_fields is not None:
-            return_dict["volume_fields"] = volume_fields
-
-        return return_dict
+        return pos_normals_closest_vol, pos_normals_com_vol
 
     @torch.no_grad()
     def process_data(self, data_dict):
@@ -685,75 +735,103 @@ def process_data(self, data_dict):
             "global_params_reference": data_dict["global_params_reference"],
         }
 
+        ########################################################################
+        # Process the core STL information
+        ########################################################################
+
         # This function gets information about the surface scale,
         # and decides what the surface grid will be:
-        (s_min, s_max, surf_grid_max_min, surf_grid) = self.compute_stl_scaling(
-            data_dict["stl_coordinates"], self.config.bounding_box_dims_surf
+        s_min, s_max, surf_grid = self.compute_stl_scaling_and_surface_grids(
+            data_dict["stl_coordinates"]
         )
+        return_dict["surf_grid"] = surf_grid
+
+        # Store this only if normalization is active:
+        if self.model_type == "surface" or self.model_type == "combined":
+            if self.config.normalize_coordinates:
+                return_dict["surface_min_max"] = torch.stack([s_min, s_max])
 
         # This is a center of mass computation for the stl surface,
         # using the size of each mesh point as weight.
-
         center_of_mass = calculate_center_of_mass(
             data_dict["stl_centers"], data_dict["stl_areas"]
         )
 
-        # For SDF calculations, make sure the mesh_indices_flattened is an integer array:
-        mesh_indices_flattened = data_dict["stl_faces"].to(torch.int32)
-
-        return_dict.update(
-            {
-                "surface_min_max": surf_grid_max_min,
-            }
-        )
-
-        # This will compute the sdf on the surface grid and apply downsampling if needed
-        sdf_surf_grid, geom_centers = self.process_combined(
-            s_min,
-            s_max,
-            surf_grid,
+        # This will apply downsampling if needed to the geometry coordinates
+        geom_centers = self.downsample_geometry(
             stl_vertices=data_dict["stl_coordinates"],
-            mesh_indices_flattened=mesh_indices_flattened,
         )
-        return_dict["surf_grid"] = surf_grid
-
-        return_dict["sdf_surf_grid"] = sdf_surf_grid
         return_dict["geometry_coordinates"] = geom_centers
 
-        # Up to here works all in torch!
+        ########################################################################
+        # Determine the volumetric bounds of the data:
+        ########################################################################
+        # Compute the min/max for volume an the unnomralized grid:
+        c_min, c_max, volume_grid = self.compute_volume_scaling_and_grids(s_min, s_max)
+
+        # For volume data, we store this only if normalizing coordinates:
+        if self.model_type == "volume" or self.model_type == "combined":
+            if self.config.normalize_coordinates:
+                return_dict["volume_min_max"] = torch.stack([c_min, c_max])
+
+        # For SDF calculations, make sure the mesh_indices_flattened is an integer array:
+        mesh_indices_flattened = data_dict["stl_faces"].to(torch.int32)
 
         if self.model_type == "volume" or self.model_type == "combined":
+            volume_fields_raw = (
+                data_dict["volume_fields"] if "volume_fields" in data_dict else None
+            )
             volume_dict = self.process_volume(
-                s_min,
-                s_max,
+                c_min,
+                c_max,
                 volume_coordinates=data_dict["volume_mesh_centers"],
-                volume_fields=data_dict["volume_fields"]
-                if "volume_fields" in data_dict
-                else None,
-                stl_vertices=data_dict["stl_coordinates"],
-                mesh_indices_flattened=mesh_indices_flattened,
+                volume_grid=volume_grid,
                 center_of_mass=center_of_mass,
+                stl_vertices=data_dict["stl_coordinates"],
+                stl_indices=mesh_indices_flattened,
+                volume_fields=volume_fields_raw,
             )
 
             return_dict.update(volume_dict)
 
         if self.model_type == "surface" or self.model_type == "combined":
+            surface_fields_raw = (
+                data_dict["surface_fields"] if "surface_fields" in data_dict else None
+            )
             surface_dict = self.process_surface(
                 s_min,
                 s_max,
-                center_of_mass,
-                surf_grid,
+                c_min,
+                c_max,
+                center_of_mass=center_of_mass,
+                surf_grid=surf_grid,
                 surface_coordinates=data_dict["surface_mesh_centers"],
                 surface_normals=data_dict["surface_normals"],
                 surface_sizes=data_dict["surface_areas"],
-                surface_fields=data_dict["surface_fields"]
-                if "surface_fields" in data_dict
-                else None,
+                stl_vertices=data_dict["stl_coordinates"],
+                stl_indices=mesh_indices_flattened,
+                surface_fields=surface_fields_raw,
             )
+
             return_dict.update(surface_dict)
 
         return return_dict
 
+    def scale_model_targets(
+        self, fields: torch.Tensor, factors: torch.Tensor
+    ) -> torch.Tensor:
+        """
+        Scale the model targets based on the configured scaling factors.
+        """
+        if self.config.scaling_type == "mean_std_scaling":
+            field_mean = self.config.volume_factors[0]
+            field_std = self.config.volume_factors[1]
+            return standardize(fields, field_mean, field_std)
+        elif self.config.scaling_type == "min_max_scaling":
+            field_min = self.config.volume_factors[1]
+            field_max = self.config.volume_factors[0]
+            return normalize(fields, field_max, field_min)
+
     def unscale_model_outputs(
         self, volume_fields: torch.Tensor | None, surface_fields: torch.Tensor | None
     ):
@@ -787,6 +865,9 @@ def unscale_model_outputs(
         return volume_fields, surface_fields
 
     def set_dataset(self, dataset: Iterable) -> None:
+        """
+        Pass a dataset to the datapipe to enable iterating over both in one pass.
+        """
         self.dataset = dataset
 
     def __len__(self):
@@ -801,8 +882,9 @@ def __getitem__(self, idx):
 
         Domino, in general, expects one example per file and the files
         are relatively large due to the mesh size.
-        """
 
+        Requires the user to have set a dataset via `set_dataset`.
+        """
         if self.dataset is None:
             raise ValueError("Dataset is not present")
 
@@ -812,7 +894,7 @@ def __getitem__(self, idx):
 
         return self.__call__(data_dict)
 
-    def __call__(self, data_dict: dict) -> dict:
+    def __call__(self, data_dict: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
         """
         Process the incoming data dictionary.
         - Processes the data
@@ -839,6 +921,11 @@ def __call__(self, data_dict: dict) -> dict:
         return data_dict
 
     def __iter__(self):
+        if self.dataset is None:
+            raise ValueError(
+                "Dataset is not present, can not use the datapipe as an iterator."
+            )
+
         for i, batch in enumerate(self.dataset):
             yield self.__call__(batch)
 
diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py
index ff0a5482c8..24588b123c 100644
--- a/physicsnemo/models/domino/model.py
+++ b/physicsnemo/models/domino/model.py
@@ -1884,9 +1884,6 @@ def forward(self, data_dict, return_volume_neighbors=False):
         # Bounding box grid
         s_grid = data_dict["surf_grid"]
         sdf_surf_grid = data_dict["sdf_surf_grid"]
-        # Scaling factors
-        surf_max = data_dict["surface_min_max"][:, 1]
-        surf_min = data_dict["surface_min_max"][:, 0]
 
         # Parameters
         global_params_values = data_dict["global_params_values"]
@@ -1897,12 +1894,17 @@ def forward(self, data_dict, return_volume_neighbors=False):
             # Computational domain grid
             p_grid = data_dict["grid"]
             sdf_grid = data_dict["sdf_grid"]
-            # Scaling factors
-            vol_max = data_dict["volume_min_max"][:, 1]
-            vol_min = data_dict["volume_min_max"][:, 0]
-
-            # Normalize based on computational domain
-            geo_centers_vol = 2.0 * (geo_centers - vol_min) / (vol_max - vol_min) - 1
+            if "volume_min_max" in data_dict.keys():
+                # Scaling factors
+                vol_max = data_dict["volume_min_max"][:, 1]
+                vol_min = data_dict["volume_min_max"][:, 0]
+
+                # Normalize based on computational domain
+                geo_centers_vol = (
+                    2.0 * (geo_centers - vol_min) / (vol_max - vol_min) - 1
+                )
+            else:
+                geo_centers_vol = geo_centers
 
             encoding_g_vol = self.geo_rep_volume(geo_centers_vol, p_grid, sdf_grid)
 
@@ -1926,9 +1928,16 @@ def forward(self, data_dict, return_volume_neighbors=False):
 
         if self.output_features_surf is not None:
             # Represent geometry on bounding box
-            geo_centers_surf = (
-                2.0 * (geo_centers - surf_min) / (surf_max - surf_min) - 1
-            )
+            if "surface_min_max" in data_dict.keys():
+                # Scaling factors
+                surf_max = data_dict["surface_min_max"][:, 1]
+                surf_min = data_dict["surface_min_max"][:, 0]
+                geo_centers_surf = (
+                    2.0 * (geo_centers - surf_min) / (surf_max - surf_min) - 1
+                )
+            else:
+                geo_centers_surf = geo_centers
+
             encoding_g_surf = self.geo_rep_surface(
                 geo_centers_surf, s_grid, sdf_surf_grid
             )

From b7b7a65901642fffb9cfd7cf03e28124b7dcfeaf Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 17 Sep 2025 21:09:46 +0000
Subject: [PATCH 31/98] Ensure RMM is actually used...

---
 .../external_aerodynamics/domino/src/benchmark_dataloader.py    | 2 +-
 .../cfd/external_aerodynamics/domino/src/inference_on_stl2.py   | 2 +-
 examples/cfd/external_aerodynamics/domino/src/train.py          | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
index 80c4b9e3cf..f24e0ffe16 100644
--- a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
+++ b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
@@ -40,7 +40,7 @@
 from hydra.utils import to_absolute_path
 from omegaconf import DictConfig, OmegaConf
 
-DISABLE_RMM = os.environ.get("DOMINO_DISABLE_RMM", "False")
+DISABLE_RMM = os.environ.get("DOMINO_DISABLE_RMM", False)
 if not DISABLE_RMM:
     import rmm
     from rmm.allocators.torch import rmm_torch_allocator
diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py
index 9ff5b62d0d..3c6acc3ccd 100644
--- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py
+++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py
@@ -42,7 +42,7 @@
 from omegaconf import DictConfig, OmegaConf
 import torch
 
-DISABLE_RMM = os.environ.get("DISABLE_RMM", "False")
+DISABLE_RMM = os.environ.get("DISABLE_RMM", False)
 if not DISABLE_RMM:
     import rmm
     from rmm.allocators.torch import rmm_torch_allocator
diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py
index 1e0ad5d80c..0176084082 100644
--- a/examples/cfd/external_aerodynamics/domino/src/train.py
+++ b/examples/cfd/external_aerodynamics/domino/src/train.py
@@ -39,7 +39,7 @@
 from omegaconf import DictConfig, OmegaConf
 
 
-DISABLE_RMM = os.environ.get("DOMINO_DISABLE_RMM", "False")
+DISABLE_RMM = os.environ.get("DOMINO_DISABLE_RMM", False)
 if not DISABLE_RMM:
     import rmm
     from rmm.allocators.torch import rmm_torch_allocator

From c5e1db817e9fc3977489c37b28cc5bd1aa3f0578 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 17 Sep 2025 21:35:17 +0000
Subject: [PATCH 32/98] Add sharded implementations of both kNN and SDF, as
 well as tests for them.

---
 .../distributed/shard_utils/__init__.py       |  10 +-
 physicsnemo/distributed/shard_utils/knn.py    | 212 ++++++++++++++++++
 .../distributed/shard_utils/mesh_ops.py       | 146 ++++++++++++
 test/distributed/shard_tensor/ops/test_knn.py |  72 ++++++
 test/distributed/shard_tensor/ops/test_sdf.py | 119 ++++++++++
 test/distributed/shard_tensor/ops/utils.py    |  44 ++++
 6 files changed, 600 insertions(+), 3 deletions(-)
 create mode 100644 physicsnemo/distributed/shard_utils/knn.py
 create mode 100644 physicsnemo/distributed/shard_utils/mesh_ops.py
 create mode 100644 test/distributed/shard_tensor/ops/test_knn.py
 create mode 100644 test/distributed/shard_tensor/ops/test_sdf.py

diff --git a/physicsnemo/distributed/shard_utils/__init__.py b/physicsnemo/distributed/shard_utils/__init__.py
index 36ec8e63bb..e332307869 100644
--- a/physicsnemo/distributed/shard_utils/__init__.py
+++ b/physicsnemo/distributed/shard_utils/__init__.py
@@ -21,7 +21,12 @@
 # Prevent importing this module if the minimum version of pytorch is not met.
 try:
     check_module_requirements("physicsnemo.distributed.shard_tensor")
+    SHARD_TENSOR_AVAILABLE = True
 
+except ImportError:
+    pass
+
+if SHARD_TENSOR_AVAILABLE:
     from physicsnemo.distributed.shard_tensor import ShardTensor
 
     def register_shard_wrappers():
@@ -32,6 +37,8 @@ def register_shard_wrappers():
             sharded_select_backward_helper,
             sharded_select_helper,
         )
+        from .knn import knn_sharded_wrapper
+        from .mesh_ops import sharded_signed_distance_field_wrapper
 
         # Currently disabled until wrapt is removed
         # from .natten_patches import na2d_wrapper
@@ -40,6 +47,3 @@ def register_shard_wrappers():
         from .pooling_patches import generic_avg_pool_nd_wrapper
         from .unary_ops import unsqueeze_wrapper
         from .unpooling_patches import generic_interpolate_wrapper
-
-except ImportError:
-    pass
diff --git a/physicsnemo/distributed/shard_utils/knn.py b/physicsnemo/distributed/shard_utils/knn.py
new file mode 100644
index 0000000000..689223000c
--- /dev/null
+++ b/physicsnemo/distributed/shard_utils/knn.py
@@ -0,0 +1,212 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Callable
+
+import numpy as np
+import torch
+import torch.distributed as dist
+
+from physicsnemo.utils.neighbors.knn._cuml_impl import knn_impl
+from physicsnemo.utils.version_check import check_module_requirements
+
+check_module_requirements("physicsnemo.distributed.shard_tensor")
+
+from physicsnemo.distributed import ShardTensor  # noqa: E402
+from physicsnemo.distributed.shard_utils.patch_core import (  # noqa: E402
+    MissingShardPatch,
+)
+from physicsnemo.distributed.shard_utils.ring import (  # noqa: E402
+    RingPassingConfig,
+    perform_ring_iteration,
+)
+
+
+def ring_knn(
+    points: ShardTensor, queries: ShardTensor, k: int
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """
+    Ring based kNN implementation, where the points travel around a ring and the
+    queries stay local.
+    """
+    # Each tensor has a _spec attribute, which contains information about the tensor's placement
+    # and the devices it lives on:
+    points_spec = points._spec
+    queries_spec = queries._spec
+
+    # ** In general ** you want to do some checking on the placements, since each
+    # point cloud might be sharded differently.  By construction, I know they're both
+    # sharded along the points axis here (and not, say, replicated).
+
+    if not points_spec.mesh == queries_spec.mesh:
+        raise NotImplementedError("Tensors must be sharded on the same mesh")
+
+    mesh = points_spec.mesh
+    local_group = mesh.get_group(0)
+    local_size = dist.get_world_size(group=local_group)
+    mesh_rank = mesh.get_local_rank()
+
+    # points and queries are both sharded - and since we're returning the nearest
+    # neighbors to points, let's make sure the output keeps that sharding too.
+
+    # One memory-efficient way to do this is with with a ring computation.
+    # We'll compute the knn on the local tensors, get the distances and outputs,
+    # then shuffle the queries shards along the mesh.
+
+    # we'll need to sort the results and make sure we have just the top-k,
+    # which is a little extra computation.
+
+    # Physics nemo has a ring passing utility we can use.
+    ring_config = RingPassingConfig(
+        mesh_dim=0,
+        mesh_size=local_size,
+        ring_direction="forward",
+        communication_method="p2p",
+    )
+
+    local_points, local_queries = points.to_local(), queries.to_local()
+    current_dists = None
+    current_topk_idx = None
+
+    points_spec = points._spec
+
+    points_sharding_shapes = points_spec.sharding_shapes()[0]
+
+    sharding_dim = points_spec.placements[0].dim
+
+    # This is to help specify the offset from local to global tensor.
+    points_strides_along_ring = [s[sharding_dim] for s in points_sharding_shapes]
+    points_strides_along_ring = np.cumsum(points_strides_along_ring)
+    points_strides_along_ring = [
+        0,
+    ] + list(points_strides_along_ring[0:-1])
+
+    for i in range(local_size):
+        source_rank = (mesh_rank - i) % local_size
+
+        # For point clouds, we need to pass the size of the incoming shard.
+        next_source_rank = (source_rank - 1) % local_size
+        recv_shape = points_sharding_shapes[next_source_rank]
+        if i != local_size - 1:
+            # Don't do a ring on the last iteration.
+            next_local_points = perform_ring_iteration(
+                local_points,
+                mesh,
+                ring_config,
+                recv_shape=recv_shape,
+            )
+
+        # Compute the knn on the local tensors:
+        local_idx, local_distances = knn_impl(local_points, local_queries, k)
+
+        # The local_idx indexes into the _local_ tensor, but for
+        # Correctness we need it to index into the _global_ tensor.
+        # Make sure to index using the rank the points came from!
+        offset = points_strides_along_ring[source_rank]
+        local_idx = local_idx + offset
+
+        if current_dists is None:
+            current_dists = local_distances
+            current_topk_idx = local_idx
+        else:
+            # Combine with the topk so far:
+            current_dists = torch.cat([current_dists, local_distances], dim=1)
+            current_topk_idx = torch.cat([current_topk_idx, local_idx], dim=1)
+            # And take the topk again:
+            current_dists, running_indexes = torch.topk(
+                current_dists, k=k, dim=1, sorted=True, largest=False
+            )
+
+            # This creates proper indexing to select specific elements along dim 1
+
+            current_topk_idx = torch.gather(current_topk_idx, 1, running_indexes)
+
+        if i != local_size - 1:
+            # Don't do a ring on the last iteration.
+            local_points = next_local_points
+
+    return current_topk_idx, current_dists
+
+
+def extract_knn_args(points, queries, k, *args, **kwargs):
+    return points, queries, k
+
+
+def knn_sharded_wrapper(
+    func: Callable, types: Any, args: tuple, kwargs: dict
+) -> tuple[ShardTensor, ShardTensor]:
+    """
+    Dispatch the proper kNN tools based on the input sharding.
+    """
+
+    points, queries, k = extract_knn_args(*args, **kwargs)
+
+    # kNN will only work with 1D sharding
+    if points._spec.mesh != queries._spec.mesh:
+        raise MissingShardPatch(
+            "sharded knn: All point inputs must be on the same mesh"
+        )
+
+    # make sure all meshes are 1D
+    if points._spec.mesh.ndim != 1:
+        raise MissingShardPatch(
+            "point_cloud_ops.radius_search_wrapper: All point inputs must be on 1D meshes"
+        )
+
+    # Do we need a ring?
+    points_placement = points._spec.placements[0]
+
+    if points_placement.is_shard():
+        # We need a ring
+        idx, distances = ring_knn(points, queries, k)
+    else:
+        # No ring is needed.  Get the local tensors and compute directly:
+        local_points = points.to_local()  # This is replicated, getting all of it
+        local_queries = queries.to_local()  # This sharding doesn't matter!
+        idx, distances = knn_impl(local_points, local_queries, k)
+
+    # The outputs only depend on the local queries shape
+    input_queries_spec = queries._spec
+    # The global output tensor will be (N_q, k)
+
+    output_queries_shard_shapes = {
+        mesh_dim: tuple(
+            torch.Size((s[0], k))
+            for s in input_queries_spec.sharding_shapes()[mesh_dim]
+        )
+        for mesh_dim in input_queries_spec.sharding_shapes().keys()
+    }
+
+    # Convert the selected points and indexes to shards:
+    shard_idx = ShardTensor.from_local(
+        idx,
+        queries._spec.mesh,
+        queries._spec.placements,
+        sharding_shapes=output_queries_shard_shapes,
+    )
+    shard_distances = ShardTensor.from_local(
+        distances,
+        queries._spec.mesh,
+        queries._spec.placements,
+        sharding_shapes=output_queries_shard_shapes,
+    )
+
+    return shard_idx, shard_distances
+
+
+ShardTensor.register_named_function_handler(
+    "physicsnemo.knn_cuml.default", knn_sharded_wrapper
+)
diff --git a/physicsnemo/distributed/shard_utils/mesh_ops.py b/physicsnemo/distributed/shard_utils/mesh_ops.py
new file mode 100644
index 0000000000..c04ad66a89
--- /dev/null
+++ b/physicsnemo/distributed/shard_utils/mesh_ops.py
@@ -0,0 +1,146 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any
+
+import torch
+
+from physicsnemo.utils.sdf import signed_distance_field
+from physicsnemo.utils.version_check import check_module_requirements
+
+check_module_requirements("physicsnemo.distributed.shard_tensor")
+
+
+from physicsnemo.distributed import ShardTensor  # noqa: E402
+
+
+def sharded_signed_distance_field(
+    mesh_vertices: ShardTensor,
+    mesh_indices: ShardTensor,
+    input_points: ShardTensor,
+    max_dist: float = 1e8,
+    use_sign_winding_number: bool = False,
+) -> tuple[ShardTensor, ShardTensor]:
+    """
+    Compute the signed distance field for a (possibly sharded) mesh.
+
+    Args:
+        mesh_vertices: Sharded tensor of mesh vertices
+        mesh_indices: Sharded tensor of mesh indices
+        input_points: Sharded tensor of input points
+        max_dist: Maximum distance for the signed distance field
+        use_sign_winding_number: Whether to use sign winding number
+    """
+
+    # We can not actually compute the signed distance function on a sharded mesh.
+    # So, in this case, force the mesh to replicate placement if necessary:
+
+    local_mesh_vertices = mesh_vertices.full_tensor()
+    local_mesh_indices = mesh_indices.full_tensor()
+
+    # For the input points, though, it doesn't matter - they can be sharded.
+    # No communication is necessary
+
+    local_input_points = input_points.to_local()
+
+    local_sdf, local_sdf_hit_point = signed_distance_field(
+        local_mesh_vertices,
+        local_mesh_indices,
+        local_input_points,
+        max_dist,
+        use_sign_winding_number,
+    )
+
+    # Then, construct the output shard tensors:
+
+    if input_points._spec.placements[0].is_shard():
+        # Compute the output sharding shapes
+
+        # Output shape is always (N, 1), hit point is (N, 3)
+        input_shard_shapes = input_points._spec.sharding_shapes()
+
+        output_shard_shapes = {
+            mesh_dim: tuple(torch.Size((s[0],)) for s in input_shard_shapes[mesh_dim])
+            for mesh_dim in input_shard_shapes.keys()
+        }
+
+        sharded_sdf_output = ShardTensor.from_local(
+            local_sdf,
+            input_points._spec.mesh,
+            input_points._spec.placements,
+            sharding_shapes=output_shard_shapes,
+        ).reshape(input_points.shape[:-1])
+
+        sharded_sdf_hit_point_output = ShardTensor.from_local(
+            local_sdf_hit_point,
+            input_points._spec.mesh,
+            input_points._spec.placements,
+            sharding_shapes=input_shard_shapes,
+        ).reshape(input_points.shape)
+
+    else:
+        # The input points were replicated, use that for output:
+        sharded_sdf_output = ShardTensor.from_local(
+            local_sdf,
+            input_points._spec.mesh,
+            input_points._spec.placements,
+        )
+        sharded_sdf_hit_point_output = ShardTensor.from_local(
+            local_sdf_hit_point,
+            input_points._spec.mesh,
+            input_points._spec.placements,
+        )
+
+    return sharded_sdf_output, sharded_sdf_hit_point_output
+
+
+def repackage_radius_search_wrapper_args(
+    mesh_vertices: torch.Tensor,
+    mesh_indices: torch.Tensor,
+    input_points: torch.Tensor,
+    max_dist: float = 1e8,
+    use_sign_winding_number: bool = False,
+    *args,
+    **kwargs,
+) -> tuple[ShardTensor, ShardTensor, dict]:
+    """Repackages sdf arguments into a standard format."""
+    # Extract any additional parameters that might be in kwargs
+    # or use defaults if not provided
+    return_kwargs = {
+        "max_dist": max_dist,
+        "use_sign_winding_number": use_sign_winding_number,
+    }
+
+    # Add any explicitly passed parameters
+    if kwargs:
+        return_kwargs.update(kwargs)
+
+    return mesh_vertices, mesh_indices, input_points, return_kwargs
+
+
+def sharded_signed_distance_field_wrapper(
+    func: Any, type: Any, args: tuple, kwargs: dict
+) -> tuple[ShardTensor, ShardTensor]:
+    """
+    Wrapper for sharded_signed_distance_field to support sharded tensors.
+    """
+
+    return sharded_signed_distance_field(*args, **kwargs)
+
+
+ShardTensor.register_named_function_handler(
+    "physicsnemo.signed_distance_field.default", sharded_signed_distance_field_wrapper
+)
diff --git a/test/distributed/shard_tensor/ops/test_knn.py b/test/distributed/shard_tensor/ops/test_knn.py
new file mode 100644
index 0000000000..b41b62748a
--- /dev/null
+++ b/test/distributed/shard_tensor/ops/test_knn.py
@@ -0,0 +1,72 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+import torch
+from torch.distributed.tensor.placement_types import Replicate, Shard
+
+from physicsnemo.distributed import DistributedManager, scatter_tensor
+from physicsnemo.utils.neighbors import knn
+
+from .utils import numerical_shard_tensor_check
+
+
+class kNNModule(torch.nn.Module):
+    def __init__(
+        self,
+        num_neighbors=4,
+    ):
+        super().__init__()
+
+        self.num_neighbors = num_neighbors
+
+    def forward(self, points, queries):
+        return knn(points, queries, self.num_neighbors)
+
+
+@pytest.mark.multigpu_static
+@pytest.mark.parametrize("scatter_points", [True, False])
+@pytest.mark.parametrize("scatter_queries", [True, False])
+def test_knn_1dmesh(
+    distributed_mesh,
+    scatter_points: bool,
+    scatter_queries: bool,
+):
+    dm = DistributedManager()
+
+    # Generate random points for the points and queries
+    points = torch.randn(1043, 3).to(dm.device)
+    queries = torch.randn(2198, 3).to(dm.device)
+
+    # points = torch.randn(10, 3).to(dm.device)
+    # queries = torch.randn(8, 3).to(dm.device)
+
+    # Distribute the inputs:
+    points_placements = (Shard(0),) if scatter_points else (Replicate(),)
+    queries_placements = (Shard(0),) if scatter_queries else (Replicate(),)
+
+    sharded_points = scatter_tensor(points, 0, distributed_mesh, points_placements)
+    sharded_queries = scatter_tensor(queries, 0, distributed_mesh, queries_placements)
+
+    module = kNNModule()
+
+    numerical_shard_tensor_check(
+        distributed_mesh,
+        module,
+        [sharded_points, sharded_queries],
+        {},
+        check_grads=False,
+    )
diff --git a/test/distributed/shard_tensor/ops/test_sdf.py b/test/distributed/shard_tensor/ops/test_sdf.py
new file mode 100644
index 0000000000..079127a229
--- /dev/null
+++ b/test/distributed/shard_tensor/ops/test_sdf.py
@@ -0,0 +1,119 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import pytest
+import torch
+from scipy.spatial import ConvexHull
+from torch.distributed.tensor.placement_types import Replicate, Shard
+
+from physicsnemo.distributed import DistributedManager, scatter_tensor
+from physicsnemo.utils.sdf import signed_distance_field
+
+from .utils import numerical_shard_tensor_check
+
+
+# This is from the domino datapipe, too:
+def random_sample_on_unit_sphere(n_points):
+    # Random points on the sphere:
+    phi = np.random.uniform(0, 2 * np.pi, n_points)
+    cos_theta = np.random.uniform(-1, 1, n_points)
+    theta = np.arccos(cos_theta)
+
+    # Convert to x/y/z and stack:
+    x = np.sin(theta) * np.cos(phi)
+    y = np.sin(theta) * np.sin(phi)
+    z = np.cos(theta)
+    points = np.stack([x, y, z], axis=1)
+    return points
+
+
+def mesh_vertices_and_indices(n_points):
+    # We are generating a mesh on a random sphere.
+    stl_points = random_sample_on_unit_sphere(n_points)
+
+    # Generate the triangles with ConvexHull:
+    hull = ConvexHull(stl_points)
+    faces = hull.simplices  # (M, 3)
+
+    return stl_points, faces
+
+
+class SDFModule(torch.nn.Module):
+    """
+    This is a test module to run the SDF function ... don't use it elsewhere.
+    """
+
+    def __init__(self, max_dist=1e8, use_sign_winding_number=False):
+        super().__init__()
+
+        self.max_dist = max_dist
+        self.use_sign_winding_number = use_sign_winding_number
+
+    def forward(self, mesh_vertices, mesh_indices, input_points):
+        return signed_distance_field(
+            mesh_vertices,
+            mesh_indices,
+            input_points,
+            self.max_dist,
+            self.use_sign_winding_number,
+        )
+
+
+@pytest.mark.multigpu_static
+@pytest.mark.parametrize("scatter_mesh", [True, False])
+@pytest.mark.parametrize("scatter_inputs", [True, False])
+def test_sdf_1dmesh(
+    distributed_mesh,
+    scatter_mesh: bool,
+    scatter_inputs: bool,
+):
+    dm = DistributedManager()
+
+    # Generate a mesh on a unit sphere:
+    mesh_vertices, mesh_indices = mesh_vertices_and_indices(932)
+
+    # Cast the vertices and indices to tensors:
+    mesh_vertices = torch.tensor(mesh_vertices).to(dm.device)
+    mesh_indices = torch.tensor(mesh_indices.flatten()).to(dm.device)
+
+    # Distribute the inputs:
+    mesh_placements = (Shard(0),) if scatter_mesh else (Replicate(),)
+    input_placements = (Shard(0),) if scatter_inputs else (Replicate(),)
+
+    sharded_mesh_vertices = scatter_tensor(
+        mesh_vertices, 0, distributed_mesh, mesh_placements
+    )
+    sharded_mesh_indices = scatter_tensor(
+        mesh_indices, 0, distributed_mesh, mesh_placements
+    )
+
+    # Generate random points in the volume:
+    input_points = torch.randn(1043, 3).to(dm.device)
+
+    sharded_input_points = scatter_tensor(
+        input_points, 0, distributed_mesh, input_placements
+    )
+
+    module = SDFModule()
+
+    numerical_shard_tensor_check(
+        distributed_mesh,
+        module,
+        [sharded_mesh_vertices, sharded_mesh_indices, sharded_input_points],
+        {},
+        check_grads=False,
+    )
diff --git a/test/distributed/shard_tensor/ops/utils.py b/test/distributed/shard_tensor/ops/utils.py
index 19e3de4d73..cdece254e7 100644
--- a/test/distributed/shard_tensor/ops/utils.py
+++ b/test/distributed/shard_tensor/ops/utils.py
@@ -18,6 +18,7 @@
 from collections.abc import Iterable
 
 import torch
+import torch.distributed as dist
 from torch.distributed.tensor import DTensor, distribute_module
 from torch.distributed.tensor.device_mesh import DeviceMesh
 
@@ -83,6 +84,18 @@ def sharded_to_local(container):
 def default_tensor_comparison(output, d_output, atol, rtol):
     # We assume a single output!
 
+    if not isinstance(output, torch.Tensor):
+        if isinstance(output, Iterable):
+            return all(
+                [
+                    default_tensor_comparison(item, d_item, atol, rtol)
+                    for item, d_item in zip(output, d_output)
+                ]
+            )
+
+    if isinstance(d_output, ShardTensor):
+        validate_shard_tensor_spec(d_output)
+
     local_output = sharded_to_local(d_output)
 
     # Check forward agreement:
@@ -95,6 +108,37 @@ def default_loss_fn(output):
     return output.mean()
 
 
+def validate_shard_tensor_spec(shard_tensor):
+    # Take a shard tensor and cross check on the dimensions.
+    # Take care about assertions here, since this is a collective
+
+    # Check out shard shapes
+    # The local shard shape needs to match the local tensor shape:
+    sharding_shapes = shard_tensor._spec.sharding_shapes()
+    mesh = shard_tensor._spec.mesh
+
+    for mesh_dim in range(mesh.ndim):
+        mesh_rank = mesh.get_local_rank(mesh_dim)
+        mesh_size = dist.get_world_size(mesh.get_group(mesh_dim))
+
+        # Is this axis sharded?
+        this_placement = shard_tensor._spec.placements[mesh_dim]
+        if this_placement.is_shard():
+            # This axis is sharded.  the mesh dim should be in the shapes
+            assert mesh_dim in sharding_shapes.keys()
+
+            # The length of the sharding shapes should match the mesh size:
+            assert len(sharding_shapes[mesh_dim]) == mesh_size
+
+            # The local shape should match the listed shape for this rank:
+            # this_shape = shard_tensor._spec.sharding_shapes()[mesh_dim]
+            # print(f"local tensor shape: {shard_tensor._local_tensor.shape}")
+            # print(f"sharding shapes: {sharding_shapes[mesh_dim][mesh_rank]}")
+            assert (
+                sharding_shapes[mesh_dim][mesh_rank] == shard_tensor._local_tensor.shape
+            )
+
+
 def numerical_shard_tensor_check(
     mesh: DeviceMesh,
     module: torch.nn.Module,

From ee0c728becb364127036fece3075344b0e578c1d Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Fri, 19 Sep 2025 13:24:12 +0000
Subject: [PATCH 33/98] First domino refactor: consolidate all MLP
 implementations, move several classes to their own files for organization.

---
 physicsnemo/models/domino/ball_query.py | 106 ++++++
 physicsnemo/models/domino/encodings.py  | 106 ++++++
 physicsnemo/models/domino/mlps.py       | 113 ++++++
 physicsnemo/models/domino/model.py      | 437 +++---------------------
 4 files changed, 381 insertions(+), 381 deletions(-)
 create mode 100644 physicsnemo/models/domino/ball_query.py
 create mode 100644 physicsnemo/models/domino/encodings.py
 create mode 100644 physicsnemo/models/domino/mlps.py

diff --git a/physicsnemo/models/domino/ball_query.py b/physicsnemo/models/domino/ball_query.py
new file mode 100644
index 0000000000..681fe80733
--- /dev/null
+++ b/physicsnemo/models/domino/ball_query.py
@@ -0,0 +1,106 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This code contains the DoMINO model architecture.
+The DoMINO class contains an architecture to model both surface and
+volume quantities together as well as separately (controlled using
+the config.yaml file)
+"""
+
+import torch
+import torch.nn as nn
+from einops import rearrange
+
+from physicsnemo.utils.neighbors import radius_search
+
+
+class BQWarp(nn.Module):
+    """
+    Warp-based ball-query layer for finding neighboring points within a specified radius.
+
+    This layer uses an accelerated ball query implementation to efficiently find points
+    within a specified radius of query points.
+    """
+
+    def __init__(
+        self,
+        radius: float = 0.25,
+        neighbors_in_radius: int = 10,
+    ):
+        """
+        Initialize the BQWarp layer.
+
+        Args:
+            radius: Radius for ball query operation
+            neighbors_in_radius: Maximum number of neighbors to return within radius
+        """
+        super().__init__()
+
+        self.radius = radius
+        self.neighbors_in_radius = neighbors_in_radius
+
+    def forward(
+        self, x: torch.Tensor, p_grid: torch.Tensor, reverse_mapping: bool = True
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """
+        Performs ball query operation to find neighboring points and their features.
+
+        This method uses the Warp-accelerated ball query implementation to find points
+        within a specified radius. It can operate in two modes:
+        - Forward mapping: Find points from x that are near p_grid points (reverse_mapping=False)
+        - Reverse mapping: Find points from p_grid that are near x points (reverse_mapping=True)
+
+        Args:
+            x: Tensor of shape (batch_size, num_points, 3+features) containing point coordinates
+               and their features
+            p_grid: Tensor of shape (batch_size, grid_x, grid_y, grid_z, 3) containing grid point
+                   coordinates
+            reverse_mapping: Boolean flag to control the direction of the mapping:
+                            - True: Find p_grid points near x points
+                            - False: Find x points near p_grid points
+
+        Returns:
+            tuple containing:
+                - mapping: Tensor containing indices of neighboring points
+                - outputs: Tensor containing coordinates of the neighboring points
+        """
+
+        if p_grid.ndim != 3:
+            p_grid = rearrange(p_grid, "b nx ny nz c -> b (nx ny nz) c")
+
+        if reverse_mapping:
+            mapping, outputs = radius_search(
+                x[0],
+                p_grid[0],
+                self.radius,
+                self.neighbors_in_radius,
+                return_points=True,
+            )
+            mapping = mapping.unsqueeze(0)
+            outputs = outputs.unsqueeze(0)
+        else:
+            mapping, outputs = radius_search(
+                p_grid[0],
+                x[0],
+                self.radius,
+                self.neighbors_in_radius,
+                return_points=True,
+            )
+            mapping = mapping.unsqueeze(0)
+            outputs = outputs.unsqueeze(0)
+
+        return mapping, outputs
diff --git a/physicsnemo/models/domino/encodings.py b/physicsnemo/models/domino/encodings.py
new file mode 100644
index 0000000000..e4236cc8e6
--- /dev/null
+++ b/physicsnemo/models/domino/encodings.py
@@ -0,0 +1,106 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This code contains the DoMINO model architecture.
+The DoMINO class contains an architecture to model both surface and
+volume quantities together as well as separately (controlled using
+the config.yaml file)
+"""
+
+import math
+
+import torch
+import torch.nn as nn
+
+from .mlps import MLP
+
+
+def fourier_encode_vectorized(
+    coords: torch.Tensor, freqs: torch.Tensor
+) -> torch.Tensor:
+    """Vectorized Fourier feature encoding
+
+    Args:
+        coords: Tensor containing coordinates, of shape (batch_size, D)
+        freqs: Tensor containing frequencies, of shape (F,) (num frequencies)
+
+    Returns:
+        Tensor containing Fourier features, of shape (batch_size, D * 2 * F)
+    """
+
+    D = coords.shape[-1]
+    F = freqs.shape[0]
+
+    freqs = freqs[None, None, :, None]  # reshape to [*, F, 1] for broadcasting
+
+    coords = coords.unsqueeze(-2)  # [*, 1, D]
+    scaled = (coords * freqs).reshape(*coords.shape[:-2], D * F)  # [*, D, F]
+    features = torch.cat([torch.sin(scaled), torch.cos(scaled)], dim=-1)  # [*, D, 2F]
+
+    return features.reshape(*coords.shape[:-2], D * 2 * F)  # [*, D * 2F]
+
+
+class EncodingMLP(nn.Module):
+    """
+    This is an MLP that will, optionally, fourier encode the input features.
+
+    The encoded features are concatenated to the original inputs, and then
+    processed with an MLP.
+
+    Args:
+        input_features: The number of input features to the MLP.
+        base_layer: The number of neurons in the hidden layer of the MLP.
+        fourier_features: Whether to fourier encode the input features.
+        num_modes: The number of modes to use for the fourier encoding.
+        activation: The activation function to use in the MLP.
+
+    """
+
+    def __init__(
+        self,
+        input_features: int,
+        base_layer: int,
+        fourier_features: bool,
+        num_modes: int,
+        activation: nn.Module,
+    ):
+        super().__init__()
+        self.fourier_features = fourier_features
+
+        # self.num_modes = model_parameters.num_modes
+
+        if self.fourier_features:
+            input_features_calculated = input_features + input_features * num_modes * 2
+            self.register_buffer(
+                "freqs", torch.exp(torch.linspace(0, math.pi, self.num_modes))
+            )
+        else:
+            input_features_calculated = input_features
+
+        self.mlp = MLP(
+            input_features=input_features_calculated,
+            base_layer=base_layer,
+            output_features=base_layer,
+            activation=activation,
+            n_layers=3,
+        )
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        if self.fourier_features:
+            x = torch.cat((x, fourier_encode_vectorized(x, self.freqs)), dim=-1)
+
+        return self.mlp(x)
diff --git a/physicsnemo/models/domino/mlps.py b/physicsnemo/models/domino/mlps.py
new file mode 100644
index 0000000000..e74583dea0
--- /dev/null
+++ b/physicsnemo/models/domino/mlps.py
@@ -0,0 +1,113 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This code contains the DoMINO model architecture.
+The DoMINO class contains an architecture to model both surface and
+volume quantities together as well as separately (controlled using
+the config.yaml file)
+"""
+
+import torch
+import torch.nn as nn
+
+
+class MLP(nn.Module):
+    """
+    FlexibleMulti-layer perceptron (MLP) module.
+
+    This is reused in various domino layers to simplify and unify
+    the MLP implementations.
+    """
+
+    def __init__(
+        self,
+        input_features: int,
+        output_features: int,
+        base_layer: int,
+        activation: nn.Module,
+        n_layers: int,
+    ):
+        super(MLP, self).__init__()
+        self.input_features = input_features
+
+        modules = []
+
+        if n_layers == 1:
+            # Single layer: input_features -> output_features
+            modules.append(nn.Linear(input_features, output_features))
+        else:
+            # First layer: input_features -> base_layer
+            modules.append(nn.Linear(input_features, base_layer))
+            modules.append(activation)
+
+            # Hidden layers: base_layer -> base_layer
+            for _ in range(n_layers - 2):
+                modules.append(nn.Linear(base_layer, base_layer))
+                modules.append(activation)
+
+            # Final layer: base_layer -> output_features (no activation)
+            modules.append(nn.Linear(base_layer, output_features))
+
+        self.mlp_modules = torch.nn.Sequential(*modules)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return self.mlp_modules(x)
+
+
+class AggregationModel(MLP):
+    """
+    Neural network module to aggregate local geometry encoding with basis functions.
+
+    This module combines basis function representations with geometry encodings
+    to predict the final output quantities. It serves as the final prediction layer
+    that integrates all available information sources.
+
+    """
+
+    def __init__(
+        self,
+        input_features: int,
+        output_features: int,
+        base_layer: int,
+        activation: nn.Module,
+    ):
+        super().__init__(
+            input_features=input_features,
+            output_features=output_features,
+            base_layer=base_layer,
+            activation=activation,
+            n_layers=5,
+        )
+
+
+class LocalPointConv(MLP):
+    """Layer for local geometry point kernel"""
+
+    def __init__(
+        self,
+        input_features: int,
+        base_layer: int,
+        output_features: int,
+        activation: nn.Module,
+    ):
+        super().__init__(
+            input_features=input_features,
+            base_layer=base_layer,
+            output_features=output_features,
+            activation=activation,
+            n_layers=2,
+        )
diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py
index c95f971e97..0b658ffbd0 100644
--- a/physicsnemo/models/domino/model.py
+++ b/physicsnemo/models/domino/model.py
@@ -31,57 +31,28 @@
 from einops import rearrange
 
 from physicsnemo.models.unet import UNet
-from physicsnemo.utils.neighbors import radius_search
 from physicsnemo.utils.profiling import profile
 
+from .ball_query import BQWarp
+from .encodings import (
+    EncodingMLP,
+    fourier_encode_vectorized,
+)
+from .mlps import AggregationModel, LocalPointConv
+
 
 def get_activation(activation: Literal["relu", "gelu"]) -> Callable:
     """
     Return a PyTorch activation function corresponding to the given name.
     """
     if activation == "relu":
-        return F.relu
+        return nn.ReLU()
     elif activation == "gelu":
-        return F.gelu
+        return nn.GELU()
     else:
         raise ValueError(f"Activation function {activation} not found")
 
 
-def fourier_encode(coords, num_freqs):
-    """Function to caluculate fourier features"""
-    # Create a range of frequencies
-    freqs = torch.exp(torch.linspace(0, math.pi, num_freqs, device=coords.device))
-    # Generate sine and cosine features
-    features = [torch.sin(coords * f) for f in freqs] + [
-        torch.cos(coords * f) for f in freqs
-    ]
-    ret = torch.cat(features, dim=-1)
-    return ret
-
-
-def fourier_encode_vectorized(coords, freqs):
-    """Vectorized Fourier feature encoding"""
-    D = coords.shape[-1]
-    F = freqs.shape[0]
-
-    freqs = freqs[None, None, :, None]  # reshape to [*, F, 1] for broadcasting
-
-    coords = coords.unsqueeze(-2)  # [*, 1, D]
-    scaled = (coords * freqs).reshape(*coords.shape[:-2], D * F)  # [*, D, F]
-    features = torch.cat([torch.sin(scaled), torch.cos(scaled)], dim=-1)  # [*, D, 2F]
-
-    return features.reshape(*coords.shape[:-2], D * 2 * F)  # [*, D * 2F]
-
-
-def calculate_pos_encoding(nx, d=8):
-    """Function to caluculate positional encoding"""
-    vec = []
-    for k in range(int(d / 2)):
-        vec.append(torch.sin(nx / 10000 ** (2 * (k) / d)))
-        vec.append(torch.cos(nx / 10000 ** (2 * (k) / d)))
-    return vec
-
-
 def scale_sdf(sdf: torch.Tensor) -> torch.Tensor:
     """
     Scale a signed distance function (SDF) to emphasize surface regions.
@@ -99,90 +70,6 @@ def scale_sdf(sdf: torch.Tensor) -> torch.Tensor:
     return sdf / (0.4 + torch.abs(sdf))
 
 
-class BQWarp(nn.Module):
-    """
-    Warp-based ball-query layer for finding neighboring points within a specified radius.
-
-    This layer uses an accelerated ball query implementation to efficiently find points
-    within a specified radius of query points.
-    """
-
-    def __init__(
-        self,
-        grid_resolution=None,
-        radius: float = 0.25,
-        neighbors_in_radius: int = 10,
-    ):
-        """
-        Initialize the BQWarp layer.
-
-        Args:
-            grid_resolution: Resolution of the grid in each dimension [nx, ny, nz]
-            radius: Radius for ball query operation
-            neighbors_in_radius: Maximum number of neighbors to return within radius
-        """
-        super().__init__()
-        if grid_resolution is None:
-            grid_resolution = [256, 96, 64]
-
-        self.radius = radius
-        self.neighbors_in_radius = neighbors_in_radius
-        self.grid_resolution = grid_resolution
-
-    def forward(
-        self, x: torch.Tensor, p_grid: torch.Tensor, reverse_mapping: bool = True
-    ) -> tuple[torch.Tensor, torch.Tensor]:
-        """
-        Performs ball query operation to find neighboring points and their features.
-
-        This method uses the Warp-accelerated ball query implementation to find points
-        within a specified radius. It can operate in two modes:
-        - Forward mapping: Find points from x that are near p_grid points (reverse_mapping=False)
-        - Reverse mapping: Find points from p_grid that are near x points (reverse_mapping=True)
-
-        Args:
-            x: Tensor of shape (batch_size, num_points, 3+features) containing point coordinates
-               and their features
-            p_grid: Tensor of shape (batch_size, grid_x, grid_y, grid_z, 3) containing grid point
-                   coordinates
-            reverse_mapping: Boolean flag to control the direction of the mapping:
-                            - True: Find p_grid points near x points
-                            - False: Find x points near p_grid points
-
-        Returns:
-            tuple containing:
-                - mapping: Tensor containing indices of neighboring points
-                - outputs: Tensor containing coordinates of the neighboring points
-        """
-        batch_size = x.shape[0]
-        nx, ny, nz = self.grid_resolution
-
-        p_grid = torch.reshape(p_grid, (batch_size, nx * ny * nz, 3))
-
-        if reverse_mapping:
-            mapping, outputs = radius_search(
-                x[0],
-                p_grid[0],
-                self.radius,
-                self.neighbors_in_radius,
-                return_points=True,
-            )
-            mapping = mapping.unsqueeze(0)
-            outputs = outputs.unsqueeze(0)
-        else:
-            mapping, outputs = radius_search(
-                p_grid[0],
-                x[0],
-                self.radius,
-                self.neighbors_in_radius,
-                return_points=True,
-            )
-            mapping = mapping.unsqueeze(0)
-            outputs = outputs.unsqueeze(0)
-
-        return mapping, outputs
-
-
 class GeoConvOut(nn.Module):
     """
     Geometry layer to project STL geometry data onto regular grids.
@@ -419,7 +306,6 @@ def __init__(
         for j in range(len(radii)):
             self.bq_warp.append(
                 BQWarp(
-                    grid_resolution=model_parameters.interp_res,
                     radius=radii[j],
                     neighbors_in_radius=neighbors_in_radius[j],
                 )
@@ -632,247 +518,6 @@ def forward(
         return encoding_g
 
 
-class NNBasisFunctions(nn.Module):
-    """Basis function layer for point clouds"""
-
-    def __init__(self, input_features: int, model_parameters=None):
-        super(NNBasisFunctions, self).__init__()
-        base_layer = model_parameters.base_layer
-        self.fourier_features = model_parameters.fourier_features
-        self.num_modes = model_parameters.num_modes
-
-        if self.fourier_features:
-            input_features_calculated = (
-                input_features + input_features * self.num_modes * 2
-            )
-        else:
-            input_features_calculated = input_features
-
-        self.fc1 = nn.Linear(input_features_calculated, base_layer)
-        self.fc2 = nn.Linear(base_layer, int(base_layer))
-        self.fc3 = nn.Linear(int(base_layer), int(base_layer))
-
-        self.activation = get_activation(model_parameters.activation)
-
-        if self.fourier_features:
-            self.register_buffer(
-                "freqs", torch.exp(torch.linspace(0, math.pi, self.num_modes))
-            )
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        """
-        Transform point features into a basis function representation.
-
-        Args:
-            x: Input tensor containing point features
-
-        Returns:
-            Tensor containing basis function coefficients
-        """
-        if self.fourier_features:
-            facets = torch.cat((x, fourier_encode_vectorized(x, self.freqs)), dim=-1)
-        else:
-            facets = x
-        facets = self.activation(self.fc1(facets))
-        facets = self.activation(self.fc2(facets))
-        facets = self.fc3(facets)
-
-        return facets
-
-
-class ParameterModel(nn.Module):
-    """
-    Neural network module to encode simulation parameters.
-
-    This module encodes physical global parameters into a learned
-    latent representation that can be incorporated into the
-    model'sprediction process.
-    """
-
-    def __init__(self, input_features: int, model_parameters=None):
-        """
-        Initialize the parameter encoding network.
-
-        Args:
-            input_features: Number of input parameters to encode
-            model_parameters: Configuration parameters for the model
-        """
-        super(ParameterModel, self).__init__()
-        self.fourier_features = model_parameters.fourier_features
-        self.num_modes = model_parameters.num_modes
-
-        if self.fourier_features:
-            input_features_calculated = (
-                input_features + input_features * self.num_modes * 2
-            )
-            self.register_buffer(
-                "freqs", torch.exp(torch.linspace(0, math.pi, self.num_modes))
-            )
-        else:
-            input_features_calculated = input_features
-
-        base_layer = model_parameters.base_layer
-        self.fc1 = nn.Linear(input_features_calculated, base_layer)
-        self.fc2 = nn.Linear(base_layer, int(base_layer))
-        self.fc3 = nn.Linear(int(base_layer), int(base_layer))
-
-        self.activation = get_activation(model_parameters.activation)
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        """
-        Encode physical parameters into a latent representation.
-
-        Args:
-            x: Input tensor containing physical parameters (e.g., inlet velocity, air density)
-
-        Returns:
-            Tensor containing encoded parameter representation
-        """
-        if self.fourier_features:
-            params = torch.cat((x, fourier_encode_vectorized(x, self.freqs)), dim=-1)
-        else:
-            params = x
-        params = self.activation(self.fc1(params))
-        params = self.activation(self.fc2(params))
-        params = self.fc3(params)
-
-        return params
-
-
-class AggregationModel(nn.Module):
-    """
-    Neural network module to aggregate local geometry encoding with basis functions.
-
-    This module combines basis function representations with geometry encodings
-    to predict the final output quantities. It serves as the final prediction layer
-    that integrates all available information sources.
-    """
-
-    def __init__(
-        self,
-        input_features: int,
-        output_features: int,
-        model_parameters=None,
-        new_change: bool = True,
-    ):
-        """
-        Initialize the aggregation model.
-
-        Args:
-            input_features: Number of input feature dimensions
-            output_features: Number of output feature dimensions
-            model_parameters: Configuration parameters for the model
-            new_change: Flag to enable newer implementation (default: True)
-        """
-        super(AggregationModel, self).__init__()
-        self.input_features = input_features
-        self.output_features = output_features
-        self.new_change = new_change
-        base_layer = model_parameters.base_layer
-        self.fc1 = nn.Linear(self.input_features, base_layer)
-        self.fc2 = nn.Linear(base_layer, int(base_layer))
-        self.fc3 = nn.Linear(int(base_layer), int(base_layer))
-        self.fc4 = nn.Linear(int(base_layer), int(base_layer))
-        self.fc5 = nn.Linear(int(base_layer), self.output_features)
-
-        self.activation = get_activation(model_parameters.activation)
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        """
-        Process the combined input features to predict output quantities.
-
-        This method applies a series of fully connected layers to the input,
-        which typically contains a combination of basis functions, geometry
-        encodings, and potentially parameter encodings.
-
-        Args:
-            x: Input tensor containing combined features
-
-        Returns:
-            Tensor containing predicted output quantities
-        """
-        out = self.activation(self.fc1(x))
-        out = self.activation(self.fc2(out))
-        out = self.activation(self.fc3(out))
-        out = self.activation(self.fc4(out))
-
-        out = self.fc5(out)
-
-        return out
-
-
-class LocalPointConv(nn.Module):
-    """Layer for local geometry point kernel"""
-
-    def __init__(
-        self,
-        input_features,
-        base_layer,
-        output_features,
-        model_parameters=None,
-    ):
-        super(LocalPointConv, self).__init__()
-        self.input_features = input_features
-        self.output_features = output_features
-        self.fc1 = nn.Linear(self.input_features, base_layer)
-        self.fc2 = nn.Linear(base_layer, self.output_features)
-        self.activation = get_activation(model_parameters.activation)
-
-    def forward(self, x):
-        out = self.activation(self.fc1(x))
-        out = self.fc2(out)
-
-        return out
-
-
-class PositionEncoder(nn.Module):
-    """Positional encoding of point clouds"""
-
-    def __init__(self, input_features: int, model_parameters=None):
-        super().__init__()
-        base_layer = model_parameters.base_neurons
-        self.fourier_features = model_parameters.fourier_features
-        self.num_modes = model_parameters.num_modes
-
-        if self.fourier_features:
-            input_features_calculated = (
-                input_features + input_features * self.num_modes * 2
-            )
-        else:
-            input_features_calculated = input_features
-
-        self.fc1 = nn.Linear(input_features_calculated, base_layer)
-        self.fc2 = nn.Linear(base_layer, int(base_layer))
-        self.fc3 = nn.Linear(int(base_layer), int(base_layer))
-
-        self.activation = get_activation(model_parameters.activation)
-
-        if self.fourier_features:
-            self.register_buffer(
-                "freqs", torch.exp(torch.linspace(0, math.pi, self.num_modes))
-            )
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        """
-        Transform point features into a basis function representation.
-
-        Args:
-            x: Input tensor containing point features
-
-        Returns:
-            Tensor containing position encoder
-        """
-        if self.fourier_features:
-            facets = torch.cat((x, fourier_encode_vectorized(x, self.freqs)), axis=-1)
-        else:
-            facets = x
-        facets = self.activation(self.fc1(facets))
-        facets = self.activation(self.fc2(facets))
-        facets = self.fc3(facets)
-
-        return facets
-
-
 # @dataclass
 # class MetaData(ModelMetaData):
 #     name: str = "DoMINO"
@@ -1134,9 +779,12 @@ def __init__(
         if self.encode_parameters:
             # Defining the parameter model
             base_layer_p = model_parameters.parameter_model.base_layer
-            self.parameter_model = ParameterModel(
+            self.parameter_model = EncodingMLP(
                 input_features=self.global_features,
-                model_parameters=model_parameters.parameter_model,
+                fourier_features=model_parameters.parameter_model.fourier_features,
+                num_modes=model_parameters.parameter_model.num_modes,
+                base_layer=model_parameters.parameter_model.base_layer,
+                activation=get_activation(model_parameters.parameter_model.activation),
             )
         else:
             base_layer_p = 0
@@ -1172,9 +820,15 @@ def __init__(
                 self.num_variables_surf
             ):  # Have the same basis function for each variable
                 self.nn_basis_surf.append(
-                    NNBasisFunctions(
+                    EncodingMLP(
                         input_features=input_features_surface,
-                        model_parameters=model_parameters.nn_basis_functions,
+                        base_layer=model_parameters.nn_basis_functions.base_layer,
+                        fourier_features=model_parameters.nn_basis_functions.fourier_features,
+                        num_modes=model_parameters.nn_basis_functions.num_modes,
+                        activation=get_activation(
+                            model_parameters.nn_basis_functions.activation
+                        ),
+                        # model_parameters=model_parameters.nn_basis_functions,
                     )
                 )
 
@@ -1184,9 +838,15 @@ def __init__(
                 self.num_variables_vol
             ):  # Have the same basis function for each variable
                 self.nn_basis_vol.append(
-                    NNBasisFunctions(
+                    EncodingMLP(
                         input_features=input_features,
-                        model_parameters=model_parameters.nn_basis_functions,
+                        base_layer=model_parameters.nn_basis_functions.base_layer,
+                        fourier_features=model_parameters.nn_basis_functions.fourier_features,
+                        num_modes=model_parameters.nn_basis_functions.num_modes,
+                        activation=get_activation(
+                            model_parameters.nn_basis_functions.activation
+                        ),
+                        # model_parameters=model_parameters.nn_basis_functions,
                     )
                 )
 
@@ -1200,8 +860,12 @@ def __init__(
             else:
                 inp_pos_vol = 7 if model_parameters.use_sdf_in_basis_func else 3
 
-            self.fc_p_vol = PositionEncoder(
-                inp_pos_vol, model_parameters.position_encoder
+            self.fc_p_vol = EncodingMLP(
+                input_features=inp_pos_vol,
+                fourier_features=model_parameters.position_encoder.fourier_features,
+                num_modes=model_parameters.position_encoder.num_modes,
+                base_layer=model_parameters.position_encoder.base_neurons,
+                activation=get_activation(model_parameters.position_encoder.activation),
             )
 
         if self.output_features_surf is not None:
@@ -1210,10 +874,13 @@ def __init__(
             else:
                 inp_pos_surf = 3
 
-            self.fc_p_surf = PositionEncoder(
-                inp_pos_surf, model_parameters.position_encoder
+            self.fc_p_surf = EncodingMLP(
+                input_features=inp_pos_surf,
+                fourier_features=model_parameters.position_encoder.fourier_features,
+                num_modes=model_parameters.position_encoder.num_modes,
+                base_layer=model_parameters.position_encoder.base_neurons,
+                activation=get_activation(model_parameters.position_encoder.activation),
             )
-
         # BQ for surface
         self.surface_neighbors_in_radius = (
             model_parameters.geometry_local.surface_neighbors_in_radius
@@ -1236,7 +903,6 @@ def __init__(
 
             self.surface_bq_warp.append(
                 BQWarp(
-                    grid_resolution=model_parameters.interp_res,
                     radius=self.surface_radius[ct],
                     neighbors_in_radius=self.surface_neighbors_in_radius[ct],
                 )
@@ -1246,7 +912,9 @@ def __init__(
                     input_features=total_neighbors_in_radius,
                     base_layer=512,
                     output_features=self.surface_neighbors_in_radius[ct],
-                    model_parameters=model_parameters.local_point_conv,
+                    activation=get_activation(
+                        model_parameters.local_point_conv.activation
+                    ),
                 )
             )
 
@@ -1272,7 +940,6 @@ def __init__(
 
             self.volume_bq_warp.append(
                 BQWarp(
-                    grid_resolution=model_parameters.interp_res,
                     radius=self.volume_radius[ct],
                     neighbors_in_radius=self.volume_neighbors_in_radius[ct],
                 )
@@ -1282,7 +949,9 @@ def __init__(
                     input_features=total_neighbors_in_radius,
                     base_layer=512,
                     output_features=self.volume_neighbors_in_radius[ct],
-                    model_parameters=model_parameters.local_point_conv,
+                    activation=get_activation(
+                        model_parameters.local_point_conv.activation
+                    ),
                 )
             )
 
@@ -1316,7 +985,10 @@ def __init__(
                         + base_layer_geo_surf
                         + base_layer_p,
                         output_features=1,
-                        model_parameters=model_parameters.aggregation_model,
+                        base_layer=model_parameters.aggregation_model.base_layer,
+                        activation=get_activation(
+                            model_parameters.aggregation_model.activation
+                        ),
                     )
                 )
 
@@ -1335,7 +1007,10 @@ def __init__(
                         + base_layer_geo_vol
                         + base_layer_p,
                         output_features=1,
-                        model_parameters=model_parameters.aggregation_model,
+                        base_layer=model_parameters.aggregation_model.base_layer,
+                        activation=get_activation(
+                            model_parameters.aggregation_model.activation
+                        ),
                     )
                 )
 

From 611dce4a8f8afb20557c95caa98a20ba37124f66 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Fri, 19 Sep 2025 14:09:30 +0000
Subject: [PATCH 34/98] Refactor the encodings stage of domino to standalone
 nn.Modules

---
 physicsnemo/models/domino/encodings.py | 145 +++++++++++++++++-
 physicsnemo/models/domino/model.py     | 194 ++++---------------------
 2 files changed, 173 insertions(+), 166 deletions(-)

diff --git a/physicsnemo/models/domino/encodings.py b/physicsnemo/models/domino/encodings.py
index e4236cc8e6..ca2ef34f0f 100644
--- a/physicsnemo/models/domino/encodings.py
+++ b/physicsnemo/models/domino/encodings.py
@@ -25,8 +25,10 @@
 
 import torch
 import torch.nn as nn
+from einops import rearrange
 
-from .mlps import MLP
+from .ball_query import BQWarp
+from .mlps import MLP, LocalPointConv
 
 
 def fourier_encode_vectorized(
@@ -104,3 +106,144 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
             x = torch.cat((x, fourier_encode_vectorized(x, self.freqs)), dim=-1)
 
         return self.mlp(x)
+
+
+class LocalGeometryEncoding(nn.Module):
+    """
+    A local geometry encoding module.
+
+    This will apply a ball query to the input features, mapping the point cloud
+    to the volume mesh, and then apply a local point convolution to the output.
+
+    Args:
+        radius: The radius of the ball query.
+        neighbors_in_radius: The number of neighbors in the radius of the ball query.
+        total_neighbors_in_radius: The total number of neighbors in the radius of the ball query.
+        base_layer: The number of neurons in the hidden layer of the MLP.
+        activation: The activation function to use in the MLP.
+        grid_resolution: The resolution of the grid.
+    """
+
+    def __init__(
+        self,
+        radius: float,
+        neighbors_in_radius: int,
+        total_neighbors_in_radius: int,
+        base_layer: int,
+        activation: nn.Module,
+        grid_resolution: tuple[int, int, int],
+    ):
+        super().__init__()
+        self.bq_warp = BQWarp(
+            radius=radius,
+            neighbors_in_radius=neighbors_in_radius,
+        )
+
+        self.local_point_conv = LocalPointConv(
+            input_features=total_neighbors_in_radius,
+            base_layer=base_layer,
+            output_features=neighbors_in_radius,
+            activation=activation,
+        )
+        self.grid_resolution = grid_resolution
+
+    def forward(
+        self,
+        encoding_g: torch.Tensor,
+        volume_mesh_centers: torch.Tensor,
+        p_grid: torch.Tensor,
+    ) -> torch.Tensor:
+        batch_size = volume_mesh_centers.shape[0]
+        nx, ny, nz = self.grid_resolution
+
+        p_grid = torch.reshape(p_grid, (batch_size, nx * ny * nz, 3))
+        mapping, outputs = self.bq_warp(
+            volume_mesh_centers, p_grid, reverse_mapping=False
+        )
+        mapping = mapping.type(torch.int64)
+        mask = mapping != 0
+
+        encoding_g_inner = []
+        for j in range(encoding_g.shape[1]):
+            geo_encoding = rearrange(encoding_g[:, j], "b nx ny nz -> b 1 (nx ny nz)")
+
+            geo_encoding_sampled = torch.index_select(
+                geo_encoding, 2, mapping.flatten()
+            )
+            geo_encoding_sampled = torch.reshape(geo_encoding_sampled, mask.shape)
+            geo_encoding_sampled = geo_encoding_sampled * mask
+
+            encoding_g_inner.append(geo_encoding_sampled)
+        encoding_g_inner = torch.cat(encoding_g_inner, dim=2)
+        encoding_g_inner = self.local_point_conv(encoding_g_inner)
+
+        return encoding_g_inner
+
+
+class MultiGeometryEncoding(nn.Module):
+    """
+    Module to apply multiple local geometry encodings
+
+    This will stack several local geometry encodings together, and concatenate the results.
+
+    Args:
+        radii: The list of radii of the local geometry encodings.
+        neighbors_in_radius: The list of number of neighbors in the radius of the local geometry encodings.
+        geo_encoding_type: The type of geometry encoding to use. Can be "both", "stl", or "sdf".
+        base_layer: The number of neurons in the hidden layer of the MLP.
+        activation: The activation function to use in the MLP.
+        grid_resolution: The resolution of the grid.
+    """
+
+    def __init__(
+        self,
+        radii: list[float],
+        neighbors_in_radius: list[int],
+        geo_encoding_type: str,
+        base_layer: int,
+        activation: nn.Module,
+        grid_resolution: tuple[int, int, int],
+    ):
+        super().__init__()
+
+        self.local_geo_encodings = nn.ModuleList(
+            [
+                LocalGeometryEncoding(
+                    radius=r,
+                    neighbors_in_radius=n,
+                    total_neighbors_in_radius=self.calculate_total_neighbors_in_radius(
+                        geo_encoding_type, n, radii
+                    ),
+                    base_layer=base_layer,
+                    activation=activation,
+                    grid_resolution=grid_resolution,
+                )
+                for r, n in zip(radii, neighbors_in_radius)
+            ]
+        )
+
+    def calculate_total_neighbors_in_radius(
+        self, geo_encoding_type: str, neighbors_in_radius: int, radii: list[float]
+    ) -> list[int]:
+        if geo_encoding_type == "both":
+            total_neighbors_in_radius = neighbors_in_radius * (len(radii) + 1)
+        elif geo_encoding_type == "stl":
+            total_neighbors_in_radius = neighbors_in_radius * (len(radii))
+        elif geo_encoding_type == "sdf":
+            total_neighbors_in_radius = neighbors_in_radius
+
+        return total_neighbors_in_radius
+
+    def forward(
+        self,
+        encoding_g: torch.Tensor,
+        volume_mesh_centers: torch.Tensor,
+        p_grid: torch.Tensor,
+    ) -> torch.Tensor:
+        return torch.cat(
+            [
+                local_geo_encoding(encoding_g, volume_mesh_centers, p_grid)
+                for local_geo_encoding in self.local_geo_encodings
+            ],
+            dim=-1,
+        )
diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py
index 0b658ffbd0..f4da77948e 100644
--- a/physicsnemo/models/domino/model.py
+++ b/physicsnemo/models/domino/model.py
@@ -36,9 +36,10 @@
 from .ball_query import BQWarp
 from .encodings import (
     EncodingMLP,
+    MultiGeometryEncoding,
     fourier_encode_vectorized,
 )
-from .mlps import AggregationModel, LocalPointConv
+from .mlps import AggregationModel
 
 
 def get_activation(activation: Literal["relu", "gelu"]) -> Callable:
@@ -881,79 +882,26 @@ def __init__(
                 base_layer=model_parameters.position_encoder.base_neurons,
                 activation=get_activation(model_parameters.position_encoder.activation),
             )
-        # BQ for surface
-        self.surface_neighbors_in_radius = (
-            model_parameters.geometry_local.surface_neighbors_in_radius
-        )
-        self.surface_radius = model_parameters.geometry_local.surface_radii
-        self.surface_bq_warp = nn.ModuleList()
-        self.surface_local_point_conv = nn.ModuleList()
-
-        for ct in range(len(self.surface_radius)):
-            if self.geo_encoding_type == "both":
-                total_neighbors_in_radius = self.surface_neighbors_in_radius[ct] * (
-                    len(model_parameters.geometry_rep.geo_conv.surface_radii) + 1
-                )
-            elif self.geo_encoding_type == "stl":
-                total_neighbors_in_radius = self.surface_neighbors_in_radius[ct] * (
-                    len(model_parameters.geometry_rep.geo_conv.surface_radii)
-                )
-            elif self.geo_encoding_type == "sdf":
-                total_neighbors_in_radius = self.surface_neighbors_in_radius[ct]
-
-            self.surface_bq_warp.append(
-                BQWarp(
-                    radius=self.surface_radius[ct],
-                    neighbors_in_radius=self.surface_neighbors_in_radius[ct],
-                )
-            )
-            self.surface_local_point_conv.append(
-                LocalPointConv(
-                    input_features=total_neighbors_in_radius,
-                    base_layer=512,
-                    output_features=self.surface_neighbors_in_radius[ct],
-                    activation=get_activation(
-                        model_parameters.local_point_conv.activation
-                    ),
-                )
-            )
 
-        # BQ for volume
-        self.volume_neighbors_in_radius = (
-            model_parameters.geometry_local.volume_neighbors_in_radius
+        # Create a set of local geometry encodings for the surface data:
+        self.surface_local_geo_encodings = MultiGeometryEncoding(
+            radii=model_parameters.geometry_local.surface_radii,
+            neighbors_in_radius=model_parameters.geometry_local.surface_neighbors_in_radius,
+            geo_encoding_type=self.geo_encoding_type,
+            base_layer=512,
+            activation=get_activation(model_parameters.local_point_conv.activation),
+            grid_resolution=self.grid_resolution,
         )
-        self.volume_radius = model_parameters.geometry_local.volume_radii
-        self.volume_bq_warp = nn.ModuleList()
-        self.volume_local_point_conv = nn.ModuleList()
-
-        for ct in range(len(self.volume_radius)):
-            if self.geo_encoding_type == "both":
-                total_neighbors_in_radius = self.volume_neighbors_in_radius[ct] * (
-                    len(model_parameters.geometry_rep.geo_conv.volume_radii) + 1
-                )
-            elif self.geo_encoding_type == "stl":
-                total_neighbors_in_radius = self.volume_neighbors_in_radius[ct] * (
-                    len(model_parameters.geometry_rep.geo_conv.volume_radii)
-                )
-            elif self.geo_encoding_type == "sdf":
-                total_neighbors_in_radius = self.volume_neighbors_in_radius[ct]
 
-            self.volume_bq_warp.append(
-                BQWarp(
-                    radius=self.volume_radius[ct],
-                    neighbors_in_radius=self.volume_neighbors_in_radius[ct],
-                )
-            )
-            self.volume_local_point_conv.append(
-                LocalPointConv(
-                    input_features=total_neighbors_in_radius,
-                    base_layer=512,
-                    output_features=self.volume_neighbors_in_radius[ct],
-                    activation=get_activation(
-                        model_parameters.local_point_conv.activation
-                    ),
-                )
-            )
+        # Create a set of local geometry encodings for the surface data:
+        self.volume_local_geo_encodings = MultiGeometryEncoding(
+            radii=model_parameters.geometry_local.volume_radii,
+            neighbors_in_radius=model_parameters.geometry_local.volume_neighbors_in_radius,
+            geo_encoding_type=self.geo_encoding_type,
+            base_layer=512,
+            activation=get_activation(model_parameters.local_point_conv.activation),
+            grid_resolution=self.grid_resolution,
+        )
 
         # Transmitting surface to volume
         self.surf_to_vol_conv1 = nn.Conv3d(
@@ -973,7 +921,7 @@ def __init__(
         if self.output_features_surf is not None:
             # Surface
             base_layer_geo_surf = 0
-            for j in self.surface_neighbors_in_radius:
+            for j in model_parameters.geometry_local.surface_neighbors_in_radius:
                 base_layer_geo_surf += j
 
             self.agg_model_surf = nn.ModuleList()
@@ -995,7 +943,7 @@ def __init__(
         if self.output_features_vol is not None:
             # Volume
             base_layer_geo_vol = 0
-            for j in self.volume_neighbors_in_radius:
+            for j in model_parameters.geometry_local.volume_neighbors_in_radius:
                 base_layer_geo_vol += j
 
             self.agg_model_vol = nn.ModuleList()
@@ -1014,83 +962,6 @@ def __init__(
                     )
                 )
 
-    def position_encoder(
-        self,
-        encoding_node: torch.Tensor,
-        eval_mode: Literal["surface", "volume"] = "volume",
-    ) -> torch.Tensor:
-        """
-        Compute positional encoding for input points.
-
-        Args:
-            encoding_node: Tensor containing node position information
-            eval_mode: Mode of evaluation, either "volume" or "surface"
-
-        Returns:
-            Tensor containing positional encoding features
-        """
-        if eval_mode == "volume":
-            x = self.fc_p_vol(encoding_node)
-        elif eval_mode == "surface":
-            x = self.fc_p_surf(encoding_node)
-        else:
-            raise ValueError(
-                f"`eval_mode` must be 'surface' or 'volume', got {eval_mode=}"
-            )
-        return x
-
-    def geo_encoding_local(
-        self, encoding_g, volume_mesh_centers, p_grid, mode="volume"
-    ):
-        """Function to calculate local geometry encoding from global encoding"""
-
-        if mode == "volume":
-            radius = self.volume_radius
-            bq_warp = self.volume_bq_warp
-            point_conv = self.volume_local_point_conv
-        elif mode == "surface":
-            radius = self.surface_radius
-            bq_warp = self.surface_bq_warp
-            point_conv = self.surface_local_point_conv
-
-        batch_size = volume_mesh_centers.shape[0]
-        nx, ny, nz = (
-            self.grid_resolution[0],
-            self.grid_resolution[1],
-            self.grid_resolution[2],
-        )
-
-        encoding_outer = []
-        for p in range(len(radius)):
-            p_grid = torch.reshape(p_grid, (batch_size, nx * ny * nz, 3))
-            mapping, outputs = bq_warp[p](
-                volume_mesh_centers, p_grid, reverse_mapping=False
-            )
-            mapping = mapping.type(torch.int64)
-            mask = mapping != 0
-
-            encoding_g_inner = []
-            for j in range(encoding_g.shape[1]):
-                geo_encoding = rearrange(
-                    encoding_g[:, j], "b nx ny nz -> b 1 (nx ny nz)"
-                )
-
-                geo_encoding_sampled = torch.index_select(
-                    geo_encoding, 2, mapping.flatten()
-                )
-                geo_encoding_sampled = torch.reshape(geo_encoding_sampled, mask.shape)
-                geo_encoding_sampled = geo_encoding_sampled * mask
-
-                encoding_g_inner.append(geo_encoding_sampled)
-            encoding_g_inner = torch.cat(encoding_g_inner, dim=2)
-            encoding_g_inner = point_conv[p](encoding_g_inner)
-
-            encoding_outer.append(encoding_g_inner)
-
-        encoding_g = torch.cat(encoding_outer, dim=-1)
-
-        return encoding_g
-
     def calculate_solution_with_neighbors(
         self,
         surface_mesh_centers,
@@ -1300,11 +1171,6 @@ def sample_sphere_shell(self, center, r_inner, r_outer, num_points):
             Tensor of shape (batch_size, num_points, num_samples, 3) containing
             the sampled points within the spherical shell around each center
         """
-        # directions = torch.randn(
-        #     size=(center.shape[0], center.shape[1], num_points, center.shape[2]),
-        #     device=center.device,
-        # )
-        # directions = directions / torch.norm(directions, dim=-1, keepdim=True)
 
         unsqueezed_center = center.unsqueeze(2).expand(-1, -1, num_points, -1)
 
@@ -1587,9 +1453,7 @@ def forward(self, data_dict, return_volume_neighbors=False):
                 encoding_node_vol = pos_volume_center_of_mass
 
             # Calculate positional encoding on volume nodes
-            encoding_node_vol = self.position_encoder(
-                encoding_node_vol, eval_mode="volume"
-            )
+            encoding_node_vol = self.fc_p_vol(encoding_node_vol)
 
         if self.output_features_surf is not None:
             # Represent geometry on bounding box
@@ -1605,9 +1469,7 @@ def forward(self, data_dict, return_volume_neighbors=False):
             encoding_node_surf = pos_surface_center_of_mass
 
             # Calculate positional encoding on surface centers
-            encoding_node_surf = self.position_encoder(
-                encoding_node_surf, eval_mode="surface"
-            )
+            encoding_node_surf = self.fc_p_surf(encoding_node_surf)
 
         if (
             self.output_features_surf is not None
@@ -1622,8 +1484,10 @@ def forward(self, data_dict, return_volume_neighbors=False):
             # Calculate local geometry encoding for volume
             # Sampled points on volume
             volume_mesh_centers = data_dict["volume_mesh_centers"]
-            encoding_g_vol = self.geo_encoding_local(
-                0.5 * encoding_g_vol, volume_mesh_centers, p_grid, mode="volume"
+            encoding_g_vol = self.volume_local_geo_encodings(
+                0.5 * encoding_g_vol,
+                volume_mesh_centers,
+                p_grid,
             )
 
             # Approximate solution on volume node
@@ -1654,8 +1518,8 @@ def forward(self, data_dict, return_volume_neighbors=False):
             surface_areas = torch.unsqueeze(surface_areas, -1)
             surface_neighbors_areas = torch.unsqueeze(surface_neighbors_areas, -1)
             # Calculate local geometry encoding for surface
-            encoding_g_surf = self.geo_encoding_local(
-                0.5 * encoding_g_surf, surface_mesh_centers, s_grid, mode="surface"
+            encoding_g_surf = self.surface_local_geo_encodings(
+                0.5 * encoding_g_surf, surface_mesh_centers, s_grid
             )
 
             # Approximate solution on surface cell center

From 4038ff3190d05dc0ea9d66d91168a09440b1478e Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Fri, 19 Sep 2025 15:55:02 +0000
Subject: [PATCH 35/98] Further refactor DoMINO to put solution calculations in
 separate modules.  Not only does this clean up the main model code, but this
 will enable graph capture of the solution functions which is important on
 hopper.

---
 physicsnemo/models/domino/model.py     | 521 +++----------------------
 physicsnemo/models/domino/solutions.py | 405 +++++++++++++++++++
 2 files changed, 455 insertions(+), 471 deletions(-)
 create mode 100644 physicsnemo/models/domino/solutions.py

diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py
index f4da77948e..31547c743b 100644
--- a/physicsnemo/models/domino/model.py
+++ b/physicsnemo/models/domino/model.py
@@ -22,7 +22,6 @@
 """
 
 import math
-from collections import defaultdict
 from typing import Callable, Literal, Sequence
 
 import torch
@@ -40,6 +39,7 @@
     fourier_encode_vectorized,
 )
 from .mlps import AggregationModel
+from .solutions import SolutionCalculatorSurface, SolutionCalculatorVolume
 
 
 def get_activation(activation: Literal["relu", "gelu"]) -> Callable:
@@ -757,18 +757,6 @@ def __init__(
         else:
             self.num_volume_neighbors = 50
 
-        if hasattr(model_parameters, "return_volume_neighbors"):
-            self.return_volume_neighbors = model_parameters.return_volume_neighbors
-            if (
-                self.return_volume_neighbors
-                and self.solution_calculation_mode == "one-loop"
-            ):
-                print(
-                    "'one-loop' solution_calculation mode not supported when return_volume_neighbors is set to true"
-                )
-                print("Overwriting the solution_calculation mode to 'two-loop'")
-                self.solution_calculation_mode = "two-loop"
-
         if self.use_surface_normals:
             if not self.use_surface_area:
                 input_features_surface = input_features + 3
@@ -940,6 +928,20 @@ def __init__(
                     )
                 )
 
+            self.solution_calculator_surf = SolutionCalculatorSurface(
+                num_variables=self.num_variables_surf,
+                num_sample_points=self.num_sample_points_surface,
+                use_surface_normals=self.use_surface_normals,
+                use_surface_area=self.use_surface_area,
+                noise_intensity=50,
+                encode_parameters=self.encode_parameters,
+                parameter_model=self.parameter_model
+                if self.encode_parameters
+                else None,
+                aggregation_model=self.agg_model_surf,
+                nn_basis=self.nn_basis_surf,
+            )
+
         if self.output_features_vol is not None:
             # Volume
             base_layer_geo_vol = 0
@@ -961,452 +963,24 @@ def __init__(
                         ),
                     )
                 )
-
-    def calculate_solution_with_neighbors(
-        self,
-        surface_mesh_centers,
-        encoding_g,
-        encoding_node,
-        surface_mesh_neighbors,
-        surface_normals,
-        surface_neighbors_normals,
-        surface_areas,
-        surface_neighbors_areas,
-        global_params_values,
-        global_params_reference,
-        num_sample_points=7,
-    ):
-        """Function to approximate solution given the neighborhood information"""
-        num_variables = self.num_variables_surf
-        nn_basis = self.nn_basis_surf
-        agg_model = self.agg_model_surf
-
-        if self.encode_parameters:
-            processed_parameters = []
-            for k in range(global_params_values.shape[1]):
-                param = torch.unsqueeze(global_params_values[:, k, :], 1)
-                ref = torch.unsqueeze(global_params_reference[:, k, :], 1)
-                param = param.expand(
-                    param.shape[0],
-                    surface_mesh_centers.shape[1],
-                    param.shape[2],
-                )
-                param = param / ref
-                processed_parameters.append(param)
-            processed_parameters = torch.cat(processed_parameters, axis=-1)
-            param_encoding = self.parameter_model(processed_parameters)
-
-        if self.use_surface_normals:
-            if not self.use_surface_area:
-                surface_mesh_centers = torch.cat(
-                    (surface_mesh_centers, surface_normals),
-                    dim=-1,
-                )
-                if num_sample_points > 1:
-                    surface_mesh_neighbors = torch.cat(
-                        (
-                            surface_mesh_neighbors,
-                            surface_neighbors_normals,
-                        ),
-                        dim=-1,
-                    )
-
+            if hasattr(model_parameters, "return_volume_neighbors"):
+                return_volume_neighbors = model_parameters.return_volume_neighbors
             else:
-                surface_mesh_centers = torch.cat(
-                    (
-                        surface_mesh_centers,
-                        surface_normals,
-                        torch.log(surface_areas) / 10,
-                    ),
-                    dim=-1,
-                )
-                if num_sample_points > 1:
-                    surface_mesh_neighbors = torch.cat(
-                        (
-                            surface_mesh_neighbors,
-                            surface_neighbors_normals,
-                            torch.log(surface_neighbors_areas) / 10,
-                        ),
-                        dim=-1,
-                    )
-
-        if self.solution_calculation_mode == "one-loop":
-            encoding_list = [
-                encoding_node.unsqueeze(2).expand(-1, -1, num_sample_points, -1),
-                encoding_g.unsqueeze(2).expand(-1, -1, num_sample_points, -1),
-            ]
-
-            for f in range(num_variables):
-                one_loop_centers_expanded = surface_mesh_centers.unsqueeze(2)
-
-                one_loop_noise = one_loop_centers_expanded - (
-                    surface_mesh_neighbors + 1e-6
-                )
-                one_loop_noise = torch.norm(one_loop_noise, dim=-1, keepdim=True)
-
-                # Doing it this way prevents the intermediate one_loop_basis_f from being stored in memory for the rest of the function.
-                agg_output = agg_model[f](
-                    torch.cat(
-                        (
-                            nn_basis[f](
-                                torch.cat(
-                                    (
-                                        one_loop_centers_expanded,
-                                        surface_mesh_neighbors + 1e-6,
-                                    ),
-                                    dim=2,
-                                )
-                            ),
-                            *encoding_list,
-                        ),
-                        dim=-1,
-                    )
-                )
-
-                one_loop_output_center, one_loop_output_neighbor = torch.split(
-                    agg_output, [1, num_sample_points - 1], dim=2
-                )
-                one_loop_output_neighbor = one_loop_output_neighbor * (
-                    1.0 / one_loop_noise
-                )
+                return_volume_neighbors = False
 
-                one_loop_output_center = one_loop_output_center.squeeze(2)
-                one_loop_output_neighbor = one_loop_output_neighbor.sum(2)
-                one_loop_dist_sum = torch.sum(1.0 / one_loop_noise, dim=2)
-
-                # Stop here
-                if num_sample_points > 1:
-                    one_loop_output_res = (
-                        0.5 * one_loop_output_center
-                        + 0.5 * one_loop_output_neighbor / one_loop_dist_sum
-                    )
-                else:
-                    one_loop_output_res = one_loop_output_center
-                if f == 0:
-                    one_loop_output_all = one_loop_output_res
-                else:
-                    one_loop_output_all = torch.cat(
-                        (one_loop_output_all, one_loop_output_res), dim=-1
-                    )
-
-            return one_loop_output_all
-
-        if self.solution_calculation_mode == "two-loop":
-            for f in range(num_variables):
-                for p in range(num_sample_points):
-                    if p == 0:
-                        volume_m_c = surface_mesh_centers
-                    else:
-                        volume_m_c = surface_mesh_neighbors[:, :, p - 1] + 1e-6
-                        noise = surface_mesh_centers - volume_m_c
-                        dist = torch.norm(noise, dim=-1, keepdim=True)
-
-                    basis_f = nn_basis[f](volume_m_c)
-                    output = torch.cat((basis_f, encoding_node, encoding_g), dim=-1)
-                    if self.encode_parameters:
-                        output = torch.cat((output, param_encoding), dim=-1)
-                    if p == 0:
-                        output_center = agg_model[f](output)
-                    else:
-                        if p == 1:
-                            output_neighbor = agg_model[f](output) * (1.0 / dist)
-                            dist_sum = 1.0 / dist
-                        else:
-                            output_neighbor += agg_model[f](output) * (1.0 / dist)
-                            dist_sum += 1.0 / dist
-                if num_sample_points > 1:
-                    output_res = 0.5 * output_center + 0.5 * output_neighbor / dist_sum
-                else:
-                    output_res = output_center
-                if f == 0:
-                    output_all = output_res
-                else:
-                    output_all = torch.cat((output_all, output_res), dim=-1)
-
-            return output_all
-
-    def sample_sphere(self, center, r, num_points):
-        """Uniformly sample points in a 3D sphere around the center.
-
-        This method generates random points within a sphere of radius r centered
-        at each point in the input tensor. The sampling is uniform in volume,
-        meaning points are more likely to be sampled in the outer regions of the sphere.
-
-        Args:
-            center: Tensor of shape (batch_size, num_points, 3) containing center coordinates
-            r: Radius of the sphere for sampling
-            num_points: Number of points to sample per center
-
-        Returns:
-            Tensor of shape (batch_size, num_points, num_samples, 3) containing
-            the sampled points around each center
-        """
-        # Adjust the center points to the final shape:
-        unsqueezed_center = center.unsqueeze(2).expand(-1, -1, num_points, -1)
-
-        # Generate directions like the centers:
-        directions = torch.randn_like(unsqueezed_center)
-        directions = directions / torch.norm(directions, dim=-1, keepdim=True)
-
-        # Generate radii like the centers:
-        radii = r * torch.pow(torch.rand_like(unsqueezed_center), 1 / 3)
-
-        output = unsqueezed_center + directions * radii
-        return output
-
-    def sample_sphere_shell(self, center, r_inner, r_outer, num_points):
-        """Uniformly sample points in a 3D spherical shell around a center.
-
-        This method generates random points within a spherical shell (annulus)
-        between inner radius r_inner and outer radius r_outer centered at each
-        point in the input tensor. The sampling is uniform in volume within the shell.
-
-        Args:
-            center: Tensor of shape (batch_size, num_points, 3) containing center coordinates
-            r_inner: Inner radius of the spherical shell
-            r_outer: Outer radius of the spherical shell
-            num_points: Number of points to sample per center
-
-        Returns:
-            Tensor of shape (batch_size, num_points, num_samples, 3) containing
-            the sampled points within the spherical shell around each center
-        """
-
-        unsqueezed_center = center.unsqueeze(2).expand(-1, -1, num_points, -1)
-
-        # Generate directions like the centers:
-        directions = torch.randn_like(unsqueezed_center)
-        directions = directions / torch.norm(directions, dim=-1, keepdim=True)
-
-        radii = (
-            torch.rand_like(unsqueezed_center) * (r_outer**3 - r_inner**3) + r_inner**3
-        )
-        radii = torch.pow(radii, 1 / 3)
-
-        output = unsqueezed_center + directions * radii
-
-        return output
-
-    def calculate_solution(
-        self,
-        volume_mesh_centers,
-        encoding_g,
-        encoding_node,
-        global_params_values,
-        global_params_reference,
-        eval_mode,
-        num_sample_points=20,
-        noise_intensity=50,
-        return_volume_neighbors=False,
-    ):
-        """Function to approximate solution sampling the neighborhood information"""
-        if eval_mode == "volume":
-            num_variables = self.num_variables_vol
-            nn_basis = self.nn_basis_vol
-            agg_model = self.agg_model_vol
-        elif eval_mode == "surface":
-            num_variables = self.num_variables_surf
-            nn_basis = self.nn_basis_surf
-            agg_model = self.agg_model_surf
-
-        if self.encode_parameters:
-            processed_parameters = []
-            for k in range(global_params_values.shape[1]):
-                param = torch.unsqueeze(global_params_values[:, k, :], 1)
-                ref = torch.unsqueeze(global_params_reference[:, k, :], 1)
-                param = param.expand(
-                    param.shape[0],
-                    volume_mesh_centers.shape[1],
-                    param.shape[2],
-                )
-                param = param / ref
-                processed_parameters.append(param)
-            processed_parameters = torch.cat(processed_parameters, axis=-1)
-            param_encoding = self.parameter_model(processed_parameters)
-
-        if self.solution_calculation_mode == "one-loop":
-            # Stretch these out to num_sample_points
-            one_loop_encoding_node = encoding_node.unsqueeze(0).expand(
-                num_sample_points, -1, -1, -1
-            )
-            one_loop_encoding_g = encoding_g.unsqueeze(0).expand(
-                num_sample_points, -1, -1, -1
+            self.solution_calculator_vol = SolutionCalculatorVolume(
+                num_variables=self.num_variables_vol,
+                num_sample_points=self.num_sample_points_volume,
+                noise_intensity=50,
+                return_volume_neighbors=return_volume_neighbors,
+                encode_parameters=self.encode_parameters,
+                parameter_model=self.parameter_model
+                if self.encode_parameters
+                else None,
+                aggregation_model=self.agg_model_vol,
+                nn_basis=self.nn_basis_vol,
             )
 
-            if self.encode_parameters:
-                one_loop_other_terms = (
-                    one_loop_encoding_node,
-                    one_loop_encoding_g,
-                    param_encoding,
-                )
-            else:
-                one_loop_other_terms = (one_loop_encoding_node, one_loop_encoding_g)
-
-            for f in range(num_variables):
-                one_loop_volume_mesh_centers_expanded = volume_mesh_centers.unsqueeze(
-                    0
-                ).expand(num_sample_points, -1, -1, -1)
-                # Bulk_random_noise has shape (num_sample_points, batch_size, num_points, 3)
-                one_loop_bulk_random_noise = torch.rand_like(
-                    one_loop_volume_mesh_centers_expanded
-                )
-
-                one_loop_bulk_random_noise = 2 * (one_loop_bulk_random_noise - 0.5)
-                one_loop_bulk_random_noise = (
-                    one_loop_bulk_random_noise / noise_intensity
-                )
-                one_loop_bulk_dist = torch.norm(
-                    one_loop_bulk_random_noise, dim=-1, keepdim=True
-                )
-
-                _, one_loop_bulk_dist = torch.split(
-                    one_loop_bulk_dist, [1, num_sample_points - 1], dim=0
-                )
-
-                # Set the first sample point to 0.0:
-                one_loop_bulk_random_noise[0] = torch.zeros_like(
-                    one_loop_bulk_random_noise[0]
-                )
-
-                # Add the noise to the expanded volume_mesh_centers:
-                one_loop_volume_m_c = volume_mesh_centers + one_loop_bulk_random_noise
-                # If this looks overly complicated - it is.
-                # But, this makes sure that the memory used to store the output of both nn_basis[f]
-                # as well as the output of torch.cat can be deallocated immediately.
-                # Apply the aggregation model and distance scaling:
-                one_loop_output = agg_model[f](
-                    torch.cat(
-                        (nn_basis[f](one_loop_volume_m_c), *one_loop_other_terms),
-                        dim=-1,
-                    )
-                )
-
-                # select off the first, unperturbed term:
-                one_loop_output_center, one_loop_output_neighbor = torch.split(
-                    one_loop_output, [1, num_sample_points - 1], dim=0
-                )
-
-                # Scale the neighbor terms by the distance:
-                one_loop_output_neighbor = one_loop_output_neighbor / one_loop_bulk_dist
-
-                one_loop_dist_sum = torch.sum(1.0 / one_loop_bulk_dist, dim=0)
-
-                # Adjust shapes:
-                one_loop_output_center = one_loop_output_center.squeeze(1)
-                one_loop_output_neighbor = one_loop_output_neighbor.sum(0)
-
-                # Compare:
-                if num_sample_points > 1:
-                    one_loop_output_res = (
-                        0.5 * one_loop_output_center
-                        + 0.5 * one_loop_output_neighbor / one_loop_dist_sum
-                    )
-                else:
-                    one_loop_output_res = one_loop_output_center
-                if f == 0:
-                    one_loop_output_all = one_loop_output_res
-                else:
-                    one_loop_output_all = torch.cat(
-                        (one_loop_output_all, one_loop_output_res), dim=-1
-                    )
-
-            return one_loop_output_all
-
-        if self.solution_calculation_mode == "two-loop":
-            volume_m_c_perturbed = [volume_mesh_centers.unsqueeze(2)]
-
-            if return_volume_neighbors:
-                num_hop1 = num_sample_points
-                num_hop2 = (
-                    num_sample_points // 2 if num_sample_points != 1 else 1
-                )  # This is per 1 hop node
-                neighbors = defaultdict(list)
-
-                volume_m_c_hop1 = self.sample_sphere(
-                    volume_mesh_centers, 1 / noise_intensity, num_hop1
-                )
-                # 1 hop neighbors
-                for i in range(num_hop1):
-                    idx = len(volume_m_c_perturbed)
-                    volume_m_c_perturbed.append(volume_m_c_hop1[:, :, i : i + 1, :])
-                    neighbors[0].append(idx)
-
-                # 2 hop neighbors
-                for i in range(num_hop1):
-                    parent_idx = (
-                        i + 1
-                    )  # Skipping the first point, which is the original
-                    parent_point = volume_m_c_perturbed[parent_idx]
-
-                    children = self.sample_sphere_shell(
-                        parent_point.squeeze(2),
-                        1 / noise_intensity,
-                        2 / noise_intensity,
-                        num_hop2,
-                    )
-
-                    for c in range(num_hop2):
-                        idx = len(volume_m_c_perturbed)
-                        volume_m_c_perturbed.append(children[:, :, c : c + 1, :])
-                        neighbors[parent_idx].append(idx)
-
-                volume_m_c_perturbed = torch.cat(volume_m_c_perturbed, dim=2)
-                neighbors = dict(neighbors)
-                field_neighbors = {i: [] for i in range(num_variables)}
-            else:
-                volume_m_c_sample = self.sample_sphere(
-                    volume_mesh_centers, 1 / noise_intensity, num_sample_points
-                )
-                for i in range(num_sample_points):
-                    volume_m_c_perturbed.append(volume_m_c_sample[:, :, i : i + 1, :])
-
-                volume_m_c_perturbed = torch.cat(volume_m_c_perturbed, dim=2)
-
-            for f in range(num_variables):
-                for p in range(volume_m_c_perturbed.shape[2]):
-                    volume_m_c = volume_m_c_perturbed[:, :, p, :]
-                    if p != 0:
-                        dist = torch.norm(
-                            volume_m_c - volume_mesh_centers, dim=-1, keepdim=True
-                        )
-                    basis_f = nn_basis[f](volume_m_c)
-                    output = torch.cat((basis_f, encoding_node, encoding_g), dim=-1)
-                    if self.encode_parameters:
-                        output = torch.cat((output, param_encoding), dim=-1)
-                    if p == 0:
-                        output_center = agg_model[f](output)
-                    else:
-                        if p == 1:
-                            output_neighbor = agg_model[f](output) * (1.0 / dist)
-                            dist_sum = 1.0 / dist
-                        else:
-                            output_neighbor += agg_model[f](output) * (1.0 / dist)
-                            dist_sum += 1.0 / dist
-                    if return_volume_neighbors:
-                        field_neighbors[f].append(agg_model[f](output))
-
-                if return_volume_neighbors:
-                    field_neighbors[f] = torch.stack(field_neighbors[f], dim=2)
-
-                if num_sample_points > 1:
-                    output_res = (
-                        0.5 * output_center + 0.5 * output_neighbor / dist_sum
-                    )  # This only applies to the main point, and not the preturbed points
-                else:
-                    output_res = output_center
-                if f == 0:
-                    output_all = output_res
-                else:
-                    output_all = torch.cat((output_all, output_res), axis=-1)
-
-            if return_volume_neighbors:
-                field_neighbors = torch.cat(
-                    [field_neighbors[i] for i in range(num_variables)], dim=3
-                )
-                return output_all, volume_m_c_perturbed, field_neighbors, neighbors
-            else:
-                return output_all
-
     @profile
     def forward(self, data_dict, return_volume_neighbors=False):
         # Loading STL inputs, bounding box grids, precomputed SDF and scaling factors
@@ -1417,9 +991,6 @@ def forward(self, data_dict, return_volume_neighbors=False):
         # Bounding box grid
         s_grid = data_dict["surf_grid"]
         sdf_surf_grid = data_dict["sdf_surf_grid"]
-        # Scaling factors
-        surf_max = data_dict["surface_min_max"][:, 1]
-        surf_min = data_dict["surface_min_max"][:, 0]
 
         # Parameters
         global_params_values = data_dict["global_params_values"]
@@ -1431,11 +1002,16 @@ def forward(self, data_dict, return_volume_neighbors=False):
             p_grid = data_dict["grid"]
             sdf_grid = data_dict["sdf_grid"]
             # Scaling factors
-            vol_max = data_dict["volume_min_max"][:, 1]
-            vol_min = data_dict["volume_min_max"][:, 0]
+            if "volume_min_max" in data_dict.keys():
+                vol_max = data_dict["volume_min_max"][:, 1]
+                vol_min = data_dict["volume_min_max"][:, 0]
 
-            # Normalize based on computational domain
-            geo_centers_vol = 2.0 * (geo_centers - vol_min) / (vol_max - vol_min) - 1
+                # Normalize based on computational domain
+                geo_centers_vol = (
+                    2.0 * (geo_centers - vol_min) / (vol_max - vol_min) - 1
+                )
+            else:
+                geo_centers_vol = geo_centers
 
             encoding_g_vol = self.geo_rep_volume(geo_centers_vol, p_grid, sdf_grid)
 
@@ -1457,9 +1033,16 @@ def forward(self, data_dict, return_volume_neighbors=False):
 
         if self.output_features_surf is not None:
             # Represent geometry on bounding box
-            geo_centers_surf = (
-                2.0 * (geo_centers - surf_min) / (surf_max - surf_min) - 1
-            )
+            # Scaling factors
+            if "surface_min_max" in data_dict.keys():
+                surf_max = data_dict["surface_min_max"][:, 1]
+                surf_min = data_dict["surface_min_max"][:, 0]
+                geo_centers_surf = (
+                    2.0 * (geo_centers - surf_min) / (surf_max - surf_min) - 1
+                )
+            else:
+                geo_centers_surf = geo_centers
+
             encoding_g_surf = self.geo_rep_surface(
                 geo_centers_surf, s_grid, sdf_surf_grid
             )
@@ -1491,15 +1074,12 @@ def forward(self, data_dict, return_volume_neighbors=False):
             )
 
             # Approximate solution on volume node
-            output_vol = self.calculate_solution(
+            output_vol = self.solution_calculator_vol(
                 volume_mesh_centers,
                 encoding_g_vol,
                 encoding_node_vol,
                 global_params_values,
                 global_params_reference,
-                eval_mode="volume",
-                num_sample_points=self.num_sample_points_volume,
-                return_volume_neighbors=return_volume_neighbors,
             )
 
         else:
@@ -1523,7 +1103,7 @@ def forward(self, data_dict, return_volume_neighbors=False):
             )
 
             # Approximate solution on surface cell center
-            output_surf = self.calculate_solution_with_neighbors(
+            output_surf = self.solution_calculator_surf(
                 surface_mesh_centers,
                 encoding_g_surf,
                 encoding_node_surf,
@@ -1534,7 +1114,6 @@ def forward(self, data_dict, return_volume_neighbors=False):
                 surface_neighbors_areas,
                 global_params_values,
                 global_params_reference,
-                num_sample_points=self.num_sample_points_surface,
             )
         else:
             output_surf = None
diff --git a/physicsnemo/models/domino/solutions.py b/physicsnemo/models/domino/solutions.py
new file mode 100644
index 0000000000..87c2bf0d16
--- /dev/null
+++ b/physicsnemo/models/domino/solutions.py
@@ -0,0 +1,405 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This code contains the DoMINO model architecture.
+The DoMINO class contains an architecture to model both surface and
+volume quantities together as well as separately (controlled using
+the config.yaml file)
+"""
+
+from collections import defaultdict
+
+import torch
+import torch.nn as nn
+
+
+def sample_sphere(center, r, num_points):
+    """Uniformly sample points in a 3D sphere around the center.
+
+    This method generates random points within a sphere of radius r centered
+    at each point in the input tensor. The sampling is uniform in volume,
+    meaning points are more likely to be sampled in the outer regions of the sphere.
+
+    Args:
+        center: Tensor of shape (batch_size, num_points, 3) containing center coordinates
+        r: Radius of the sphere for sampling
+        num_points: Number of points to sample per center
+
+    Returns:
+        Tensor of shape (batch_size, num_points, num_samples, 3) containing
+        the sampled points around each center
+    """
+    # Adjust the center points to the final shape:
+    unsqueezed_center = center.unsqueeze(2).expand(-1, -1, num_points, -1)
+
+    # Generate directions like the centers:
+    directions = torch.randn_like(unsqueezed_center)
+    directions = directions / torch.norm(directions, dim=-1, keepdim=True)
+
+    # Generate radii like the centers:
+    radii = r * torch.pow(torch.rand_like(unsqueezed_center), 1 / 3)
+
+    output = unsqueezed_center + directions * radii
+    return output
+
+
+def sample_sphere_shell(center, r_inner, r_outer, num_points):
+    """Uniformly sample points in a 3D spherical shell around a center.
+
+    This method generates random points within a spherical shell (annulus)
+    between inner radius r_inner and outer radius r_outer centered at each
+    point in the input tensor. The sampling is uniform in volume within the shell.
+
+    Args:
+        center: Tensor of shape (batch_size, num_points, 3) containing center coordinates
+        r_inner: Inner radius of the spherical shell
+        r_outer: Outer radius of the spherical shell
+        num_points: Number of points to sample per center
+
+    Returns:
+        Tensor of shape (batch_size, num_points, num_samples, 3) containing
+        the sampled points within the spherical shell around each center
+    """
+
+    unsqueezed_center = center.unsqueeze(2).expand(-1, -1, num_points, -1)
+
+    # Generate directions like the centers:
+    directions = torch.randn_like(unsqueezed_center)
+    directions = directions / torch.norm(directions, dim=-1, keepdim=True)
+
+    radii = torch.rand_like(unsqueezed_center) * (r_outer**3 - r_inner**3) + r_inner**3
+    radii = torch.pow(radii, 1 / 3)
+
+    output = unsqueezed_center + directions * radii
+
+    return output
+
+
+class SolutionCalculatorVolume(nn.Module):
+    """
+    Module to calculate the output solution of the DoMINO Model for volume data.
+    """
+
+    def __init__(
+        self,
+        num_variables: int,
+        num_sample_points: int,
+        noise_intensity: float,
+        encode_parameters: bool,
+        return_volume_neighbors: bool,
+        parameter_model: nn.Module | None,
+        aggregation_model: nn.ModuleList,
+        nn_basis: nn.ModuleList,
+    ):
+        super().__init__()
+
+        self.num_variables = num_variables
+        self.num_sample_points = num_sample_points
+        self.noise_intensity = noise_intensity
+        self.encode_parameters = encode_parameters
+        self.return_volume_neighbors = return_volume_neighbors
+        self.parameter_model = parameter_model
+        self.aggregation_model = aggregation_model
+        self.nn_basis = nn_basis
+
+        if self.encode_parameters:
+            if self.parameter_model is None:
+                raise ValueError(
+                    "Parameter model is required when encode_parameters is True"
+                )
+
+    def encode_parameters(
+        self,
+        mesh_centers: torch.Tensor,
+        global_params_values: torch.Tensor,
+        global_params_reference: torch.Tensor,
+    ) -> torch.Tensor:
+        processed_parameters = []
+        for k in range(global_params_values.shape[1]):
+            param = torch.unsqueeze(global_params_values[:, k, :], 1)
+            ref = torch.unsqueeze(global_params_reference[:, k, :], 1)
+            param = param.expand(
+                param.shape[0],
+                mesh_centers.shape[1],
+                param.shape[2],
+            )
+            param = param / ref
+            processed_parameters.append(param)
+        processed_parameters = torch.cat(processed_parameters, axis=-1)
+        param_encoding = self.parameter_model(processed_parameters)
+
+        return param_encoding
+
+    def forward(
+        self,
+        volume_mesh_centers: torch.Tensor,
+        encoding_g: torch.Tensor,
+        encoding_node: torch.Tensor,
+        global_params_values: torch.Tensor,
+        global_params_reference: torch.Tensor,
+    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]:
+        """
+        Forward pass of the SolutionCalculator module.
+        """
+        if self.encode_parameters:
+            param_encoding = self.encode_parameters(
+                volume_mesh_centers, global_params_values, global_params_reference
+            )
+
+        volume_m_c_perturbed = [volume_mesh_centers.unsqueeze(2)]
+
+        if self.return_volume_neighbors:
+            num_hop1 = self.num_sample_points
+            num_hop2 = (
+                self.num_sample_points // 2 if self.num_sample_points != 1 else 1
+            )  # This is per 1 hop node
+            neighbors = defaultdict(list)
+
+            volume_m_c_hop1 = sample_sphere(
+                volume_mesh_centers, 1 / self.noise_intensity, num_hop1
+            )
+            # 1 hop neighbors
+            for i in range(num_hop1):
+                idx = len(volume_m_c_perturbed)
+                volume_m_c_perturbed.append(volume_m_c_hop1[:, :, i : i + 1, :])
+                neighbors[0].append(idx)
+
+            # 2 hop neighbors
+            for i in range(num_hop1):
+                parent_idx = i + 1  # Skipping the first point, which is the original
+                parent_point = volume_m_c_perturbed[parent_idx]
+
+                children = sample_sphere_shell(
+                    parent_point.squeeze(2),
+                    1 / self.noise_intensity,
+                    2 / self.noise_intensity,
+                    num_hop2,
+                )
+
+                for c in range(num_hop2):
+                    idx = len(volume_m_c_perturbed)
+                    volume_m_c_perturbed.append(children[:, :, c : c + 1, :])
+                    neighbors[parent_idx].append(idx)
+
+            volume_m_c_perturbed = torch.cat(volume_m_c_perturbed, dim=2)
+            neighbors = dict(neighbors)
+            field_neighbors = {i: [] for i in range(self.num_variables)}
+        else:
+            volume_m_c_sample = sample_sphere(
+                volume_mesh_centers, 1 / self.noise_intensity, self.num_sample_points
+            )
+            for i in range(self.num_sample_points):
+                volume_m_c_perturbed.append(volume_m_c_sample[:, :, i : i + 1, :])
+
+            volume_m_c_perturbed = torch.cat(volume_m_c_perturbed, dim=2)
+
+        for f in range(self.num_variables):
+            for p in range(volume_m_c_perturbed.shape[2]):
+                volume_m_c = volume_m_c_perturbed[:, :, p, :]
+                if p != 0:
+                    dist = torch.norm(
+                        volume_m_c - volume_mesh_centers, dim=-1, keepdim=True
+                    )
+                basis_f = self.nn_basis[f](volume_m_c)
+                output = torch.cat((basis_f, encoding_node, encoding_g), dim=-1)
+                if self.encode_parameters:
+                    output = torch.cat((output, param_encoding), dim=-1)
+                if p == 0:
+                    output_center = self.aggregation_model[f](output)
+                else:
+                    if p == 1:
+                        output_neighbor = self.aggregation_model[f](output) * (
+                            1.0 / dist
+                        )
+                        dist_sum = 1.0 / dist
+                    else:
+                        output_neighbor += self.aggregation_model[f](output) * (
+                            1.0 / dist
+                        )
+                        dist_sum += 1.0 / dist
+                if self.return_volume_neighbors:
+                    field_neighbors[f].append(self.aggregation_model[f](output))
+
+            if self.return_volume_neighbors:
+                field_neighbors[f] = torch.stack(field_neighbors[f], dim=2)
+
+            if self.num_sample_points > 1:
+                output_res = (
+                    0.5 * output_center + 0.5 * output_neighbor / dist_sum
+                )  # This only applies to the main point, and not the preturbed points
+            else:
+                output_res = output_center
+            if f == 0:
+                output_all = output_res
+            else:
+                output_all = torch.cat((output_all, output_res), axis=-1)
+
+        if self.return_volume_neighbors:
+            field_neighbors = torch.cat(
+                [field_neighbors[i] for i in range(self.num_variables)], dim=3
+            )
+            return output_all, volume_m_c_perturbed, field_neighbors, neighbors
+        else:
+            return output_all
+
+
+class SolutionCalculatorSurface(nn.Module):
+    """
+    Module to calculate the output solution of the DoMINO Model for surface data.
+    """
+
+    def __init__(
+        self,
+        num_variables: int,
+        num_sample_points: int,
+        noise_intensity: float,
+        encode_parameters: bool,
+        use_surface_normals: bool,
+        use_surface_area: bool,
+        parameter_model: nn.Module | None,
+        aggregation_model: nn.ModuleList,
+        nn_basis: nn.ModuleList,
+    ):
+        super().__init__()
+        self.num_variables = num_variables
+        self.num_sample_points = num_sample_points
+        self.noise_intensity = noise_intensity
+        self.encode_parameters = encode_parameters
+        self.use_surface_normals = use_surface_normals
+        self.use_surface_area = use_surface_area
+        self.parameter_model = parameter_model
+        self.aggregation_model = aggregation_model
+        self.nn_basis = nn_basis
+
+        if self.encode_parameters:
+            if self.parameter_model is None:
+                raise ValueError(
+                    "Parameter model is required when encode_parameters is True"
+                )
+
+    def encode_parameters(
+        self,
+        mesh_centers: torch.Tensor,
+        global_params_values: torch.Tensor,
+        global_params_reference: torch.Tensor,
+    ) -> torch.Tensor:
+        processed_parameters = []
+        for k in range(global_params_values.shape[1]):
+            param = torch.unsqueeze(global_params_values[:, k, :], 1)
+            ref = torch.unsqueeze(global_params_reference[:, k, :], 1)
+            param = param.expand(
+                param.shape[0],
+                mesh_centers.shape[1],
+                param.shape[2],
+            )
+            param = param / ref
+            processed_parameters.append(param)
+        processed_parameters = torch.cat(processed_parameters, axis=-1)
+        param_encoding = self.parameter_model(processed_parameters)
+
+        return param_encoding
+
+    def forward(
+        self,
+        surface_mesh_centers: torch.Tensor,
+        encoding_g: torch.Tensor,
+        encoding_node: torch.Tensor,
+        surface_mesh_neighbors: torch.Tensor,
+        surface_normals: torch.Tensor,
+        surface_neighbors_normals: torch.Tensor,
+        surface_areas: torch.Tensor,
+        surface_neighbors_areas: torch.Tensor,
+        global_params_values: torch.Tensor,
+        global_params_reference: torch.Tensor,
+    ) -> torch.Tensor:
+        """Function to approximate solution given the neighborhood information"""
+
+        if self.encode_parameters:
+            param_encoding = self.encode_parameters(
+                surface_mesh_centers, global_params_values, global_params_reference
+            )
+
+        if self.use_surface_normals:
+            if not self.use_surface_area:
+                surface_mesh_centers = torch.cat(
+                    (surface_mesh_centers, surface_normals),
+                    dim=-1,
+                )
+                if self.num_sample_points > 1:
+                    surface_mesh_neighbors = torch.cat(
+                        (
+                            surface_mesh_neighbors,
+                            surface_neighbors_normals,
+                        ),
+                        dim=-1,
+                    )
+
+            else:
+                surface_mesh_centers = torch.cat(
+                    (
+                        surface_mesh_centers,
+                        surface_normals,
+                        torch.log(surface_areas) / 10,
+                    ),
+                    dim=-1,
+                )
+                if self.num_sample_points > 1:
+                    surface_mesh_neighbors = torch.cat(
+                        (
+                            surface_mesh_neighbors,
+                            surface_neighbors_normals,
+                            torch.log(surface_neighbors_areas) / 10,
+                        ),
+                        dim=-1,
+                    )
+
+        for f in range(self.num_variables):
+            for p in range(self.num_sample_points):
+                if p == 0:
+                    volume_m_c = surface_mesh_centers
+                else:
+                    volume_m_c = surface_mesh_neighbors[:, :, p - 1] + 1e-6
+                    noise = surface_mesh_centers - volume_m_c
+                    dist = torch.norm(noise, dim=-1, keepdim=True)
+
+                basis_f = self.nn_basis[f](volume_m_c)
+                output = torch.cat((basis_f, encoding_node, encoding_g), dim=-1)
+                if self.encode_parameters:
+                    output = torch.cat((output, param_encoding), dim=-1)
+                if p == 0:
+                    output_center = self.aggregation_model[f](output)
+                else:
+                    if p == 1:
+                        output_neighbor = self.aggregation_model[f](output) * (
+                            1.0 / dist
+                        )
+                        dist_sum = 1.0 / dist
+                    else:
+                        output_neighbor += self.aggregation_model[f](output) * (
+                            1.0 / dist
+                        )
+                        dist_sum += 1.0 / dist
+            if self.num_sample_points > 1:
+                output_res = 0.5 * output_center + 0.5 * output_neighbor / dist_sum
+            else:
+                output_res = output_center
+            if f == 0:
+                output_all = output_res
+            else:
+                output_all = torch.cat((output_all, output_res), dim=-1)
+
+        return output_all

From 5539c544e40140ede71e0d33290081feeb7aae1d Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 22 Sep 2025 13:49:33 +0000
Subject: [PATCH 36/98] Refactor domino model and add significant test suite
 expansion.

Thie refactor consolidates the MLP implementations into one interface,
and also makes the volume_minmax and surface_minmax rescaling
contingent on the presence of those values in the input dict.
---
 physicsnemo/models/domino/encodings.py        |   3 +-
 physicsnemo/models/domino/model.py            |   2 +-
 physicsnemo/models/domino/solutions.py        |  56 ++--
 test/models/domino/__init__.py                |  15 ++
 test/models/{ => domino}/test_domino.py       |   7 +-
 test/models/domino/test_domino_encodings.py   | 148 +++++++++++
 .../models/domino/test_domino_geometry_rep.py | 128 +++++++++
 test/models/domino/test_domino_mlps.py        |  86 ++++++
 test/models/domino/test_domino_solutions.py   | 244 ++++++++++++++++++
 test/models/domino/utils.py                   | 154 +++++++++++
 10 files changed, 802 insertions(+), 41 deletions(-)
 create mode 100644 test/models/domino/__init__.py
 rename test/models/{ => domino}/test_domino.py (97%)
 create mode 100644 test/models/domino/test_domino_encodings.py
 create mode 100644 test/models/domino/test_domino_geometry_rep.py
 create mode 100644 test/models/domino/test_domino_mlps.py
 create mode 100644 test/models/domino/test_domino_solutions.py
 create mode 100644 test/models/domino/utils.py

diff --git a/physicsnemo/models/domino/encodings.py b/physicsnemo/models/domino/encodings.py
index ca2ef34f0f..068e4b3a1f 100644
--- a/physicsnemo/models/domino/encodings.py
+++ b/physicsnemo/models/domino/encodings.py
@@ -88,7 +88,7 @@ def __init__(
         if self.fourier_features:
             input_features_calculated = input_features + input_features * num_modes * 2
             self.register_buffer(
-                "freqs", torch.exp(torch.linspace(0, math.pi, self.num_modes))
+                "freqs", torch.exp(torch.linspace(0, math.pi, num_modes))
             )
         else:
             input_features_calculated = input_features
@@ -160,6 +160,7 @@ def forward(
         mapping, outputs = self.bq_warp(
             volume_mesh_centers, p_grid, reverse_mapping=False
         )
+
         mapping = mapping.type(torch.int64)
         mask = mapping != 0
 
diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py
index 31547c743b..4ad6ae2856 100644
--- a/physicsnemo/models/domino/model.py
+++ b/physicsnemo/models/domino/model.py
@@ -127,7 +127,7 @@ def forward(
 
         Args:
             x: Input tensor containing coordinates of the neighboring points
-               (batch_size, nx*ny*nz, 3, n_points)
+               (batch_size, nx*ny*nz, n_points, 3)
             grid: Input tensor represented as a grid of shape
                 (batch_size, nx, ny, nz, 3)
 
diff --git a/physicsnemo/models/domino/solutions.py b/physicsnemo/models/domino/solutions.py
index 87c2bf0d16..c3968e8dcf 100644
--- a/physicsnemo/models/domino/solutions.py
+++ b/physicsnemo/models/domino/solutions.py
@@ -122,7 +122,7 @@ def __init__(
                     "Parameter model is required when encode_parameters is True"
                 )
 
-    def encode_parameters(
+    def apply_parameter_encoding(
         self,
         mesh_centers: torch.Tensor,
         global_params_values: torch.Tensor,
@@ -156,7 +156,7 @@ def forward(
         Forward pass of the SolutionCalculator module.
         """
         if self.encode_parameters:
-            param_encoding = self.encode_parameters(
+            param_encoding = self.apply_parameter_encoding(
                 volume_mesh_centers, global_params_values, global_params_reference
             )
 
@@ -291,7 +291,7 @@ def __init__(
                     "Parameter model is required when encode_parameters is True"
                 )
 
-    def encode_parameters(
+    def apply_parameter_encoding(
         self,
         mesh_centers: torch.Tensor,
         global_params_values: torch.Tensor,
@@ -329,43 +329,29 @@ def forward(
         """Function to approximate solution given the neighborhood information"""
 
         if self.encode_parameters:
-            param_encoding = self.encode_parameters(
+            param_encoding = self.apply_parameter_encoding(
                 surface_mesh_centers, global_params_values, global_params_reference
             )
 
+        centers_inputs = [
+            surface_mesh_centers,
+        ]
+        neighbors_inputs = [
+            surface_mesh_neighbors,
+        ]
+
         if self.use_surface_normals:
-            if not self.use_surface_area:
-                surface_mesh_centers = torch.cat(
-                    (surface_mesh_centers, surface_normals),
-                    dim=-1,
-                )
-                if self.num_sample_points > 1:
-                    surface_mesh_neighbors = torch.cat(
-                        (
-                            surface_mesh_neighbors,
-                            surface_neighbors_normals,
-                        ),
-                        dim=-1,
-                    )
+            centers_inputs.append(surface_normals)
+            if self.num_sample_points > 1:
+                neighbors_inputs.append(surface_neighbors_normals)
 
-            else:
-                surface_mesh_centers = torch.cat(
-                    (
-                        surface_mesh_centers,
-                        surface_normals,
-                        torch.log(surface_areas) / 10,
-                    ),
-                    dim=-1,
-                )
-                if self.num_sample_points > 1:
-                    surface_mesh_neighbors = torch.cat(
-                        (
-                            surface_mesh_neighbors,
-                            surface_neighbors_normals,
-                            torch.log(surface_neighbors_areas) / 10,
-                        ),
-                        dim=-1,
-                    )
+        if self.use_surface_area:
+            centers_inputs.append(torch.log(surface_areas) / 10)
+            if self.num_sample_points > 1:
+                neighbors_inputs.append(torch.log(surface_neighbors_areas) / 10)
+
+        surface_mesh_centers = torch.cat(centers_inputs, dim=-1)
+        surface_mesh_neighbors = torch.cat(neighbors_inputs, dim=-1)
 
         for f in range(self.num_variables):
             for p in range(self.num_sample_points):
diff --git a/test/models/domino/__init__.py b/test/models/domino/__init__.py
new file mode 100644
index 0000000000..b2f171d4ac
--- /dev/null
+++ b/test/models/domino/__init__.py
@@ -0,0 +1,15 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/test/models/test_domino.py b/test/models/domino/test_domino.py
similarity index 97%
rename from test/models/test_domino.py
rename to test/models/domino/test_domino.py
index 87110491d0..e606b9ecf8 100644
--- a/test/models/test_domino.py
+++ b/test/models/domino/test_domino.py
@@ -22,9 +22,8 @@
 import torch
 from pytest_utils import import_or_fail
 
-# from . import common
-from .common.fwdaccuracy import save_output
-from .common.utils import compare_output
+from ..common.fwdaccuracy import save_output
+from ..common.utils import compare_output
 
 
 def validate_domino(
@@ -44,7 +43,7 @@ def validate_domino(
     if file_name is None:
         file_name = model.meta.name + "_output.pth"
     file_name = (
-        Path(__file__).parents[0].resolve() / Path("data") / Path(file_name.lower())
+        Path(__file__).parents[1].resolve() / Path("data") / Path(file_name.lower())
     )
     # If file does not exist, we will create it then error
     # Model should then reproduce it on next pytest run
diff --git a/test/models/domino/test_domino_encodings.py b/test/models/domino/test_domino_encodings.py
new file mode 100644
index 0000000000..a27e2dd0a9
--- /dev/null
+++ b/test/models/domino/test_domino_encodings.py
@@ -0,0 +1,148 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+
+import pytest
+import torch
+
+from .utils import validate_output_shape_and_values
+
+
+@pytest.mark.parametrize("device", ["cuda:0"])
+@pytest.mark.parametrize("fourier_features", [True, False])
+@pytest.mark.parametrize("num_modes", [3, 5, 10])
+def test_encoding_mlp(device, fourier_features, num_modes):
+    """Test EncodingMLP with various configurations"""
+    from physicsnemo.models.domino.encodings import EncodingMLP
+    from physicsnemo.models.domino.model import get_activation
+
+    torch.manual_seed(0)
+
+    model = EncodingMLP(
+        input_features=3,
+        base_layer=64,
+        fourier_features=fourier_features,
+        num_modes=num_modes,
+        activation=get_activation("relu"),
+    ).to(device)
+
+    x = torch.randn(2, 100, 3).to(device)
+    output = model(x)
+
+    validate_output_shape_and_values(output, (2, 100, 64))
+
+
+@pytest.mark.parametrize("device", ["cuda:0"])
+def test_fourier_encode_vectorized(device):
+    """Test fourier encoding function"""
+    from physicsnemo.models.domino.encodings import fourier_encode_vectorized
+
+    torch.manual_seed(0)
+
+    coords = torch.randn(4, 20, 3).to(device)
+    freqs = torch.exp(torch.linspace(0, math.pi, 5)).to(device)
+
+    output = fourier_encode_vectorized(coords, freqs)
+
+    # Output should be [batch, points, D * 2 * F] = [4, 20, 3 * 2 * 5] = [4, 20, 30]
+    validate_output_shape_and_values(output, (4, 20, 30))
+
+
+@pytest.mark.parametrize("device", ["cuda:0"])
+def test_local_geometry_encoding(device):
+    """Test LocalGeometryEncoding"""
+    from physicsnemo.models.domino.encodings import LocalGeometryEncoding
+    from physicsnemo.models.domino.model import get_activation
+
+    BATCH_SIZE = 1
+
+    torch.manual_seed(0)
+
+    N_ENCODING_CHANNELS = 3
+    N_NEIGHBORS = 32
+    N_MESH_POINTS = 50
+    GRID_RESOLUTION = (32, 32, 32)
+
+    model = LocalGeometryEncoding(
+        radius=0.1,
+        neighbors_in_radius=N_NEIGHBORS,
+        total_neighbors_in_radius=N_ENCODING_CHANNELS * N_NEIGHBORS,
+        base_layer=128,
+        activation=get_activation("relu"),
+        grid_resolution=GRID_RESOLUTION,
+    ).to(device)
+
+    encoding_g = torch.randn(BATCH_SIZE, N_ENCODING_CHANNELS, *GRID_RESOLUTION).to(
+        device
+    )
+    volume_mesh_centers = torch.randn(BATCH_SIZE, N_MESH_POINTS, 3).to(device)
+    p_grid = torch.randn(BATCH_SIZE, *GRID_RESOLUTION, 3).to(device)
+
+    output = model(encoding_g, volume_mesh_centers, p_grid)
+
+    validate_output_shape_and_values(output, (BATCH_SIZE, N_MESH_POINTS, 32))
+
+
+@pytest.mark.parametrize("device", ["cuda:0"])
+@pytest.mark.parametrize("geo_encoding_type", ["both", "stl", "sdf"])
+def test_multi_geometry_encoding(device, geo_encoding_type):
+    """Test MultiGeometryEncoding with different encoding types"""
+    from physicsnemo.models.domino.encodings import MultiGeometryEncoding
+    from physicsnemo.models.domino.model import get_activation
+
+    torch.manual_seed(0)
+
+    BATCH_SIZE = 1
+    N_MESH_POINTS = 50
+    GRID_RESOLUTION = (32, 32, 32)
+
+    radii = [0.05, 0.1]
+    neighbors_in_radius = [16, 32]
+
+    model = MultiGeometryEncoding(
+        radii=radii,
+        neighbors_in_radius=neighbors_in_radius,
+        geo_encoding_type=geo_encoding_type,
+        base_layer=64,
+        activation=get_activation("relu"),
+        grid_resolution=GRID_RESOLUTION,
+    ).to(device)
+
+    if geo_encoding_type == "both":
+        num_channels = len(radii) + 1
+    elif geo_encoding_type == "stl":
+        num_channels = len(radii)
+    else:  # sdf
+        num_channels = 1
+
+    encoding_g = torch.randn(BATCH_SIZE, num_channels, *GRID_RESOLUTION).to(device)
+    volume_mesh_centers = torch.randn(BATCH_SIZE, N_MESH_POINTS, 3).to(device)
+    p_grid = torch.randn(BATCH_SIZE, *GRID_RESOLUTION, 3).to(device)
+
+    print(f"encoding_g.shape: {encoding_g.shape}")
+    print(f"volume_mesh_centers.shape: {volume_mesh_centers.shape}")
+    print(f"p_grid.shape: {p_grid.shape}")
+
+    output = model(encoding_g, volume_mesh_centers, p_grid)
+
+    print(f"output.shape: {output.shape}")
+
+    expected_output_dim = sum(neighbors_in_radius)
+
+    validate_output_shape_and_values(
+        output, (BATCH_SIZE, N_MESH_POINTS, expected_output_dim)
+    )
diff --git a/test/models/domino/test_domino_geometry_rep.py b/test/models/domino/test_domino_geometry_rep.py
new file mode 100644
index 0000000000..12ede24be5
--- /dev/null
+++ b/test/models/domino/test_domino_geometry_rep.py
@@ -0,0 +1,128 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from dataclasses import dataclass
+
+import pytest
+import torch
+
+from .utils import validate_output_shape_and_values
+
+
+@pytest.mark.parametrize("device", ["cuda:0"])
+@pytest.mark.parametrize("act", ["relu", "gelu"])
+def test_geo_conv_out(device, act):
+    """Test GeoConvOut layer"""
+    from physicsnemo.models.domino.model import GeoConvOut
+
+    torch.manual_seed(0)
+
+    @dataclass
+    class TestParams:
+        base_neurons: int = 32
+        base_neurons_in: int = 8
+        fourier_features: bool = False
+        num_modes: int = 5
+        activation: str = act
+
+    params = TestParams()
+    grid_resolution = [32, 32, 32]
+
+    layer = GeoConvOut(
+        input_features=3, model_parameters=params, grid_resolution=grid_resolution
+    ).to(device)
+
+    x = torch.randn(1, 32 * 32 * 32, 10, 3).to(device)
+    grid = torch.randn(1, *grid_resolution, 3).to(device)
+
+    output = layer(x, grid)
+
+    validate_output_shape_and_values(
+        output, (1, params.base_neurons_in, *grid_resolution)
+    )
+
+
+@pytest.mark.parametrize("device", ["cuda:0"])
+@pytest.mark.parametrize("act", ["relu", "gelu"])
+def test_geo_processor(device, act):
+    """Test GeoProcessor CNN"""
+    from physicsnemo.models.domino.model import GeoProcessor
+
+    torch.manual_seed(0)
+
+    @dataclass
+    class TestParams:
+        base_filters: int = 8
+        activation: str = act
+
+    params = TestParams()
+
+    processor = GeoProcessor(
+        input_filters=4, output_filters=2, model_parameters=params
+    ).to(device)
+
+    x = torch.randn(2, 4, 16, 16, 16).to(device)
+    output = processor(x)
+
+    validate_output_shape_and_values(output, (2, 2, 16, 16, 16))
+
+
+@pytest.mark.parametrize("device", ["cuda:0"])
+@pytest.mark.parametrize("geometry_encoding_type", ["both", "stl", "sdf"])
+@pytest.mark.parametrize("processor_type", ["unet", "conv"])
+def test_geometry_rep(
+    device, geometry_encoding_type, processor_type, base_model_params
+):
+    """Test GeometryRep module with different configurations"""
+    from physicsnemo.models.domino.model import GeometryRep
+
+    torch.manual_seed(0)
+
+    # Modify params for this test
+    params = base_model_params()
+    params.geometry_encoding_type = geometry_encoding_type
+    params.geometry_rep.geo_processor.processor_type = processor_type
+    params.geometry_rep.geo_processor.self_attention = False
+    params.geometry_rep.geo_processor.cross_attention = False
+    params.interp_res = (16, 16, 16)  # Smaller for faster testing
+
+    radii = [0.1, 0.2]
+    neighbors_in_radius = [8, 16]
+
+    geo_rep = GeometryRep(
+        input_features=3,
+        radii=radii,
+        neighbors_in_radius=neighbors_in_radius,
+        hops=1,
+        model_parameters=params,
+    ).to(device)
+
+    # Test inputs
+    x = torch.randn(1, 20, 3).to(device)
+    p_grid = torch.randn(1, 16, 16, 16, 3).to(device)
+    sdf = torch.randn(1, 16, 16, 16).to(device)
+
+    output = geo_rep(x, p_grid, sdf)
+
+    # Determine expected output channels
+    if geometry_encoding_type == "both":
+        expected_channels = len(radii) + 1  # STL channels + SDF channel
+    elif geometry_encoding_type == "stl":
+        expected_channels = len(radii)
+    else:  # sdf
+        expected_channels = 1
+
+    validate_output_shape_and_values(output, (1, expected_channels, 16, 16, 16))
diff --git a/test/models/domino/test_domino_mlps.py b/test/models/domino/test_domino_mlps.py
new file mode 100644
index 0000000000..86d2d9a208
--- /dev/null
+++ b/test/models/domino/test_domino_mlps.py
@@ -0,0 +1,86 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+import torch
+
+from .utils import validate_output_shape_and_values
+
+
+@pytest.mark.parametrize("device", ["cuda:0"])
+@pytest.mark.parametrize("activation", ["relu", "gelu"])
+@pytest.mark.parametrize("n_layers", [1, 2, 3, 5])
+def test_mlp(device, activation, n_layers):
+    """Test basic MLP functionality"""
+    from physicsnemo.models.domino.mlps import MLP
+    from physicsnemo.models.domino.model import get_activation
+
+    torch.manual_seed(0)
+
+    mlp = MLP(
+        input_features=10,
+        output_features=5,
+        base_layer=32,
+        activation=get_activation(activation),
+        n_layers=n_layers,
+    ).to(device)
+
+    x = torch.randn(4, 50, 10).to(device)
+    output = mlp(x)
+
+    validate_output_shape_and_values(output, (4, 50, 5))
+
+
+@pytest.mark.parametrize("device", ["cuda:0"])
+def test_aggregation_model(device):
+    """Test AggregationModel"""
+    from physicsnemo.models.domino.mlps import AggregationModel
+    from physicsnemo.models.domino.model import get_activation
+
+    torch.manual_seed(0)
+
+    model = AggregationModel(
+        input_features=100,
+        output_features=1,
+        base_layer=64,
+        activation=get_activation("relu"),
+    ).to(device)
+
+    x = torch.randn(2, 30, 100).to(device)
+    output = model(x)
+
+    validate_output_shape_and_values(output, (2, 30, 1))
+
+
+@pytest.mark.parametrize("device", ["cuda:0"])
+def test_local_point_conv(device):
+    """Test LocalPointConv"""
+    from physicsnemo.models.domino.mlps import LocalPointConv
+    from physicsnemo.models.domino.model import get_activation
+
+    torch.manual_seed(0)
+
+    model = LocalPointConv(
+        input_features=50,
+        base_layer=128,
+        output_features=32,
+        activation=get_activation("relu"),
+    ).to(device)
+
+    x = torch.randn(2, 100, 50).to(device)
+    output = model(x)
+
+    validate_output_shape_and_values(output, (2, 100, 32))
diff --git a/test/models/domino/test_domino_solutions.py b/test/models/domino/test_domino_solutions.py
new file mode 100644
index 0000000000..be4797eafc
--- /dev/null
+++ b/test/models/domino/test_domino_solutions.py
@@ -0,0 +1,244 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+import torch
+import torch.nn as nn
+
+from .utils import validate_output_shape_and_values
+
+
+@pytest.mark.parametrize("device", ["cuda:0"])
+@pytest.mark.parametrize("num_variables", [1, 3, 5])
+@pytest.mark.parametrize("num_sample_points", [1, 3, 7])
+@pytest.mark.parametrize("encode_parameters", [True, False])
+def test_solution_calculator_volume(
+    device, num_variables, num_sample_points, encode_parameters
+):
+    """Test SolutionCalculatorVolume with various configurations"""
+    from physicsnemo.models.domino.encodings import EncodingMLP
+    from physicsnemo.models.domino.mlps import AggregationModel
+    from physicsnemo.models.domino.model import get_activation
+    from physicsnemo.models.domino.solutions import SolutionCalculatorVolume
+
+    torch.manual_seed(0)
+
+    activation = get_activation("relu")
+
+    # Create parameter model if needed
+    parameter_model = (
+        EncodingMLP(
+            input_features=2,
+            base_layer=32,
+            fourier_features=True,
+            num_modes=3,
+            activation=activation,
+        ).to(device)
+        if encode_parameters
+        else None
+    )
+
+    # Create aggregation models
+    aggregation_model = nn.ModuleList(
+        [
+            AggregationModel(
+                input_features=64 + 32 + 32 + (32 if encode_parameters else 0),
+                output_features=1,
+                base_layer=64,
+                activation=activation,
+            ).to(device)
+            for _ in range(num_variables)
+        ]
+    )
+
+    # Create basis functions
+    nn_basis = nn.ModuleList(
+        [
+            EncodingMLP(
+                input_features=3,
+                base_layer=32,
+                fourier_features=False,
+                num_modes=5,
+                activation=activation,
+            ).to(device)
+            for _ in range(num_variables)
+        ]
+    )
+
+    model = SolutionCalculatorVolume(
+        num_variables=num_variables,
+        num_sample_points=num_sample_points,
+        noise_intensity=50.0,
+        encode_parameters=encode_parameters,
+        return_volume_neighbors=False,
+        parameter_model=parameter_model,
+        aggregation_model=aggregation_model,
+        nn_basis=nn_basis,
+    ).to(device)
+
+    # Test data
+    volume_mesh_centers = torch.randn(2, 30, 3).to(device)
+    encoding_g = torch.randn(2, 30, 32).to(device)
+    encoding_node = torch.randn(2, 30, 64).to(device)
+    global_params_values = torch.randn(2, 2, 1).to(device)
+    global_params_reference = torch.randn(2, 2, 1).to(device)
+
+    output = model(
+        volume_mesh_centers,
+        encoding_g,
+        encoding_node,
+        global_params_values,
+        global_params_reference,
+    )
+
+    validate_output_shape_and_values(output, (2, 30, num_variables))
+
+
+@pytest.mark.parametrize("device", ["cuda:0"])
+@pytest.mark.parametrize("num_variables", [1, 3, 5])
+@pytest.mark.parametrize("use_surface_normals", [True, False])
+@pytest.mark.parametrize("use_surface_area", [True, False])
+def test_solution_calculator_surface(
+    device, num_variables, use_surface_normals, use_surface_area
+):
+    """Test SolutionCalculatorSurface with various configurations"""
+    from physicsnemo.models.domino.encodings import EncodingMLP
+    from physicsnemo.models.domino.mlps import AggregationModel
+    from physicsnemo.models.domino.model import get_activation
+    from physicsnemo.models.domino.solutions import SolutionCalculatorSurface
+
+    torch.manual_seed(0)
+
+    activation = get_activation("relu")
+
+    # Determine input features based on surface configuration
+    input_features = 3
+    if use_surface_normals:
+        input_features += 3
+    if use_surface_area:
+        input_features += 1
+
+    print(f"Input features: {input_features}")
+
+    # Create aggregation models
+    aggregation_model = nn.ModuleList(
+        [
+            AggregationModel(
+                input_features=64 + 32 + 32,
+                output_features=1,
+                base_layer=64,
+                activation=activation,
+            ).to(device)
+            for _ in range(num_variables)
+        ]
+    )
+
+    # Create basis functions
+    nn_basis = nn.ModuleList(
+        [
+            EncodingMLP(
+                input_features=input_features,
+                base_layer=32,
+                fourier_features=False,
+                num_modes=5,
+                activation=activation,
+            ).to(device)
+            for _ in range(num_variables)
+        ]
+    )
+
+    model = SolutionCalculatorSurface(
+        num_variables=num_variables,
+        num_sample_points=3,
+        noise_intensity=50.0,
+        encode_parameters=False,
+        use_surface_normals=use_surface_normals,
+        use_surface_area=use_surface_area,
+        parameter_model=None,
+        aggregation_model=aggregation_model,
+        nn_basis=nn_basis,
+    ).to(device)
+
+    # Test data
+    surface_mesh_centers = torch.randn(2, 30, 3).to(device)
+    encoding_g = torch.randn(2, 30, 32).to(device)
+    encoding_node = torch.randn(2, 30, 64).to(device)
+    surface_mesh_neighbors = torch.randn(2, 30, 2, 3).to(device)
+    surface_normals = torch.randn(2, 30, 3).to(device)
+    surface_neighbors_normals = torch.randn(2, 30, 2, 3).to(device)
+    surface_areas = torch.rand(2, 30, 1).to(device) + 1e-6
+    surface_neighbors_areas = torch.rand(2, 30, 2, 1).to(device) + 1e-6
+    global_params_values = torch.randn(2, 2, 1).to(device)
+    global_params_reference = torch.randn(2, 2, 1).to(device)
+
+    output = model(
+        surface_mesh_centers,
+        encoding_g,
+        encoding_node,
+        surface_mesh_neighbors,
+        surface_normals,
+        surface_neighbors_normals,
+        surface_areas,
+        surface_neighbors_areas,
+        global_params_values,
+        global_params_reference,
+    )
+
+    validate_output_shape_and_values(output, (2, 30, num_variables))
+
+
+@pytest.mark.parametrize("device", ["cuda:0"])
+@pytest.mark.parametrize("r", [0.5, 1.0, 2.0])
+@pytest.mark.parametrize("num_points", [10, 50, 100])
+def test_sample_sphere(device, r, num_points):
+    """Test sphere sampling function"""
+    from physicsnemo.models.domino.solutions import sample_sphere
+
+    torch.manual_seed(0)
+
+    center = torch.randn(2, 30, 3).to(device)
+    output = sample_sphere(center, r, num_points)
+
+    validate_output_shape_and_values(output, (2, 30, num_points, 3))
+
+    # Check that points are within the sphere radius
+    distances = torch.norm(output - center.unsqueeze(2), dim=-1)
+    assert (distances <= r + 1e-6).all(), "Some sampled points are outside the sphere"
+
+
+@pytest.mark.parametrize("device", ["cuda:0"])
+def test_sample_sphere_shell(device):
+    """Test spherical shell sampling function"""
+    from physicsnemo.models.domino.solutions import sample_sphere_shell
+
+    torch.manual_seed(0)
+
+    center = torch.randn(2, 30, 3).to(device)
+    r_inner, r_outer = 0.5, 1.5
+    num_points = 50
+
+    output = sample_sphere_shell(center, r_inner, r_outer, num_points)
+
+    validate_output_shape_and_values(output, (2, 30, num_points, 3))
+
+    # Check that points are within the shell
+    distances = torch.norm(output - center.unsqueeze(2), dim=-1)
+    assert (distances >= r_inner - 1e-6).all(), (
+        "Some sampled points are inside inner radius"
+    )
+    assert (distances <= r_outer + 1e-6).all(), (
+        "Some sampled points are outside outer radius"
+    )
diff --git a/test/models/domino/utils.py b/test/models/domino/utils.py
new file mode 100644
index 0000000000..8c5fb971f8
--- /dev/null
+++ b/test/models/domino/utils.py
@@ -0,0 +1,154 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from dataclasses import dataclass
+from typing import Sequence
+
+import pytest
+import torch
+
+
+def generate_test_data(bsize, nx, ny, nz, num_neigh, device):
+    """Generate test data for DoMINO"""
+    return {
+        "pos_volume_closest": torch.randn(bsize, 50, 3).to(device),
+        "pos_volume_center_of_mass": torch.randn(bsize, 50, 3).to(device),
+        "pos_surface_center_of_mass": torch.randn(bsize, 50, 3).to(device),
+        "geometry_coordinates": torch.randn(bsize, 50, 3).to(device),
+        "grid": torch.randn(bsize, nx, ny, nz, 3).to(device),
+        "surf_grid": torch.randn(bsize, nx, ny, nz, 3).to(device),
+        "sdf_grid": torch.randn(bsize, nx, ny, nz).to(device),
+        "sdf_surf_grid": torch.randn(bsize, nx, ny, nz).to(device),
+        "sdf_nodes": torch.randn(bsize, 50, 1).to(device),
+        "surface_mesh_centers": torch.randn(bsize, 50, 3).to(device),
+        "surface_mesh_neighbors": torch.randn(bsize, 50, num_neigh, 3).to(device),
+        "surface_normals": torch.randn(bsize, 50, 3).to(device),
+        "surface_neighbors_normals": torch.randn(bsize, 50, num_neigh, 3).to(device),
+        "surface_areas": torch.rand(bsize, 50).to(device) + 1e-6,
+        "surface_neighbors_areas": torch.rand(bsize, 50, num_neigh).to(device) + 1e-6,
+        "volume_mesh_centers": torch.randn(bsize, 50, 3).to(device),
+        "volume_min_max": torch.randn(bsize, 2, 3).to(device),
+        "surface_min_max": torch.randn(bsize, 2, 3).to(device),
+        "global_params_values": torch.randn(bsize, 2, 1).to(device),
+        "global_params_reference": torch.randn(bsize, 2, 1).to(device),
+    }
+
+
+@pytest.fixture
+def base_model_params():
+    """Base model parameters for testing"""
+
+    @dataclass
+    class model_params:
+        @dataclass
+        class geometry_rep:
+            @dataclass
+            class geo_conv:
+                base_neurons: int = 32
+                base_neurons_in: int = 8
+                base_neurons_out: int = 8
+                surface_hops: int = 1
+                volume_hops: int = 1
+                volume_radii: Sequence = (0.1, 0.5)
+                volume_neighbors_in_radius: Sequence = (10, 10)
+                surface_radii: Sequence = (0.05,)
+                surface_neighbors_in_radius: Sequence = (10,)
+                activation: str = "relu"
+                fourier_features: bool = False
+                num_modes: int = 5
+
+            @dataclass
+            class geo_processor:
+                base_filters: int = 8
+                activation: str = "relu"
+                processor_type: str = "unet"
+                self_attention: bool = True
+                cross_attention: bool = False
+
+            base_filters: int = 8
+            geo_conv = geo_conv
+            geo_processor = geo_processor
+
+        @dataclass
+        class geometry_local:
+            base_layer: int = 512
+            volume_neighbors_in_radius: Sequence = (128, 128)
+            surface_neighbors_in_radius: Sequence = (128,)
+            volume_radii: Sequence = (0.05, 0.1)
+            surface_radii: Sequence = (0.05,)
+
+        @dataclass
+        class nn_basis_functions:
+            base_layer: int = 512
+            fourier_features: bool = False
+            num_modes: int = 5
+            activation: str = "relu"
+
+        @dataclass
+        class local_point_conv:
+            activation: str = "relu"
+
+        @dataclass
+        class aggregation_model:
+            base_layer: int = 512
+            activation: str = "relu"
+
+        @dataclass
+        class position_encoder:
+            base_neurons: int = 512
+            activation: str = "relu"
+            fourier_features: bool = False
+            num_modes: int = 5
+
+        @dataclass
+        class parameter_model:
+            base_layer: int = 512
+            fourier_features: bool = True
+            num_modes: int = 5
+            activation: str = "relu"
+
+        model_type: str = "combined"
+        activation: str = "relu"
+        interp_res: Sequence = (64, 64, 64)  # Smaller for testing
+        use_sdf_in_basis_func: bool = True
+        positional_encoding: bool = False
+        surface_neighbors: bool = True
+        num_neighbors_surface: int = 7
+        num_neighbors_volume: int = 7
+        use_surface_normals: bool = True
+        use_surface_area: bool = True
+        encode_parameters: bool = False
+        combine_volume_surface: bool = False
+        geometry_encoding_type: str = "both"
+        solution_calculation_mode: str = "two-loop"
+        geometry_rep = geometry_rep
+        nn_basis_functions = nn_basis_functions
+        aggregation_model = aggregation_model
+        position_encoder = position_encoder
+        geometry_local = geometry_local
+
+    return model_params
+
+
+def validate_output_shape_and_values(output, expected_shape, check_finite=True):
+    """Validate output tensor shape and values"""
+    if output is not None:
+        assert output.shape == expected_shape, (
+            f"Expected shape {expected_shape}, got {output.shape}"
+        )
+        if check_finite:
+            assert torch.isfinite(output).all(), "Output contains non-finite values"
+        assert not torch.isnan(output).any(), "Output contains NaN values"

From 260c2405e7727ff7aaeccd5a68a572b8a3798a3b Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 22 Sep 2025 14:18:45 +0000
Subject: [PATCH 37/98] Move geometry rep codes to a separate file for model
 simplicity too.

---
 physicsnemo/models/domino/geometry_rep.py     | 505 ++++++++++++++++++
 physicsnemo/models/domino/model.py            | 488 +----------------
 test/models/domino/conftest.py                | 116 ++++
 .../models/domino/test_domino_geometry_rep.py |   6 +-
 test/models/domino/utils.py                   | 100 ----
 5 files changed, 625 insertions(+), 590 deletions(-)
 create mode 100644 physicsnemo/models/domino/geometry_rep.py
 create mode 100644 test/models/domino/conftest.py

diff --git a/physicsnemo/models/domino/geometry_rep.py b/physicsnemo/models/domino/geometry_rep.py
new file mode 100644
index 0000000000..ea77ef5f15
--- /dev/null
+++ b/physicsnemo/models/domino/geometry_rep.py
@@ -0,0 +1,505 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+from typing import Callable, Literal, Sequence
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from einops import rearrange
+
+from physicsnemo.models.unet import UNet
+
+from .ball_query import BQWarp
+from .encodings import fourier_encode_vectorized
+
+
+def get_activation(activation: Literal["relu", "gelu"]) -> Callable:
+    """
+    Return a PyTorch activation function corresponding to the given name.
+    """
+    if activation == "relu":
+        return nn.ReLU()
+    elif activation == "gelu":
+        return nn.GELU()
+    else:
+        raise ValueError(f"Activation function {activation} not found")
+
+
+def scale_sdf(sdf: torch.Tensor) -> torch.Tensor:
+    """
+    Scale a signed distance function (SDF) to emphasize surface regions.
+
+    This function applies a non-linear scaling to the SDF values that compresses
+    the range while preserving the sign, effectively giving more weight to points
+    near surfaces where abs(SDF) is small.
+
+    Args:
+        sdf: Tensor containing signed distance function values
+
+    Returns:
+        Tensor with scaled SDF values in range [-1, 1]
+    """
+    return sdf / (0.4 + torch.abs(sdf))
+
+
+class GeoConvOut(nn.Module):
+    """
+    Geometry layer to project STL geometry data onto regular grids.
+    """
+
+    def __init__(
+        self,
+        input_features: int,
+        model_parameters,
+        grid_resolution=None,
+    ):
+        """
+        Initialize the GeoConvOut layer.
+
+        Args:
+            input_features: Number of input feature dimensions
+            model_parameters: Configuration parameters for the model
+            grid_resolution: Resolution of the output grid [nx, ny, nz]
+        """
+        super().__init__()
+        if grid_resolution is None:
+            grid_resolution = [256, 96, 64]
+        base_neurons = model_parameters.base_neurons
+        self.fourier_features = model_parameters.fourier_features
+        self.num_modes = model_parameters.num_modes
+
+        if self.fourier_features:
+            input_features_calculated = input_features * (1 + 2 * self.num_modes)
+        else:
+            input_features_calculated = input_features
+
+        self.fc1 = nn.Linear(input_features_calculated, base_neurons)
+        self.fc2 = nn.Linear(base_neurons, base_neurons // 2)
+        self.fc3 = nn.Linear(base_neurons // 2, model_parameters.base_neurons_in)
+
+        self.grid_resolution = grid_resolution
+
+        self.activation = get_activation(model_parameters.activation)
+
+        if self.fourier_features:
+            self.register_buffer(
+                "freqs", torch.exp(torch.linspace(0, math.pi, self.num_modes))
+            )
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        grid: torch.Tensor,
+        radius: float = 0.025,
+        neighbors_in_radius: int = 10,
+    ) -> torch.Tensor:
+        """
+        Process and project geometric features onto a 3D grid.
+
+        Args:
+            x: Input tensor containing coordinates of the neighboring points
+               (batch_size, nx*ny*nz, n_points, 3)
+            grid: Input tensor represented as a grid of shape
+                (batch_size, nx, ny, nz, 3)
+
+        Returns:
+            Processed geometry features of shape (batch_size, base_neurons_in, nx, ny, nz)
+        """
+
+        nx, ny, nz = (
+            self.grid_resolution[0],
+            self.grid_resolution[1],
+            self.grid_resolution[2],
+        )
+        grid = grid.reshape(1, nx * ny * nz, 3, 1)
+        x_transposed = torch.transpose(x, 2, 3)
+        dist_weights = 1.0 / (1e-6 + (x_transposed - grid) ** 2.0)
+        dist_weights = torch.transpose(dist_weights, 2, 3)
+
+        # x = torch.sum(x * dist_weights, 2) / torch.sum(dist_weights, 2)
+        # x = torch.sum(x, 2)
+        mask = abs(x - 0) > 1e-6
+        if self.fourier_features:
+            facets = torch.cat((x, fourier_encode_vectorized(x, self.freqs)), axis=-1)
+        else:
+            facets = x
+        x = self.activation(self.fc1(facets))
+        x = self.activation(self.fc2(x))
+        x = F.tanh(self.fc3(x))
+
+        mask = mask[:, :, :, 0:1].expand(
+            mask.shape[0], mask.shape[1], mask.shape[2], x.shape[-1]
+        )
+
+        x = torch.sum(x * mask, 2)
+        x = rearrange(x, "b (x y z) c -> b c x y z", x=nx, y=ny, z=nz)
+        return x
+
+
+class GeoProcessor(nn.Module):
+    """Geometry processing layer using CNNs"""
+
+    def __init__(self, input_filters: int, output_filters: int, model_parameters):
+        """
+        Initialize the GeoProcessor network.
+
+        Args:
+            input_filters: Number of input channels
+            model_parameters: Configuration parameters for the model
+        """
+        super().__init__()
+        base_filters = model_parameters.base_filters
+        self.conv1 = nn.Conv3d(
+            input_filters, base_filters, kernel_size=3, padding="same"
+        )
+        self.conv2 = nn.Conv3d(
+            base_filters, 2 * base_filters, kernel_size=3, padding="same"
+        )
+        self.conv3 = nn.Conv3d(
+            2 * base_filters, 4 * base_filters, kernel_size=3, padding="same"
+        )
+        self.conv3_1 = nn.Conv3d(
+            4 * base_filters, 4 * base_filters, kernel_size=3, padding="same"
+        )
+        self.conv4 = nn.Conv3d(
+            4 * base_filters, 2 * base_filters, kernel_size=3, padding="same"
+        )
+        self.conv5 = nn.Conv3d(
+            4 * base_filters, base_filters, kernel_size=3, padding="same"
+        )
+        self.conv6 = nn.Conv3d(
+            2 * base_filters, input_filters, kernel_size=3, padding="same"
+        )
+        self.conv7 = nn.Conv3d(
+            2 * input_filters, input_filters, kernel_size=3, padding="same"
+        )
+        self.conv8 = nn.Conv3d(
+            input_filters, output_filters, kernel_size=3, padding="same"
+        )
+        self.avg_pool = torch.nn.AvgPool3d((2, 2, 2))
+        self.max_pool = nn.MaxPool3d(2)
+        self.upsample = nn.Upsample(scale_factor=2, mode="nearest")
+        self.activation = get_activation(model_parameters.activation)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """
+        Process geometry information through the 3D CNN network.
+
+        The network follows an encoder-decoder architecture with skip connections:
+        1. Downsampling path (encoder) with three levels of max pooling
+        2. Processing loop in the bottleneck
+        3. Upsampling path (decoder) with skip connections from the encoder
+
+        Args:
+            x: Input tensor containing grid-represented geometry of shape
+               (batch_size, input_filters, nx, ny, nz)
+
+        Returns:
+            Processed geometry features of shape (batch_size, 1, nx, ny, nz)
+        """
+        # Encoder
+        x0 = x
+        x = self.conv1(x)
+        x = self.activation(x)
+        x = self.max_pool(x)
+
+        x1 = x
+        x = self.conv2(x)
+        x = self.activation(x)
+        x = self.max_pool(x)
+
+        x2 = x
+        x = self.conv3(x)
+        x = self.activation(x)
+        x = self.max_pool(x)
+
+        # Processor loop
+        x = self.activation(self.conv3_1(x))
+
+        # Decoder
+        x = self.conv4(x)
+        x = self.activation(x)
+        x = self.upsample(x)
+        x = torch.cat((x, x2), dim=1)
+
+        x = self.conv5(x)
+        x = self.activation(x)
+        x = self.upsample(x)
+        x = torch.cat((x, x1), dim=1)
+
+        x = self.conv6(x)
+        x = self.activation(x)
+        x = self.upsample(x)
+        x = torch.cat((x, x0), dim=1)
+
+        x = self.activation(self.conv7(x))
+        x = self.conv8(x)
+
+        return x
+
+
+class GeometryRep(nn.Module):
+    """
+    Geometry representation module that processes STL geometry data.
+
+    This module constructs a multiscale representation of geometry by:
+    1. Computing multi-scale geometry encoding for local and global context
+    2. Processing signed distance field (SDF) data for surface information
+
+    The combined encoding enables the model to reason about both local and global
+    geometric properties.
+    """
+
+    def __init__(
+        self,
+        input_features: int,
+        radii: Sequence[float],
+        neighbors_in_radius,
+        hops=1,
+        model_parameters=None,
+    ):
+        """
+        Initialize the GeometryRep module.
+
+        Args:
+            input_features: Number of input feature dimensions
+            model_parameters: Configuration parameters for the model
+        """
+        super().__init__()
+        geometry_rep = model_parameters.geometry_rep
+        self.geo_encoding_type = model_parameters.geometry_encoding_type
+        self.cross_attention = geometry_rep.geo_processor.cross_attention
+        self.self_attention = geometry_rep.geo_processor.self_attention
+        self.activation_conv = get_activation(geometry_rep.geo_conv.activation)
+        self.activation_processor = geometry_rep.geo_processor.activation
+
+        self.bq_warp = nn.ModuleList()
+        self.geo_processors = nn.ModuleList()
+        for j in range(len(radii)):
+            self.bq_warp.append(
+                BQWarp(
+                    radius=radii[j],
+                    neighbors_in_radius=neighbors_in_radius[j],
+                )
+            )
+            if geometry_rep.geo_processor.processor_type == "unet":
+                h = geometry_rep.geo_processor.base_filters
+                if self.self_attention:
+                    normalization_in_unet = "layernorm"
+                else:
+                    normalization_in_unet = None
+                self.geo_processors.append(
+                    UNet(
+                        in_channels=geometry_rep.geo_conv.base_neurons_in,
+                        out_channels=geometry_rep.geo_conv.base_neurons_out,
+                        model_depth=3,
+                        feature_map_channels=[
+                            h,
+                            2 * h,
+                            4 * h,
+                        ],
+                        num_conv_blocks=1,
+                        kernel_size=3,
+                        stride=1,
+                        conv_activation=self.activation_processor,
+                        padding=1,
+                        padding_mode="zeros",
+                        pooling_type="MaxPool3d",
+                        pool_size=2,
+                        normalization=normalization_in_unet,
+                        use_attn_gate=self.self_attention,
+                        attn_decoder_feature_maps=[4 * h, 2 * h],
+                        attn_feature_map_channels=[2 * h, h],
+                        attn_intermediate_channels=4 * h,
+                        gradient_checkpointing=True,
+                    )
+                )
+            elif geometry_rep.geo_processor.processor_type == "conv":
+                self.geo_processors.append(
+                    nn.Sequential(
+                        GeoProcessor(
+                            input_filters=geometry_rep.geo_conv.base_neurons_in,
+                            output_filters=geometry_rep.geo_conv.base_neurons_out,
+                            model_parameters=geometry_rep.geo_processor,
+                        ),
+                        GeoProcessor(
+                            input_filters=geometry_rep.geo_conv.base_neurons_in,
+                            output_filters=geometry_rep.geo_conv.base_neurons_out,
+                            model_parameters=geometry_rep.geo_processor,
+                        ),
+                    )
+                )
+            else:
+                raise ValueError("Invalid prompt. Specify unet or conv ...")
+
+        self.geo_conv_out = nn.ModuleList()
+        self.geo_processor_out = nn.ModuleList()
+        for _ in range(len(radii)):
+            self.geo_conv_out.append(
+                GeoConvOut(
+                    input_features=input_features,
+                    model_parameters=geometry_rep.geo_conv,
+                    grid_resolution=model_parameters.interp_res,
+                )
+            )
+            self.geo_processor_out.append(
+                nn.Conv3d(
+                    geometry_rep.geo_conv.base_neurons_out,
+                    1,
+                    kernel_size=3,
+                    padding="same",
+                )
+            )
+
+        if geometry_rep.geo_processor.processor_type == "unet":
+            h = geometry_rep.geo_processor.base_filters
+            if self.self_attention:
+                normalization_in_unet = "layernorm"
+            else:
+                normalization_in_unet = None
+            self.geo_processor_sdf = UNet(
+                in_channels=6,
+                out_channels=geometry_rep.geo_conv.base_neurons_out,
+                model_depth=3,
+                feature_map_channels=[
+                    h,
+                    2 * h,
+                    4 * h,
+                ],
+                num_conv_blocks=1,
+                kernel_size=3,
+                stride=1,
+                conv_activation=self.activation_processor,
+                padding=1,
+                padding_mode="zeros",
+                pooling_type="MaxPool3d",
+                pool_size=2,
+                normalization=normalization_in_unet,
+                use_attn_gate=self.self_attention,
+                attn_decoder_feature_maps=[4 * h, 2 * h],
+                attn_feature_map_channels=[2 * h, h],
+                attn_intermediate_channels=4 * h,
+                gradient_checkpointing=True,
+            )
+        elif geometry_rep.geo_processor.processor_type == "conv":
+            self.geo_processor_sdf = nn.Sequential(
+                GeoProcessor(
+                    input_filters=6,
+                    output_filters=geometry_rep.geo_conv.base_neurons_out,
+                    model_parameters=geometry_rep.geo_processor,
+                ),
+                GeoProcessor(
+                    input_filters=geometry_rep.geo_conv.base_neurons_out,
+                    output_filters=geometry_rep.geo_conv.base_neurons_out,
+                    model_parameters=geometry_rep.geo_processor,
+                ),
+            )
+        else:
+            raise ValueError("Invalid prompt. Specify unet or conv ...")
+        self.radii = radii
+        self.hops = hops
+
+        self.geo_processor_sdf_out = nn.Conv3d(
+            geometry_rep.geo_conv.base_neurons_out, 1, kernel_size=3, padding="same"
+        )
+
+        if self.cross_attention:
+            self.combined_unet = UNet(
+                in_channels=1 + len(radii),
+                out_channels=1 + len(radii),
+                model_depth=3,
+                feature_map_channels=[
+                    h,
+                    2 * h,
+                    4 * h,
+                ],
+                num_conv_blocks=1,
+                kernel_size=3,
+                stride=1,
+                conv_activation=self.activation_processor,
+                padding=1,
+                padding_mode="zeros",
+                pooling_type="MaxPool3d",
+                pool_size=2,
+                normalization="layernorm",
+                use_attn_gate=True,
+                attn_decoder_feature_maps=[4 * h, 2 * h],
+                attn_feature_map_channels=[2 * h, h],
+                attn_intermediate_channels=4 * h,
+                gradient_checkpointing=True,
+            )
+
+    def forward(
+        self, x: torch.Tensor, p_grid: torch.Tensor, sdf: torch.Tensor
+    ) -> torch.Tensor:
+        """
+        Process geometry data to create a comprehensive representation.
+
+        This method combines short-range, long-range, and SDF-based geometry
+        encodings to create a rich representation of the geometry.
+
+        Args:
+            x: Input tensor containing geometric point data
+            p_grid: Grid points for sampling
+            sdf: Signed distance field tensor
+
+        Returns:
+            Comprehensive geometry encoding that concatenates short-range,
+            SDF-based, and long-range features
+        """
+        if self.geo_encoding_type == "both" or self.geo_encoding_type == "stl":
+            # Calculate multi-scale geoemtry dependency
+            x_encoding = []
+            for j in range(len(self.radii)):
+                mapping, k_short = self.bq_warp[j](x, p_grid)
+                x_encoding_inter = self.geo_conv_out[j](k_short, p_grid)
+                # Propagate information in the geometry enclosed BBox
+                for _ in range(self.hops):
+                    dx = self.geo_processors[j](x_encoding_inter) / self.hops
+                    x_encoding_inter = x_encoding_inter + dx
+                x_encoding_inter = self.geo_processor_out[j](x_encoding_inter)
+                x_encoding.append(x_encoding_inter)
+            x_encoding = torch.cat(x_encoding, dim=1)
+
+        if self.geo_encoding_type == "both" or self.geo_encoding_type == "sdf":
+            # Expand SDF
+            sdf = torch.unsqueeze(sdf, 1)
+            # Scaled sdf to emphasize near surface
+            scaled_sdf = scale_sdf(sdf)
+            # Binary sdf
+            binary_sdf = torch.where(sdf >= 0, 0.0, 1.0)
+            # Gradients of SDF
+            sdf_x, sdf_y, sdf_z = torch.gradient(sdf, dim=[2, 3, 4])
+
+            # Process SDF and its computed features
+            sdf = torch.cat((sdf, scaled_sdf, binary_sdf, sdf_x, sdf_y, sdf_z), 1)
+            sdf_encoding = self.geo_processor_sdf(sdf)
+            sdf_encoding = self.geo_processor_sdf_out(sdf_encoding)
+
+        if self.geo_encoding_type == "both":
+            # Geometry encoding comprised of short-range, long-range and SDF features
+            encoding_g = torch.cat((x_encoding, sdf_encoding), 1)
+        elif self.geo_encoding_type == "sdf":
+            encoding_g = sdf_encoding
+        elif self.geo_encoding_type == "stl":
+            encoding_g = x_encoding
+
+        if self.cross_attention:
+            encoding_g = self.combined_unet(encoding_g)
+
+        return encoding_g
diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py
index 4ad6ae2856..bc06289a6b 100644
--- a/physicsnemo/models/domino/model.py
+++ b/physicsnemo/models/domino/model.py
@@ -21,504 +21,19 @@
 the config.yaml file)
 """
 
-import math
-from typing import Callable, Literal, Sequence
-
 import torch
 import torch.nn as nn
-import torch.nn.functional as F
-from einops import rearrange
 
 from physicsnemo.models.unet import UNet
-from physicsnemo.utils.profiling import profile
 
-from .ball_query import BQWarp
 from .encodings import (
     EncodingMLP,
     MultiGeometryEncoding,
-    fourier_encode_vectorized,
 )
+from .geometry_rep import GeometryRep, get_activation
 from .mlps import AggregationModel
 from .solutions import SolutionCalculatorSurface, SolutionCalculatorVolume
 
-
-def get_activation(activation: Literal["relu", "gelu"]) -> Callable:
-    """
-    Return a PyTorch activation function corresponding to the given name.
-    """
-    if activation == "relu":
-        return nn.ReLU()
-    elif activation == "gelu":
-        return nn.GELU()
-    else:
-        raise ValueError(f"Activation function {activation} not found")
-
-
-def scale_sdf(sdf: torch.Tensor) -> torch.Tensor:
-    """
-    Scale a signed distance function (SDF) to emphasize surface regions.
-
-    This function applies a non-linear scaling to the SDF values that compresses
-    the range while preserving the sign, effectively giving more weight to points
-    near surfaces where abs(SDF) is small.
-
-    Args:
-        sdf: Tensor containing signed distance function values
-
-    Returns:
-        Tensor with scaled SDF values in range [-1, 1]
-    """
-    return sdf / (0.4 + torch.abs(sdf))
-
-
-class GeoConvOut(nn.Module):
-    """
-    Geometry layer to project STL geometry data onto regular grids.
-    """
-
-    def __init__(
-        self,
-        input_features: int,
-        model_parameters,
-        grid_resolution=None,
-    ):
-        """
-        Initialize the GeoConvOut layer.
-
-        Args:
-            input_features: Number of input feature dimensions
-            model_parameters: Configuration parameters for the model
-            grid_resolution: Resolution of the output grid [nx, ny, nz]
-        """
-        super().__init__()
-        if grid_resolution is None:
-            grid_resolution = [256, 96, 64]
-        base_neurons = model_parameters.base_neurons
-        self.fourier_features = model_parameters.fourier_features
-        self.num_modes = model_parameters.num_modes
-
-        if self.fourier_features:
-            input_features_calculated = input_features * (1 + 2 * self.num_modes)
-        else:
-            input_features_calculated = input_features
-
-        self.fc1 = nn.Linear(input_features_calculated, base_neurons)
-        self.fc2 = nn.Linear(base_neurons, base_neurons // 2)
-        self.fc3 = nn.Linear(base_neurons // 2, model_parameters.base_neurons_in)
-
-        self.grid_resolution = grid_resolution
-
-        self.activation = get_activation(model_parameters.activation)
-
-        if self.fourier_features:
-            self.register_buffer(
-                "freqs", torch.exp(torch.linspace(0, math.pi, self.num_modes))
-            )
-
-    def forward(
-        self,
-        x: torch.Tensor,
-        grid: torch.Tensor,
-        radius: float = 0.025,
-        neighbors_in_radius: int = 10,
-    ) -> torch.Tensor:
-        """
-        Process and project geometric features onto a 3D grid.
-
-        Args:
-            x: Input tensor containing coordinates of the neighboring points
-               (batch_size, nx*ny*nz, n_points, 3)
-            grid: Input tensor represented as a grid of shape
-                (batch_size, nx, ny, nz, 3)
-
-        Returns:
-            Processed geometry features of shape (batch_size, base_neurons_in, nx, ny, nz)
-        """
-
-        nx, ny, nz = (
-            self.grid_resolution[0],
-            self.grid_resolution[1],
-            self.grid_resolution[2],
-        )
-        grid = grid.reshape(1, nx * ny * nz, 3, 1)
-        x_transposed = torch.transpose(x, 2, 3)
-        dist_weights = 1.0 / (1e-6 + (x_transposed - grid) ** 2.0)
-        dist_weights = torch.transpose(dist_weights, 2, 3)
-
-        # x = torch.sum(x * dist_weights, 2) / torch.sum(dist_weights, 2)
-        # x = torch.sum(x, 2)
-        mask = abs(x - 0) > 1e-6
-        if self.fourier_features:
-            facets = torch.cat((x, fourier_encode_vectorized(x, self.freqs)), axis=-1)
-        else:
-            facets = x
-        x = self.activation(self.fc1(facets))
-        x = self.activation(self.fc2(x))
-        x = F.tanh(self.fc3(x))
-
-        mask = mask[:, :, :, 0:1].expand(
-            mask.shape[0], mask.shape[1], mask.shape[2], x.shape[-1]
-        )
-
-        x = torch.sum(x * mask, 2)
-        x = rearrange(x, "b (x y z) c -> b c x y z", x=nx, y=ny, z=nz)
-        return x
-
-
-class GeoProcessor(nn.Module):
-    """Geometry processing layer using CNNs"""
-
-    def __init__(self, input_filters: int, output_filters: int, model_parameters):
-        """
-        Initialize the GeoProcessor network.
-
-        Args:
-            input_filters: Number of input channels
-            model_parameters: Configuration parameters for the model
-        """
-        super().__init__()
-        base_filters = model_parameters.base_filters
-        self.conv1 = nn.Conv3d(
-            input_filters, base_filters, kernel_size=3, padding="same"
-        )
-        self.conv2 = nn.Conv3d(
-            base_filters, 2 * base_filters, kernel_size=3, padding="same"
-        )
-        self.conv3 = nn.Conv3d(
-            2 * base_filters, 4 * base_filters, kernel_size=3, padding="same"
-        )
-        self.conv3_1 = nn.Conv3d(
-            4 * base_filters, 4 * base_filters, kernel_size=3, padding="same"
-        )
-        self.conv4 = nn.Conv3d(
-            4 * base_filters, 2 * base_filters, kernel_size=3, padding="same"
-        )
-        self.conv5 = nn.Conv3d(
-            4 * base_filters, base_filters, kernel_size=3, padding="same"
-        )
-        self.conv6 = nn.Conv3d(
-            2 * base_filters, input_filters, kernel_size=3, padding="same"
-        )
-        self.conv7 = nn.Conv3d(
-            2 * input_filters, input_filters, kernel_size=3, padding="same"
-        )
-        self.conv8 = nn.Conv3d(
-            input_filters, output_filters, kernel_size=3, padding="same"
-        )
-        self.avg_pool = torch.nn.AvgPool3d((2, 2, 2))
-        self.max_pool = nn.MaxPool3d(2)
-        self.upsample = nn.Upsample(scale_factor=2, mode="nearest")
-        self.activation = get_activation(model_parameters.activation)
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        """
-        Process geometry information through the 3D CNN network.
-
-        The network follows an encoder-decoder architecture with skip connections:
-        1. Downsampling path (encoder) with three levels of max pooling
-        2. Processing loop in the bottleneck
-        3. Upsampling path (decoder) with skip connections from the encoder
-
-        Args:
-            x: Input tensor containing grid-represented geometry of shape
-               (batch_size, input_filters, nx, ny, nz)
-
-        Returns:
-            Processed geometry features of shape (batch_size, 1, nx, ny, nz)
-        """
-        # Encoder
-        x0 = x
-        x = self.conv1(x)
-        x = self.activation(x)
-        x = self.max_pool(x)
-
-        x1 = x
-        x = self.conv2(x)
-        x = self.activation(x)
-        x = self.max_pool(x)
-
-        x2 = x
-        x = self.conv3(x)
-        x = self.activation(x)
-        x = self.max_pool(x)
-
-        # Processor loop
-        x = self.activation(self.conv3_1(x))
-
-        # Decoder
-        x = self.conv4(x)
-        x = self.activation(x)
-        x = self.upsample(x)
-        x = torch.cat((x, x2), dim=1)
-
-        x = self.conv5(x)
-        x = self.activation(x)
-        x = self.upsample(x)
-        x = torch.cat((x, x1), dim=1)
-
-        x = self.conv6(x)
-        x = self.activation(x)
-        x = self.upsample(x)
-        x = torch.cat((x, x0), dim=1)
-
-        x = self.activation(self.conv7(x))
-        x = self.conv8(x)
-
-        return x
-
-
-class GeometryRep(nn.Module):
-    """
-    Geometry representation module that processes STL geometry data.
-
-    This module constructs a multiscale representation of geometry by:
-    1. Computing multi-scale geometry encoding for local and global context
-    2. Processing signed distance field (SDF) data for surface information
-
-    The combined encoding enables the model to reason about both local and global
-    geometric properties.
-    """
-
-    def __init__(
-        self,
-        input_features: int,
-        radii: Sequence[float],
-        neighbors_in_radius,
-        hops=1,
-        model_parameters=None,
-    ):
-        """
-        Initialize the GeometryRep module.
-
-        Args:
-            input_features: Number of input feature dimensions
-            model_parameters: Configuration parameters for the model
-        """
-        super().__init__()
-        geometry_rep = model_parameters.geometry_rep
-        self.geo_encoding_type = model_parameters.geometry_encoding_type
-        self.cross_attention = geometry_rep.geo_processor.cross_attention
-        self.self_attention = geometry_rep.geo_processor.self_attention
-        self.activation_conv = get_activation(geometry_rep.geo_conv.activation)
-        self.activation_processor = geometry_rep.geo_processor.activation
-
-        self.bq_warp = nn.ModuleList()
-        self.geo_processors = nn.ModuleList()
-        for j in range(len(radii)):
-            self.bq_warp.append(
-                BQWarp(
-                    radius=radii[j],
-                    neighbors_in_radius=neighbors_in_radius[j],
-                )
-            )
-            if geometry_rep.geo_processor.processor_type == "unet":
-                h = geometry_rep.geo_processor.base_filters
-                if self.self_attention:
-                    normalization_in_unet = "layernorm"
-                else:
-                    normalization_in_unet = None
-                self.geo_processors.append(
-                    UNet(
-                        in_channels=geometry_rep.geo_conv.base_neurons_in,
-                        out_channels=geometry_rep.geo_conv.base_neurons_out,
-                        model_depth=3,
-                        feature_map_channels=[
-                            h,
-                            2 * h,
-                            4 * h,
-                        ],
-                        num_conv_blocks=1,
-                        kernel_size=3,
-                        stride=1,
-                        conv_activation=self.activation_processor,
-                        padding=1,
-                        padding_mode="zeros",
-                        pooling_type="MaxPool3d",
-                        pool_size=2,
-                        normalization=normalization_in_unet,
-                        use_attn_gate=self.self_attention,
-                        attn_decoder_feature_maps=[4 * h, 2 * h],
-                        attn_feature_map_channels=[2 * h, h],
-                        attn_intermediate_channels=4 * h,
-                        gradient_checkpointing=True,
-                    )
-                )
-            elif geometry_rep.geo_processor.processor_type == "conv":
-                self.geo_processors.append(
-                    nn.Sequential(
-                        GeoProcessor(
-                            input_filters=geometry_rep.geo_conv.base_neurons_in,
-                            output_filters=geometry_rep.geo_conv.base_neurons_out,
-                            model_parameters=geometry_rep.geo_processor,
-                        ),
-                        GeoProcessor(
-                            input_filters=geometry_rep.geo_conv.base_neurons_in,
-                            output_filters=geometry_rep.geo_conv.base_neurons_out,
-                            model_parameters=geometry_rep.geo_processor,
-                        ),
-                    )
-                )
-            else:
-                raise ValueError("Invalid prompt. Specify unet or conv ...")
-
-        self.geo_conv_out = nn.ModuleList()
-        self.geo_processor_out = nn.ModuleList()
-        for _ in range(len(radii)):
-            self.geo_conv_out.append(
-                GeoConvOut(
-                    input_features=input_features,
-                    model_parameters=geometry_rep.geo_conv,
-                    grid_resolution=model_parameters.interp_res,
-                )
-            )
-            self.geo_processor_out.append(
-                nn.Conv3d(
-                    geometry_rep.geo_conv.base_neurons_out,
-                    1,
-                    kernel_size=3,
-                    padding="same",
-                )
-            )
-
-        if geometry_rep.geo_processor.processor_type == "unet":
-            h = geometry_rep.geo_processor.base_filters
-            if self.self_attention:
-                normalization_in_unet = "layernorm"
-            else:
-                normalization_in_unet = None
-            self.geo_processor_sdf = UNet(
-                in_channels=6,
-                out_channels=geometry_rep.geo_conv.base_neurons_out,
-                model_depth=3,
-                feature_map_channels=[
-                    h,
-                    2 * h,
-                    4 * h,
-                ],
-                num_conv_blocks=1,
-                kernel_size=3,
-                stride=1,
-                conv_activation=self.activation_processor,
-                padding=1,
-                padding_mode="zeros",
-                pooling_type="MaxPool3d",
-                pool_size=2,
-                normalization=normalization_in_unet,
-                use_attn_gate=self.self_attention,
-                attn_decoder_feature_maps=[4 * h, 2 * h],
-                attn_feature_map_channels=[2 * h, h],
-                attn_intermediate_channels=4 * h,
-                gradient_checkpointing=True,
-            )
-        elif geometry_rep.geo_processor.processor_type == "conv":
-            self.geo_processor_sdf = nn.Sequential(
-                GeoProcessor(
-                    input_filters=6,
-                    output_filters=geometry_rep.geo_conv.base_neurons_out,
-                    model_parameters=geometry_rep.geo_processor,
-                ),
-                GeoProcessor(
-                    input_filters=geometry_rep.geo_conv.base_neurons_out,
-                    output_filters=geometry_rep.geo_conv.base_neurons_out,
-                    model_parameters=geometry_rep.geo_processor,
-                ),
-            )
-        else:
-            raise ValueError("Invalid prompt. Specify unet or conv ...")
-        self.radii = radii
-        self.hops = hops
-
-        self.geo_processor_sdf_out = nn.Conv3d(
-            geometry_rep.geo_conv.base_neurons_out, 1, kernel_size=3, padding="same"
-        )
-
-        if self.cross_attention:
-            self.combined_unet = UNet(
-                in_channels=1 + len(radii),
-                out_channels=1 + len(radii),
-                model_depth=3,
-                feature_map_channels=[
-                    h,
-                    2 * h,
-                    4 * h,
-                ],
-                num_conv_blocks=1,
-                kernel_size=3,
-                stride=1,
-                conv_activation=self.activation_processor,
-                padding=1,
-                padding_mode="zeros",
-                pooling_type="MaxPool3d",
-                pool_size=2,
-                normalization="layernorm",
-                use_attn_gate=True,
-                attn_decoder_feature_maps=[4 * h, 2 * h],
-                attn_feature_map_channels=[2 * h, h],
-                attn_intermediate_channels=4 * h,
-                gradient_checkpointing=True,
-            )
-
-    def forward(
-        self, x: torch.Tensor, p_grid: torch.Tensor, sdf: torch.Tensor
-    ) -> torch.Tensor:
-        """
-        Process geometry data to create a comprehensive representation.
-
-        This method combines short-range, long-range, and SDF-based geometry
-        encodings to create a rich representation of the geometry.
-
-        Args:
-            x: Input tensor containing geometric point data
-            p_grid: Grid points for sampling
-            sdf: Signed distance field tensor
-
-        Returns:
-            Comprehensive geometry encoding that concatenates short-range,
-            SDF-based, and long-range features
-        """
-        if self.geo_encoding_type == "both" or self.geo_encoding_type == "stl":
-            # Calculate multi-scale geoemtry dependency
-            x_encoding = []
-            for j in range(len(self.radii)):
-                mapping, k_short = self.bq_warp[j](x, p_grid)
-                x_encoding_inter = self.geo_conv_out[j](k_short, p_grid)
-                # Propagate information in the geometry enclosed BBox
-                for _ in range(self.hops):
-                    dx = self.geo_processors[j](x_encoding_inter) / self.hops
-                    x_encoding_inter = x_encoding_inter + dx
-                x_encoding_inter = self.geo_processor_out[j](x_encoding_inter)
-                x_encoding.append(x_encoding_inter)
-            x_encoding = torch.cat(x_encoding, dim=1)
-
-        if self.geo_encoding_type == "both" or self.geo_encoding_type == "sdf":
-            # Expand SDF
-            sdf = torch.unsqueeze(sdf, 1)
-            # Scaled sdf to emphasize near surface
-            scaled_sdf = scale_sdf(sdf)
-            # Binary sdf
-            binary_sdf = torch.where(sdf >= 0, 0.0, 1.0)
-            # Gradients of SDF
-            sdf_x, sdf_y, sdf_z = torch.gradient(sdf, dim=[2, 3, 4])
-
-            # Process SDF and its computed features
-            sdf = torch.cat((sdf, scaled_sdf, binary_sdf, sdf_x, sdf_y, sdf_z), 1)
-            sdf_encoding = self.geo_processor_sdf(sdf)
-            sdf_encoding = self.geo_processor_sdf_out(sdf_encoding)
-
-        if self.geo_encoding_type == "both":
-            # Geometry encoding comprised of short-range, long-range and SDF features
-            encoding_g = torch.cat((x_encoding, sdf_encoding), 1)
-        elif self.geo_encoding_type == "sdf":
-            encoding_g = sdf_encoding
-        elif self.geo_encoding_type == "stl":
-            encoding_g = x_encoding
-
-        if self.cross_attention:
-            encoding_g = self.combined_unet(encoding_g)
-
-        return encoding_g
-
-
 # @dataclass
 # class MetaData(ModelMetaData):
 #     name: str = "DoMINO"
@@ -981,7 +496,6 @@ def __init__(
                 nn_basis=self.nn_basis_vol,
             )
 
-    @profile
     def forward(self, data_dict, return_volume_neighbors=False):
         # Loading STL inputs, bounding box grids, precomputed SDF and scaling factors
 
diff --git a/test/models/domino/conftest.py b/test/models/domino/conftest.py
new file mode 100644
index 0000000000..9d8c8a71d5
--- /dev/null
+++ b/test/models/domino/conftest.py
@@ -0,0 +1,116 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from dataclasses import dataclass
+from typing import Sequence
+
+import pytest
+
+
+@pytest.fixture(scope="module")
+def base_model_params():
+    """Base model parameters for testing"""
+
+    @dataclass
+    class model_params:
+        @dataclass
+        class geometry_rep:
+            @dataclass
+            class geo_conv:
+                base_neurons: int = 32
+                base_neurons_in: int = 8
+                base_neurons_out: int = 8
+                surface_hops: int = 1
+                volume_hops: int = 1
+                volume_radii: Sequence = (0.1, 0.5)
+                volume_neighbors_in_radius: Sequence = (10, 10)
+                surface_radii: Sequence = (0.05,)
+                surface_neighbors_in_radius: Sequence = (10,)
+                activation: str = "relu"
+                fourier_features: bool = False
+                num_modes: int = 5
+
+            @dataclass
+            class geo_processor:
+                base_filters: int = 8
+                activation: str = "relu"
+                processor_type: str = "unet"
+                self_attention: bool = True
+                cross_attention: bool = False
+
+            base_filters: int = 8
+            geo_conv = geo_conv
+            geo_processor = geo_processor
+
+        @dataclass
+        class geometry_local:
+            base_layer: int = 512
+            volume_neighbors_in_radius: Sequence = (128, 128)
+            surface_neighbors_in_radius: Sequence = (128,)
+            volume_radii: Sequence = (0.05, 0.1)
+            surface_radii: Sequence = (0.05,)
+
+        @dataclass
+        class nn_basis_functions:
+            base_layer: int = 512
+            fourier_features: bool = False
+            num_modes: int = 5
+            activation: str = "relu"
+
+        @dataclass
+        class local_point_conv:
+            activation: str = "relu"
+
+        @dataclass
+        class aggregation_model:
+            base_layer: int = 512
+            activation: str = "relu"
+
+        @dataclass
+        class position_encoder:
+            base_neurons: int = 512
+            activation: str = "relu"
+            fourier_features: bool = False
+            num_modes: int = 5
+
+        @dataclass
+        class parameter_model:
+            base_layer: int = 512
+            fourier_features: bool = True
+            num_modes: int = 5
+            activation: str = "relu"
+
+        model_type: str = "combined"
+        activation: str = "relu"
+        interp_res: Sequence = (64, 64, 64)  # Smaller for testing
+        use_sdf_in_basis_func: bool = True
+        positional_encoding: bool = False
+        surface_neighbors: bool = True
+        num_neighbors_surface: int = 7
+        num_neighbors_volume: int = 7
+        use_surface_normals: bool = True
+        use_surface_area: bool = True
+        encode_parameters: bool = False
+        combine_volume_surface: bool = False
+        geometry_encoding_type: str = "both"
+        solution_calculation_mode: str = "two-loop"
+        geometry_rep = geometry_rep
+        nn_basis_functions = nn_basis_functions
+        aggregation_model = aggregation_model
+        position_encoder = position_encoder
+        geometry_local = geometry_local
+
+    return model_params
diff --git a/test/models/domino/test_domino_geometry_rep.py b/test/models/domino/test_domino_geometry_rep.py
index 12ede24be5..813b246bae 100644
--- a/test/models/domino/test_domino_geometry_rep.py
+++ b/test/models/domino/test_domino_geometry_rep.py
@@ -26,7 +26,7 @@
 @pytest.mark.parametrize("act", ["relu", "gelu"])
 def test_geo_conv_out(device, act):
     """Test GeoConvOut layer"""
-    from physicsnemo.models.domino.model import GeoConvOut
+    from physicsnemo.models.domino.geometry_rep import GeoConvOut
 
     torch.manual_seed(0)
 
@@ -59,7 +59,7 @@ class TestParams:
 @pytest.mark.parametrize("act", ["relu", "gelu"])
 def test_geo_processor(device, act):
     """Test GeoProcessor CNN"""
-    from physicsnemo.models.domino.model import GeoProcessor
+    from physicsnemo.models.domino.geometry_rep import GeoProcessor
 
     torch.manual_seed(0)
 
@@ -87,7 +87,7 @@ def test_geometry_rep(
     device, geometry_encoding_type, processor_type, base_model_params
 ):
     """Test GeometryRep module with different configurations"""
-    from physicsnemo.models.domino.model import GeometryRep
+    from physicsnemo.models.domino.geometry_rep import GeometryRep
 
     torch.manual_seed(0)
 
diff --git a/test/models/domino/utils.py b/test/models/domino/utils.py
index 8c5fb971f8..6f16a2c9fb 100644
--- a/test/models/domino/utils.py
+++ b/test/models/domino/utils.py
@@ -14,10 +14,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from dataclasses import dataclass
-from typing import Sequence
-
-import pytest
 import torch
 
 
@@ -47,102 +43,6 @@ def generate_test_data(bsize, nx, ny, nz, num_neigh, device):
     }
 
 
-@pytest.fixture
-def base_model_params():
-    """Base model parameters for testing"""
-
-    @dataclass
-    class model_params:
-        @dataclass
-        class geometry_rep:
-            @dataclass
-            class geo_conv:
-                base_neurons: int = 32
-                base_neurons_in: int = 8
-                base_neurons_out: int = 8
-                surface_hops: int = 1
-                volume_hops: int = 1
-                volume_radii: Sequence = (0.1, 0.5)
-                volume_neighbors_in_radius: Sequence = (10, 10)
-                surface_radii: Sequence = (0.05,)
-                surface_neighbors_in_radius: Sequence = (10,)
-                activation: str = "relu"
-                fourier_features: bool = False
-                num_modes: int = 5
-
-            @dataclass
-            class geo_processor:
-                base_filters: int = 8
-                activation: str = "relu"
-                processor_type: str = "unet"
-                self_attention: bool = True
-                cross_attention: bool = False
-
-            base_filters: int = 8
-            geo_conv = geo_conv
-            geo_processor = geo_processor
-
-        @dataclass
-        class geometry_local:
-            base_layer: int = 512
-            volume_neighbors_in_radius: Sequence = (128, 128)
-            surface_neighbors_in_radius: Sequence = (128,)
-            volume_radii: Sequence = (0.05, 0.1)
-            surface_radii: Sequence = (0.05,)
-
-        @dataclass
-        class nn_basis_functions:
-            base_layer: int = 512
-            fourier_features: bool = False
-            num_modes: int = 5
-            activation: str = "relu"
-
-        @dataclass
-        class local_point_conv:
-            activation: str = "relu"
-
-        @dataclass
-        class aggregation_model:
-            base_layer: int = 512
-            activation: str = "relu"
-
-        @dataclass
-        class position_encoder:
-            base_neurons: int = 512
-            activation: str = "relu"
-            fourier_features: bool = False
-            num_modes: int = 5
-
-        @dataclass
-        class parameter_model:
-            base_layer: int = 512
-            fourier_features: bool = True
-            num_modes: int = 5
-            activation: str = "relu"
-
-        model_type: str = "combined"
-        activation: str = "relu"
-        interp_res: Sequence = (64, 64, 64)  # Smaller for testing
-        use_sdf_in_basis_func: bool = True
-        positional_encoding: bool = False
-        surface_neighbors: bool = True
-        num_neighbors_surface: int = 7
-        num_neighbors_volume: int = 7
-        use_surface_normals: bool = True
-        use_surface_area: bool = True
-        encode_parameters: bool = False
-        combine_volume_surface: bool = False
-        geometry_encoding_type: str = "both"
-        solution_calculation_mode: str = "two-loop"
-        geometry_rep = geometry_rep
-        nn_basis_functions = nn_basis_functions
-        aggregation_model = aggregation_model
-        position_encoder = position_encoder
-        geometry_local = geometry_local
-
-    return model_params
-
-
 def validate_output_shape_and_values(output, expected_shape, check_finite=True):
     """Validate output tensor shape and values"""
     if output is not None:

From 5732199eb9783b6065b68c27a6ee4e17cc309f77 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 22 Sep 2025 16:50:13 +0000
Subject: [PATCH 38/98] This commit purges some code that was moved into
 another commit.

---
 .../distributed/shard_utils/__init__.py       |   2 -
 physicsnemo/distributed/shard_utils/knn.py    | 212 ------------------
 .../distributed/shard_utils/mesh_ops.py       | 146 ------------
 test/distributed/shard_tensor/ops/test_knn.py |  72 ------
 test/distributed/shard_tensor/ops/test_sdf.py | 119 ----------
 5 files changed, 551 deletions(-)
 delete mode 100644 physicsnemo/distributed/shard_utils/knn.py
 delete mode 100644 physicsnemo/distributed/shard_utils/mesh_ops.py
 delete mode 100644 test/distributed/shard_tensor/ops/test_knn.py
 delete mode 100644 test/distributed/shard_tensor/ops/test_sdf.py

diff --git a/physicsnemo/distributed/shard_utils/__init__.py b/physicsnemo/distributed/shard_utils/__init__.py
index e332307869..ef7b219aa2 100644
--- a/physicsnemo/distributed/shard_utils/__init__.py
+++ b/physicsnemo/distributed/shard_utils/__init__.py
@@ -37,8 +37,6 @@ def register_shard_wrappers():
             sharded_select_backward_helper,
             sharded_select_helper,
         )
-        from .knn import knn_sharded_wrapper
-        from .mesh_ops import sharded_signed_distance_field_wrapper
 
         # Currently disabled until wrapt is removed
         # from .natten_patches import na2d_wrapper
diff --git a/physicsnemo/distributed/shard_utils/knn.py b/physicsnemo/distributed/shard_utils/knn.py
deleted file mode 100644
index 689223000c..0000000000
--- a/physicsnemo/distributed/shard_utils/knn.py
+++ /dev/null
@@ -1,212 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
-# SPDX-FileCopyrightText: All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import Any, Callable
-
-import numpy as np
-import torch
-import torch.distributed as dist
-
-from physicsnemo.utils.neighbors.knn._cuml_impl import knn_impl
-from physicsnemo.utils.version_check import check_module_requirements
-
-check_module_requirements("physicsnemo.distributed.shard_tensor")
-
-from physicsnemo.distributed import ShardTensor  # noqa: E402
-from physicsnemo.distributed.shard_utils.patch_core import (  # noqa: E402
-    MissingShardPatch,
-)
-from physicsnemo.distributed.shard_utils.ring import (  # noqa: E402
-    RingPassingConfig,
-    perform_ring_iteration,
-)
-
-
-def ring_knn(
-    points: ShardTensor, queries: ShardTensor, k: int
-) -> tuple[torch.Tensor, torch.Tensor]:
-    """
-    Ring based kNN implementation, where the points travel around a ring and the
-    queries stay local.
-    """
-    # Each tensor has a _spec attribute, which contains information about the tensor's placement
-    # and the devices it lives on:
-    points_spec = points._spec
-    queries_spec = queries._spec
-
-    # ** In general ** you want to do some checking on the placements, since each
-    # point cloud might be sharded differently.  By construction, I know they're both
-    # sharded along the points axis here (and not, say, replicated).
-
-    if not points_spec.mesh == queries_spec.mesh:
-        raise NotImplementedError("Tensors must be sharded on the same mesh")
-
-    mesh = points_spec.mesh
-    local_group = mesh.get_group(0)
-    local_size = dist.get_world_size(group=local_group)
-    mesh_rank = mesh.get_local_rank()
-
-    # points and queries are both sharded - and since we're returning the nearest
-    # neighbors to points, let's make sure the output keeps that sharding too.
-
-    # One memory-efficient way to do this is with with a ring computation.
-    # We'll compute the knn on the local tensors, get the distances and outputs,
-    # then shuffle the queries shards along the mesh.
-
-    # we'll need to sort the results and make sure we have just the top-k,
-    # which is a little extra computation.
-
-    # Physics nemo has a ring passing utility we can use.
-    ring_config = RingPassingConfig(
-        mesh_dim=0,
-        mesh_size=local_size,
-        ring_direction="forward",
-        communication_method="p2p",
-    )
-
-    local_points, local_queries = points.to_local(), queries.to_local()
-    current_dists = None
-    current_topk_idx = None
-
-    points_spec = points._spec
-
-    points_sharding_shapes = points_spec.sharding_shapes()[0]
-
-    sharding_dim = points_spec.placements[0].dim
-
-    # This is to help specify the offset from local to global tensor.
-    points_strides_along_ring = [s[sharding_dim] for s in points_sharding_shapes]
-    points_strides_along_ring = np.cumsum(points_strides_along_ring)
-    points_strides_along_ring = [
-        0,
-    ] + list(points_strides_along_ring[0:-1])
-
-    for i in range(local_size):
-        source_rank = (mesh_rank - i) % local_size
-
-        # For point clouds, we need to pass the size of the incoming shard.
-        next_source_rank = (source_rank - 1) % local_size
-        recv_shape = points_sharding_shapes[next_source_rank]
-        if i != local_size - 1:
-            # Don't do a ring on the last iteration.
-            next_local_points = perform_ring_iteration(
-                local_points,
-                mesh,
-                ring_config,
-                recv_shape=recv_shape,
-            )
-
-        # Compute the knn on the local tensors:
-        local_idx, local_distances = knn_impl(local_points, local_queries, k)
-
-        # The local_idx indexes into the _local_ tensor, but for
-        # Correctness we need it to index into the _global_ tensor.
-        # Make sure to index using the rank the points came from!
-        offset = points_strides_along_ring[source_rank]
-        local_idx = local_idx + offset
-
-        if current_dists is None:
-            current_dists = local_distances
-            current_topk_idx = local_idx
-        else:
-            # Combine with the topk so far:
-            current_dists = torch.cat([current_dists, local_distances], dim=1)
-            current_topk_idx = torch.cat([current_topk_idx, local_idx], dim=1)
-            # And take the topk again:
-            current_dists, running_indexes = torch.topk(
-                current_dists, k=k, dim=1, sorted=True, largest=False
-            )
-
-            # This creates proper indexing to select specific elements along dim 1
-
-            current_topk_idx = torch.gather(current_topk_idx, 1, running_indexes)
-
-        if i != local_size - 1:
-            # Don't do a ring on the last iteration.
-            local_points = next_local_points
-
-    return current_topk_idx, current_dists
-
-
-def extract_knn_args(points, queries, k, *args, **kwargs):
-    return points, queries, k
-
-
-def knn_sharded_wrapper(
-    func: Callable, types: Any, args: tuple, kwargs: dict
-) -> tuple[ShardTensor, ShardTensor]:
-    """
-    Dispatch the proper kNN tools based on the input sharding.
-    """
-
-    points, queries, k = extract_knn_args(*args, **kwargs)
-
-    # kNN will only work with 1D sharding
-    if points._spec.mesh != queries._spec.mesh:
-        raise MissingShardPatch(
-            "sharded knn: All point inputs must be on the same mesh"
-        )
-
-    # make sure all meshes are 1D
-    if points._spec.mesh.ndim != 1:
-        raise MissingShardPatch(
-            "point_cloud_ops.radius_search_wrapper: All point inputs must be on 1D meshes"
-        )
-
-    # Do we need a ring?
-    points_placement = points._spec.placements[0]
-
-    if points_placement.is_shard():
-        # We need a ring
-        idx, distances = ring_knn(points, queries, k)
-    else:
-        # No ring is needed.  Get the local tensors and compute directly:
-        local_points = points.to_local()  # This is replicated, getting all of it
-        local_queries = queries.to_local()  # This sharding doesn't matter!
-        idx, distances = knn_impl(local_points, local_queries, k)
-
-    # The outputs only depend on the local queries shape
-    input_queries_spec = queries._spec
-    # The global output tensor will be (N_q, k)
-
-    output_queries_shard_shapes = {
-        mesh_dim: tuple(
-            torch.Size((s[0], k))
-            for s in input_queries_spec.sharding_shapes()[mesh_dim]
-        )
-        for mesh_dim in input_queries_spec.sharding_shapes().keys()
-    }
-
-    # Convert the selected points and indexes to shards:
-    shard_idx = ShardTensor.from_local(
-        idx,
-        queries._spec.mesh,
-        queries._spec.placements,
-        sharding_shapes=output_queries_shard_shapes,
-    )
-    shard_distances = ShardTensor.from_local(
-        distances,
-        queries._spec.mesh,
-        queries._spec.placements,
-        sharding_shapes=output_queries_shard_shapes,
-    )
-
-    return shard_idx, shard_distances
-
-
-ShardTensor.register_named_function_handler(
-    "physicsnemo.knn_cuml.default", knn_sharded_wrapper
-)
diff --git a/physicsnemo/distributed/shard_utils/mesh_ops.py b/physicsnemo/distributed/shard_utils/mesh_ops.py
deleted file mode 100644
index c04ad66a89..0000000000
--- a/physicsnemo/distributed/shard_utils/mesh_ops.py
+++ /dev/null
@@ -1,146 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
-# SPDX-FileCopyrightText: All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import Any
-
-import torch
-
-from physicsnemo.utils.sdf import signed_distance_field
-from physicsnemo.utils.version_check import check_module_requirements
-
-check_module_requirements("physicsnemo.distributed.shard_tensor")
-
-
-from physicsnemo.distributed import ShardTensor  # noqa: E402
-
-
-def sharded_signed_distance_field(
-    mesh_vertices: ShardTensor,
-    mesh_indices: ShardTensor,
-    input_points: ShardTensor,
-    max_dist: float = 1e8,
-    use_sign_winding_number: bool = False,
-) -> tuple[ShardTensor, ShardTensor]:
-    """
-    Compute the signed distance field for a (possibly sharded) mesh.
-
-    Args:
-        mesh_vertices: Sharded tensor of mesh vertices
-        mesh_indices: Sharded tensor of mesh indices
-        input_points: Sharded tensor of input points
-        max_dist: Maximum distance for the signed distance field
-        use_sign_winding_number: Whether to use sign winding number
-    """
-
-    # We can not actually compute the signed distance function on a sharded mesh.
-    # So, in this case, force the mesh to replicate placement if necessary:
-
-    local_mesh_vertices = mesh_vertices.full_tensor()
-    local_mesh_indices = mesh_indices.full_tensor()
-
-    # For the input points, though, it doesn't matter - they can be sharded.
-    # No communication is necessary
-
-    local_input_points = input_points.to_local()
-
-    local_sdf, local_sdf_hit_point = signed_distance_field(
-        local_mesh_vertices,
-        local_mesh_indices,
-        local_input_points,
-        max_dist,
-        use_sign_winding_number,
-    )
-
-    # Then, construct the output shard tensors:
-
-    if input_points._spec.placements[0].is_shard():
-        # Compute the output sharding shapes
-
-        # Output shape is always (N, 1), hit point is (N, 3)
-        input_shard_shapes = input_points._spec.sharding_shapes()
-
-        output_shard_shapes = {
-            mesh_dim: tuple(torch.Size((s[0],)) for s in input_shard_shapes[mesh_dim])
-            for mesh_dim in input_shard_shapes.keys()
-        }
-
-        sharded_sdf_output = ShardTensor.from_local(
-            local_sdf,
-            input_points._spec.mesh,
-            input_points._spec.placements,
-            sharding_shapes=output_shard_shapes,
-        ).reshape(input_points.shape[:-1])
-
-        sharded_sdf_hit_point_output = ShardTensor.from_local(
-            local_sdf_hit_point,
-            input_points._spec.mesh,
-            input_points._spec.placements,
-            sharding_shapes=input_shard_shapes,
-        ).reshape(input_points.shape)
-
-    else:
-        # The input points were replicated, use that for output:
-        sharded_sdf_output = ShardTensor.from_local(
-            local_sdf,
-            input_points._spec.mesh,
-            input_points._spec.placements,
-        )
-        sharded_sdf_hit_point_output = ShardTensor.from_local(
-            local_sdf_hit_point,
-            input_points._spec.mesh,
-            input_points._spec.placements,
-        )
-
-    return sharded_sdf_output, sharded_sdf_hit_point_output
-
-
-def repackage_radius_search_wrapper_args(
-    mesh_vertices: torch.Tensor,
-    mesh_indices: torch.Tensor,
-    input_points: torch.Tensor,
-    max_dist: float = 1e8,
-    use_sign_winding_number: bool = False,
-    *args,
-    **kwargs,
-) -> tuple[ShardTensor, ShardTensor, dict]:
-    """Repackages sdf arguments into a standard format."""
-    # Extract any additional parameters that might be in kwargs
-    # or use defaults if not provided
-    return_kwargs = {
-        "max_dist": max_dist,
-        "use_sign_winding_number": use_sign_winding_number,
-    }
-
-    # Add any explicitly passed parameters
-    if kwargs:
-        return_kwargs.update(kwargs)
-
-    return mesh_vertices, mesh_indices, input_points, return_kwargs
-
-
-def sharded_signed_distance_field_wrapper(
-    func: Any, type: Any, args: tuple, kwargs: dict
-) -> tuple[ShardTensor, ShardTensor]:
-    """
-    Wrapper for sharded_signed_distance_field to support sharded tensors.
-    """
-
-    return sharded_signed_distance_field(*args, **kwargs)
-
-
-ShardTensor.register_named_function_handler(
-    "physicsnemo.signed_distance_field.default", sharded_signed_distance_field_wrapper
-)
diff --git a/test/distributed/shard_tensor/ops/test_knn.py b/test/distributed/shard_tensor/ops/test_knn.py
deleted file mode 100644
index b41b62748a..0000000000
--- a/test/distributed/shard_tensor/ops/test_knn.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
-# SPDX-FileCopyrightText: All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import pytest
-import torch
-from torch.distributed.tensor.placement_types import Replicate, Shard
-
-from physicsnemo.distributed import DistributedManager, scatter_tensor
-from physicsnemo.utils.neighbors import knn
-
-from .utils import numerical_shard_tensor_check
-
-
-class kNNModule(torch.nn.Module):
-    def __init__(
-        self,
-        num_neighbors=4,
-    ):
-        super().__init__()
-
-        self.num_neighbors = num_neighbors
-
-    def forward(self, points, queries):
-        return knn(points, queries, self.num_neighbors)
-
-
-@pytest.mark.multigpu_static
-@pytest.mark.parametrize("scatter_points", [True, False])
-@pytest.mark.parametrize("scatter_queries", [True, False])
-def test_knn_1dmesh(
-    distributed_mesh,
-    scatter_points: bool,
-    scatter_queries: bool,
-):
-    dm = DistributedManager()
-
-    # Generate random points for the points and queries
-    points = torch.randn(1043, 3).to(dm.device)
-    queries = torch.randn(2198, 3).to(dm.device)
-
-    # points = torch.randn(10, 3).to(dm.device)
-    # queries = torch.randn(8, 3).to(dm.device)
-
-    # Distribute the inputs:
-    points_placements = (Shard(0),) if scatter_points else (Replicate(),)
-    queries_placements = (Shard(0),) if scatter_queries else (Replicate(),)
-
-    sharded_points = scatter_tensor(points, 0, distributed_mesh, points_placements)
-    sharded_queries = scatter_tensor(queries, 0, distributed_mesh, queries_placements)
-
-    module = kNNModule()
-
-    numerical_shard_tensor_check(
-        distributed_mesh,
-        module,
-        [sharded_points, sharded_queries],
-        {},
-        check_grads=False,
-    )
diff --git a/test/distributed/shard_tensor/ops/test_sdf.py b/test/distributed/shard_tensor/ops/test_sdf.py
deleted file mode 100644
index 079127a229..0000000000
--- a/test/distributed/shard_tensor/ops/test_sdf.py
+++ /dev/null
@@ -1,119 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
-# SPDX-FileCopyrightText: All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import numpy as np
-import pytest
-import torch
-from scipy.spatial import ConvexHull
-from torch.distributed.tensor.placement_types import Replicate, Shard
-
-from physicsnemo.distributed import DistributedManager, scatter_tensor
-from physicsnemo.utils.sdf import signed_distance_field
-
-from .utils import numerical_shard_tensor_check
-
-
-# This is from the domino datapipe, too:
-def random_sample_on_unit_sphere(n_points):
-    # Random points on the sphere:
-    phi = np.random.uniform(0, 2 * np.pi, n_points)
-    cos_theta = np.random.uniform(-1, 1, n_points)
-    theta = np.arccos(cos_theta)
-
-    # Convert to x/y/z and stack:
-    x = np.sin(theta) * np.cos(phi)
-    y = np.sin(theta) * np.sin(phi)
-    z = np.cos(theta)
-    points = np.stack([x, y, z], axis=1)
-    return points
-
-
-def mesh_vertices_and_indices(n_points):
-    # We are generating a mesh on a random sphere.
-    stl_points = random_sample_on_unit_sphere(n_points)
-
-    # Generate the triangles with ConvexHull:
-    hull = ConvexHull(stl_points)
-    faces = hull.simplices  # (M, 3)
-
-    return stl_points, faces
-
-
-class SDFModule(torch.nn.Module):
-    """
-    This is a test module to run the SDF function ... don't use it elsewhere.
-    """
-
-    def __init__(self, max_dist=1e8, use_sign_winding_number=False):
-        super().__init__()
-
-        self.max_dist = max_dist
-        self.use_sign_winding_number = use_sign_winding_number
-
-    def forward(self, mesh_vertices, mesh_indices, input_points):
-        return signed_distance_field(
-            mesh_vertices,
-            mesh_indices,
-            input_points,
-            self.max_dist,
-            self.use_sign_winding_number,
-        )
-
-
-@pytest.mark.multigpu_static
-@pytest.mark.parametrize("scatter_mesh", [True, False])
-@pytest.mark.parametrize("scatter_inputs", [True, False])
-def test_sdf_1dmesh(
-    distributed_mesh,
-    scatter_mesh: bool,
-    scatter_inputs: bool,
-):
-    dm = DistributedManager()
-
-    # Generate a mesh on a unit sphere:
-    mesh_vertices, mesh_indices = mesh_vertices_and_indices(932)
-
-    # Cast the vertices and indices to tensors:
-    mesh_vertices = torch.tensor(mesh_vertices).to(dm.device)
-    mesh_indices = torch.tensor(mesh_indices.flatten()).to(dm.device)
-
-    # Distribute the inputs:
-    mesh_placements = (Shard(0),) if scatter_mesh else (Replicate(),)
-    input_placements = (Shard(0),) if scatter_inputs else (Replicate(),)
-
-    sharded_mesh_vertices = scatter_tensor(
-        mesh_vertices, 0, distributed_mesh, mesh_placements
-    )
-    sharded_mesh_indices = scatter_tensor(
-        mesh_indices, 0, distributed_mesh, mesh_placements
-    )
-
-    # Generate random points in the volume:
-    input_points = torch.randn(1043, 3).to(dm.device)
-
-    sharded_input_points = scatter_tensor(
-        input_points, 0, distributed_mesh, input_placements
-    )
-
-    module = SDFModule()
-
-    numerical_shard_tensor_check(
-        distributed_mesh,
-        module,
-        [sharded_mesh_vertices, sharded_mesh_indices, sharded_input_points],
-        {},
-        check_grads=False,
-    )

From b2b10ad6a1d2895b84fb65d5ba0f1b450ac07dad Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 22 Sep 2025 16:51:32 +0000
Subject: [PATCH 39/98] Missed a piece of moved code.

---
 test/distributed/shard_tensor/ops/utils.py | 44 ----------------------
 1 file changed, 44 deletions(-)

diff --git a/test/distributed/shard_tensor/ops/utils.py b/test/distributed/shard_tensor/ops/utils.py
index cdece254e7..19e3de4d73 100644
--- a/test/distributed/shard_tensor/ops/utils.py
+++ b/test/distributed/shard_tensor/ops/utils.py
@@ -18,7 +18,6 @@
 from collections.abc import Iterable
 
 import torch
-import torch.distributed as dist
 from torch.distributed.tensor import DTensor, distribute_module
 from torch.distributed.tensor.device_mesh import DeviceMesh
 
@@ -84,18 +83,6 @@ def sharded_to_local(container):
 def default_tensor_comparison(output, d_output, atol, rtol):
     # We assume a single output!
 
-    if not isinstance(output, torch.Tensor):
-        if isinstance(output, Iterable):
-            return all(
-                [
-                    default_tensor_comparison(item, d_item, atol, rtol)
-                    for item, d_item in zip(output, d_output)
-                ]
-            )
-
-    if isinstance(d_output, ShardTensor):
-        validate_shard_tensor_spec(d_output)
-
     local_output = sharded_to_local(d_output)
 
     # Check forward agreement:
@@ -108,37 +95,6 @@ def default_loss_fn(output):
     return output.mean()
 
 
-def validate_shard_tensor_spec(shard_tensor):
-    # Take a shard tensor and cross check on the dimensions.
-    # Take care about assertions here, since this is a collective
-
-    # Check out shard shapes
-    # The local shard shape needs to match the local tensor shape:
-    sharding_shapes = shard_tensor._spec.sharding_shapes()
-    mesh = shard_tensor._spec.mesh
-
-    for mesh_dim in range(mesh.ndim):
-        mesh_rank = mesh.get_local_rank(mesh_dim)
-        mesh_size = dist.get_world_size(mesh.get_group(mesh_dim))
-
-        # Is this axis sharded?
-        this_placement = shard_tensor._spec.placements[mesh_dim]
-        if this_placement.is_shard():
-            # This axis is sharded.  the mesh dim should be in the shapes
-            assert mesh_dim in sharding_shapes.keys()
-
-            # The length of the sharding shapes should match the mesh size:
-            assert len(sharding_shapes[mesh_dim]) == mesh_size
-
-            # The local shape should match the listed shape for this rank:
-            # this_shape = shard_tensor._spec.sharding_shapes()[mesh_dim]
-            # print(f"local tensor shape: {shard_tensor._local_tensor.shape}")
-            # print(f"sharding shapes: {sharding_shapes[mesh_dim][mesh_rank]}")
-            assert (
-                sharding_shapes[mesh_dim][mesh_rank] == shard_tensor._local_tensor.shape
-            )
-
-
 def numerical_shard_tensor_check(
     mesh: DeviceMesh,
     module: torch.nn.Module,

From 8a34242b0f9309da2edd495684ffb4c289fda4fd Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 22 Sep 2025 17:59:06 +0000
Subject: [PATCH 40/98] move sdf, knn, and radius_search torch interface and
 stream fixes to a different PR

---
 physicsnemo/utils/neighbors/knn/_cuml_impl.py |  11 +-
 .../neighbors/radius_search/_warp_impl.py     | 167 +++++++++---------
 physicsnemo/utils/sdf.py                      | 157 ++++++++--------
 3 files changed, 158 insertions(+), 177 deletions(-)

diff --git a/physicsnemo/utils/neighbors/knn/_cuml_impl.py b/physicsnemo/utils/neighbors/knn/_cuml_impl.py
index 72546cf6a7..10d20ce1f5 100644
--- a/physicsnemo/utils/neighbors/knn/_cuml_impl.py
+++ b/physicsnemo/utils/neighbors/knn/_cuml_impl.py
@@ -28,21 +28,12 @@
     def knn_impl(
         points: torch.Tensor, queries: torch.Tensor, k: int = 3
     ) -> tuple[torch.Tensor, torch.Tensor]:
-        # Create a cuml handle to ensure we use the right stream:
-        torch_stream = torch.cuda.current_stream()
-
-        # Get the raw CUDA stream pointer (as an integer)
-        ptr = torch_stream.cuda_stream
-
-        # Build a cuML handle with that stream
-        handle = cuml.Handle(stream=ptr)
-
         # Use dlpack to move the data without copying between pytorch and cuml:
         points = cp.from_dlpack(points)
         queries = cp.from_dlpack(queries)
 
         # Construct the knn:
-        knn = cuml.neighbors.NearestNeighbors(n_neighbors=k, handle=handle)
+        knn = cuml.neighbors.NearestNeighbors(n_neighbors=k)
         # First pass partitions everything in points to make lookups fast
         knn.fit(points)
 
diff --git a/physicsnemo/utils/neighbors/radius_search/_warp_impl.py b/physicsnemo/utils/neighbors/radius_search/_warp_impl.py
index 997f95d3fd..9b15b6816f 100644
--- a/physicsnemo/utils/neighbors/radius_search/_warp_impl.py
+++ b/physicsnemo/utils/neighbors/radius_search/_warp_impl.py
@@ -278,6 +278,11 @@ def radius_search_impl(
         if points.device != queries.device:
             raise ValueError("points and queries must be on the same device")
 
+        # We're in the warp-backended regime.  So, the first thing to do is to convert these torch tensors to warp
+        # These are readonly in warp, allocated with pytorch.
+        wp_points = wp.from_torch(points, dtype=wp.vec3)
+        wp_queries = wp.from_torch(queries, dtype=wp.vec3, return_ctype=True)
+
         N_queries = len(queries)
 
         # Compute follows data.
@@ -292,104 +297,92 @@ def radius_search_impl(
             wp_launch_stream = None
             wp_launch_device = "cpu"  # CPUs have no streams
 
-        with wp.ScopedStream(wp_launch_stream):
-            # We're in the warp-backended regime.  So, the first thing to do is to convert these torch tensors to warp
-            # These are readonly in warp, allocated with pytorch.
-            wp_points = wp.from_torch(points, dtype=wp.vec3)
-            wp_queries = wp.from_torch(queries, dtype=wp.vec3, return_ctype=True)
+        # We need to create a hash grid:
+        grid = wp.HashGrid(dim_x=128, dim_y=128, dim_z=128, device=wp_points.device)
+        grid.reserve(N_queries)
+        grid.build(points=wp_points, radius=0.5 * radius)
 
-            # We need to create a hash grid:
-            grid = wp.HashGrid(dim_x=128, dim_y=128, dim_z=128, device=wp_points.device)
-            grid.reserve(N_queries)
-            grid.build(points=wp_points, radius=0.5 * radius)
+        # Now, the situations diverge based on max_points.
 
-            # Now, the situations diverge based on max_points.
+        if max_points is None:
+            total_count, wp_offset = count_neighbors(
+                grid,
+                wp_points,
+                wp_queries,
+                wp_launch_device,
+                wp_launch_stream,
+                radius,
+                N_queries,
+            )
 
-            if max_points is None:
-                total_count, wp_offset = count_neighbors(
-                    grid,
-                    wp_points,
-                    wp_queries,
-                    wp_launch_device,
-                    wp_launch_stream,
-                    radius,
-                    N_queries,
+            if not total_count < 2**31 - 1:
+                raise RuntimeError(
+                    f"Total found neighbors is too large: {total_count} > 2**31 - 1"
                 )
 
-                if not total_count < 2**31 - 1:
-                    raise RuntimeError(
-                        f"Total found neighbors is too large: {total_count} > 2**31 - 1"
-                    )
+            return gather_neighbors(
+                grid,
+                points.device,
+                wp_points,
+                wp_queries,
+                wp_offset,
+                wp_launch_device,
+                wp_launch_stream,
+                radius,
+                N_queries,
+                return_dists,
+                return_points,
+                total_count,
+            )
 
-                return gather_neighbors(
-                    grid,
-                    points.device,
-                    wp_points,
-                    wp_queries,
-                    wp_offset,
-                    wp_launch_device,
-                    wp_launch_stream,
-                    radius,
-                    N_queries,
-                    return_dists,
-                    return_points,
-                    total_count,
+        else:
+            # With a fixed number of output points, we have no need for a second kernel.
+            indices = torch.full(
+                (N_queries, max_points), 0, dtype=torch.int32, device=points.device
+            )
+            if return_dists:
+                distances = torch.zeros(
+                    (N_queries, max_points), dtype=torch.float32, device=points.device
                 )
-
             else:
-                # With a fixed number of output points, we have no need for a second kernel.
-                indices = torch.full(
-                    (N_queries, max_points), 0, dtype=torch.int32, device=points.device
+                distances = torch.empty(0, dtype=torch.float32, device=points.device)
+            num_neighbors = torch.zeros(
+                (N_queries,), dtype=torch.int32, device=points.device
+            )
+
+            if return_points:
+                points = torch.zeros(
+                    (len(queries), max_points, 3),
+                    dtype=torch.float32,
+                    device=points.device,
                 )
-                if return_dists:
-                    distances = torch.zeros(
-                        (N_queries, max_points),
-                        dtype=torch.float32,
-                        device=points.device,
-                    )
-                else:
-                    distances = torch.empty(
-                        0, dtype=torch.float32, device=points.device
-                    )
-                num_neighbors = torch.zeros(
-                    (N_queries,), dtype=torch.int32, device=points.device
+            else:
+                points = torch.empty(
+                    (0, max_points, 3), dtype=torch.float32, device=points.device
                 )
+            # This kernel selects up to max_points hits per query.
+            # It is not necessarily deterministic.
+            # If the number of matches > max_points, you may get different results.
 
-                if return_points:
-                    points = torch.zeros(
-                        (len(queries), max_points, 3),
-                        dtype=torch.float32,
-                        device=points.device,
-                    )
-                else:
-                    points = torch.empty(
-                        (0, max_points, 3), dtype=torch.float32, device=points.device
-                    )
-                # This kernel selects up to max_points hits per query.
-                # It is not necessarily deterministic.
-                # If the number of matches > max_points, you may get different results.
-
-                wp.launch(
-                    kernel=radius_search_limited_select,
-                    dim=N_queries,
-                    inputs=[
-                        grid.id,
-                        wp_points,
-                        wp_queries,
-                        max_points,
-                        radius,
-                        wp.from_torch(indices, return_ctype=True),
-                        wp.from_torch(num_neighbors, return_ctype=True),
-                        return_dists,
-                        wp.from_torch(distances, return_ctype=True),
-                        return_points,
-                        wp.from_torch(points, return_ctype=True)
-                        if return_points
-                        else None,
-                    ],
-                    stream=wp_launch_stream,
-                    device=wp_launch_device,
-                )
+            wp.launch(
+                kernel=radius_search_limited_select,
+                dim=N_queries,
+                inputs=[
+                    grid.id,
+                    wp_points,
+                    wp_queries,
+                    max_points,
+                    radius,
+                    wp.from_torch(indices, return_ctype=True),
+                    wp.from_torch(num_neighbors, return_ctype=True),
+                    return_dists,
+                    wp.from_torch(distances, return_ctype=True),
+                    return_points,
+                    wp.from_torch(points, return_ctype=True) if return_points else None,
+                ],
+                stream=wp_launch_stream,
+                device=wp_launch_device,
+            )
 
         # Handle the matrix of return values:
         return indices, points, distances, num_neighbors
diff --git a/physicsnemo/utils/sdf.py b/physicsnemo/utils/sdf.py
index 9abbafa460..a095074a96 100644
--- a/physicsnemo/utils/sdf.py
+++ b/physicsnemo/utils/sdf.py
@@ -14,7 +14,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import torch
+import cupy as cp
+import numpy as np
 import warp as wp
 
 wp.config.quiet = True
@@ -27,6 +28,7 @@ def _bvh_query_distance(
     max_dist: wp.float32,
     sdf: wp.array(dtype=wp.float32),
     sdf_hit_point: wp.array(dtype=wp.vec3f),
+    sdf_hit_point_id: wp.array(dtype=wp.int32),
     use_sign_winding_number: bool = False,
 ):
     """
@@ -65,16 +67,22 @@ def _bvh_query_distance(
 
     sdf[tid] = res.sign * wp.abs(wp.length(points[tid] - p_closest))
     sdf_hit_point[tid] = p_closest
+    sdf_hit_point_id[tid] = res.face
+
+
+Array = np.ndarray | cp.ndarray
 
 
-@torch.library.custom_op("physicsnemo::signed_distance_field", mutates_args=())
 def signed_distance_field(
-    mesh_vertices: torch.Tensor,
-    mesh_indices: torch.Tensor,
-    input_points: torch.Tensor,
+    mesh_vertices: Array,
+    mesh_indices: Array,
+    input_points: Array,
     max_dist: float = 1e8,
+    include_hit_points: bool = False,
+    include_hit_points_id: bool = False,
     use_sign_winding_number: bool = False,
-) -> tuple[torch.Tensor, torch.Tensor]:
+    return_cupy: bool | None = None,
+) -> Array | tuple[Array, ...]:
     """
     Computes the signed distance field (SDF) for a given mesh and input points.
 
@@ -92,7 +100,11 @@ def signed_distance_field(
         max_dist (float, optional): Maximum distance within which
             to search for the closest point on the mesh. Default is 1e8.
         include_hit_points (bool, optional): Whether to include hit points in
-            the output. Here,
+            the output. Here, "hit points" are the points on the mesh that are
+            closest to the input points, and hence, are defining the SDF.
+            Default is False.
+        include_hit_points_id (bool, optional): Whether to include hit point
+            IDs in the output. Default is False.
         use_sign_winding_number (bool, optional): Whether to use sign winding
             number method for SDF. Default is False. If False, your mesh should
             be watertight to obtain correct results.
@@ -103,103 +115,88 @@ def signed_distance_field(
     Returns:
     -------
     Returns:
-        tuple[torch.Tensor, torch.Tensor] of:
-            - signed distance to the mesh, per input point
-            - hith point, per input point. "hit points" are the points on the
-              mesh that are closest to the input points, and hence, are
-              defining the SDF.
+        np.ndarray | cp.ndarray or tuple:
+            - If both `include_hit_points` and `include_hit_points_id` are False
+              (default), returns a 1D array of signed distances for each input
+              point.
+            - If `include_hit_points` is True, returns a tuple: (sdf,
+              hit_points), where `hit_points` contains the closest mesh point
+              for each input point.
+            - If `include_hit_points_id` is True, returns a tuple: (sdf,
+              hit_point_ids), where `hit_point_ids` contains the face index of
+              the closest mesh face for each input point.
+            - If both `include_hit_points` and `include_hit_points_id` are True,
+              returns a tuple: (sdf, hit_points, hit_point_ids).
+            - The returned array type (NumPy or CuPy) is determined by the
+            `return_cupy` argument, or inferred from the input arrays.
 
     Example:
     -------
     >>> mesh_vertices = [(0, 0, 0), (1, 0, 0), (0, 1, 0)]
-    >>> mesh_indices = torch.tensor((0, 1, 2))
-    >>> input_points = torch.tensor((0.5, 0.5, 0.5))
+    >>> mesh_indices = np.array((0, 1, 2))
+    >>> input_points = [(0.5, 0.5, 0.5)]
     >>> signed_distance_field(mesh_vertices, mesh_indices, input_points)
-    (tensor([0.5]), tensor([0.5, 0.5, 0.5]))
+    array([0.5], dtype=float32)
     """
+    if return_cupy is None:
+        return_cupy = any(
+            isinstance(arr, cp.ndarray)
+            for arr in (mesh_vertices, mesh_indices, input_points)
+        )
 
-    if input_points.shape[-1] != 3:
-        raise ValueError("Input points must be a tensor with last dimension of size 3")
-
-    input_shape = input_points.shape
-
-    # Flatten the input points:
-    input_points = input_points.reshape(-1, 3)
-
-    N = len(input_points)
+    wp.init()
 
-    # Allocate output tensors with torch:
-    sdf = torch.zeros(N, dtype=torch.float32, device=input_points.device)
-    sdf_hit_point = torch.zeros(N, 3, dtype=torch.float32, device=input_points.device)
+    if isinstance(mesh_vertices, cp.ndarray):
+        device = mesh_vertices.device
+        wp_device = f"cuda:{device.id}"
+    else:
+        wp_device = wp.get_device()
 
-    if input_points.device.type == "cuda":
-        wp_launch_stream = wp.stream_from_torch(
-            torch.cuda.current_stream(input_points.device)
+    with wp.ScopedDevice(wp_device):
+        mesh = wp.Mesh(
+            points=wp.array(mesh_vertices, dtype=wp.vec3f, device=wp_device),
+            indices=wp.array(mesh_indices, dtype=wp.int32, device=wp_device),
         )
-        wp_launch_device = None  # We explicitly pass None if using the stream.
-    else:
-        wp_launch_stream = None
-        wp_launch_device = "cpu"  # CPUs have no streams
 
-    with wp.ScopedStream(wp_launch_stream):
-        wp.init()
+        warp_input_points = wp.array(input_points, dtype=wp.vec3f, device=wp_device)
 
-        # zero copy the vertices, indices, and input points to warp:
-        wp_vertices = wp.from_torch(mesh_vertices.to(torch.float32), dtype=wp.vec3)
-        wp_indices = wp.from_torch(mesh_indices.to(torch.int32), dtype=wp.int32)
-        wp_input_points = wp.from_torch(input_points.to(torch.float32), dtype=wp.vec3)
+        N = len(warp_input_points)
 
-        # Convert output points:
-        wp_sdf = wp.from_torch(sdf, dtype=wp.float32)
-        wp_sdf_hit_point = wp.from_torch(sdf_hit_point, dtype=wp.vec3f)
-
-        mesh = wp.Mesh(
-            points=wp_vertices,
-            indices=wp_indices,
-            support_winding_number=use_sign_winding_number,
-        )
+        sdf = wp.empty(shape=(N,), dtype=wp.float32, device=wp_device)
+        sdf_hit_point = wp.empty(shape=(N,), dtype=wp.vec3f, device=wp_device)
+        sdf_hit_point_id = wp.empty(shape=(N,), dtype=wp.int32, device=wp_device)
 
         wp.launch(
             kernel=_bvh_query_distance,
             dim=N,
             inputs=[
                 mesh.id,
-                wp_input_points,
+                warp_input_points,
                 max_dist,
-                wp_sdf,
-                wp_sdf_hit_point,
+                sdf,
+                sdf_hit_point,
+                sdf_hit_point_id,
                 use_sign_winding_number,
             ],
-            device=wp_launch_device,
-            stream=wp_launch_stream,
+            device=wp_device,
         )
 
-    # Unflatten the output to be like the input:
-    sdf = sdf.reshape(input_shape[:-1])
-    sdf_hit_point = sdf_hit_point.reshape(input_shape)
-
-    return sdf.to(input_points.dtype), sdf_hit_point.to(input_points.dtype)
+        def convert(array: wp.array) -> np.ndarray | cp.ndarray:
+            """Converts a Warp array to CuPy/NumPy based on the `return_cupy` flag."""
+            if return_cupy:
+                return cp.asarray(array)
+            else:
+                return array.numpy()
 
+        arrays_to_return: list[np.ndarray | cp.ndarray] = [convert(sdf)]
 
-@signed_distance_field.register_fake
-def _(
-    mesh_vertices: torch.Tensor,
-    mesh_indices: torch.Tensor,
-    input_points: torch.Tensor,
-    max_dist: float = 1e8,
-    use_sign_winding_number: bool = False,
-) -> tuple[torch.Tensor, torch.Tensor]:
-    if mesh_vertices.device != input_points.device:
-        raise RuntimeError("mesh_vertices and input_points must be on the same device")
+        if include_hit_points:
+            arrays_to_return.append(convert(sdf_hit_point))
+        if include_hit_points_id:
+            arrays_to_return.append(convert(sdf_hit_point_id))
 
-    if mesh_vertices.device != mesh_indices.device:
-        raise RuntimeError("mesh_vertices and mesh_indices must be on the same device")
-
-    N = input_points.shape[0]
-
-    sdf_output = torch.empty(N, 1, device=input_points.device, dtype=input_points.dtype)
-    sdf_hit_point_output = torch.empty(
-        N, 3, device=input_points.device, dtype=input_points.dtype
-    )
-
-    return sdf_output, sdf_hit_point_output
+        return (
+            arrays_to_return[0]
+            if len(arrays_to_return) == 1
+            else tuple(arrays_to_return)
+        )

From 3c0a551fd15473bae039fbae9c4e2441d04032f1 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 22 Sep 2025 18:02:14 +0000
Subject: [PATCH 41/98] Move sdf test changes to a different PR

---
 test/utils/test_sdf.py | 51 ++++++++++++++++--------------------------
 1 file changed, 19 insertions(+), 32 deletions(-)

diff --git a/test/utils/test_sdf.py b/test/utils/test_sdf.py
index f449469b5a..107e5e0316 100644
--- a/test/utils/test_sdf.py
+++ b/test/utils/test_sdf.py
@@ -16,13 +16,12 @@
 # ruff: noqa: E402
 
 
-import pytest
-import torch
+import numpy as np
 from pytest_utils import import_or_fail
 
 
 def tet_verts(flip_x=1):
-    tet = torch.tensor(
+    tet = np.array(
         [
             flip_x * 0,
             0,
@@ -61,47 +60,35 @@ def tet_verts(flip_x=1):
             0,
             1,
         ],
-        dtype=torch.float64,
+        dtype=np.float64,
     )
 
     return tet
 
 
 @import_or_fail("warp")
-@pytest.mark.parametrize("dtype", [torch.float32, torch.float64])
-@pytest.mark.parametrize("device", ["cpu", "cuda"])
-def test_sdf(pytestconfig, dtype, device):
+def test_sdf(pytestconfig):
     from physicsnemo.utils.sdf import signed_distance_field
 
-    mesh_vertices = tet_verts().reshape(-1, 3)
+    tet = tet_verts()
 
-    if device == "cuda":
-        device = torch.device("cuda")
-    else:
-        device = torch.device("cpu")
-
-    mesh_indices = torch.tensor(
-        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], dtype=torch.int32
+    sdf_tet = signed_distance_field(
+        tet,
+        np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
+        np.array([1, 1, 1, 0.1, 0.1, 0.1], dtype=np.float64),
     )
-    input_points = torch.tensor([[1, 1, 1], [0.05, 0.1, 0.1]], dtype=torch.float64)
-
-    mesh_vertices = mesh_vertices.to(dtype)
-    input_points = input_points.to(dtype)
+    np.testing.assert_allclose(sdf_tet, [1.15470052, -0.1], atol=1e-7)
 
-    sdf_tet, sdf_hit_point = signed_distance_field(
-        mesh_vertices,
-        mesh_indices,
-        input_points,
-        use_sign_winding_number=False,
+    sdf_tet, sdf_hit_point, sdf_hit_point_id = signed_distance_field(
+        tet,
+        np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], dtype=np.int32),
+        np.array([1, 1, 1, 0.12, 0.11, 0.1], dtype=np.float64),
+        include_hit_points=True,
+        include_hit_points_id=True,
     )
-
-    expected_sdf = torch.tensor([[1.1547], [-0.05]], dtype=dtype)
-    assert torch.allclose(sdf_tet, expected_sdf, atol=1e-7)
-
-    assert torch.allclose(
+    np.testing.assert_allclose(
         sdf_hit_point,
-        torch.tensor(
-            [[0.33333322, 0.33333334, 0.3333334], [0.0, 0.10, 0.10]], dtype=dtype
-        ),
+        [[0.33333322, 0.33333334, 0.3333334], [0.12000002, 0.11, 0.0]],
         atol=1e-7,
     )
+    np.testing.assert_allclose(sdf_hit_point_id, [3, 0], atol=1e-7)

From 737201fef88e259776ee20fbcb7d2935e1abc469 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 22 Sep 2025 20:15:24 +0000
Subject: [PATCH 42/98] Move minor model changes to the model refactor.

---
 physicsnemo/models/domino/model.py | 61 +++++++++++-------------------
 1 file changed, 22 insertions(+), 39 deletions(-)

diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py
index 24588b123c..c95f971e97 100644
--- a/physicsnemo/models/domino/model.py
+++ b/physicsnemo/models/domino/model.py
@@ -155,8 +155,9 @@ def forward(
                 - outputs: Tensor containing coordinates of the neighboring points
         """
         batch_size = x.shape[0]
+        nx, ny, nz = self.grid_resolution
 
-        p_grid = p_grid.reshape(batch_size, -1, 3)
+        p_grid = torch.reshape(p_grid, (batch_size, nx * ny * nz, 3))
 
         if reverse_mapping:
             mapping, outputs = radius_search(
@@ -591,23 +592,15 @@ def forward(
         if self.geo_encoding_type == "both" or self.geo_encoding_type == "stl":
             # Calculate multi-scale geoemtry dependency
             x_encoding = []
-
             for j in range(len(self.radii)):
-                with torch.autograd.profiler.record_function(f"bq_warp_{j}"):
-                    mapping, k_short = self.bq_warp[j](x, p_grid)
-                    x_encoding_inter = self.geo_conv_out[j](k_short, p_grid)
-                    # Propagate information in the geometry enclosed BBox
-                    for _i in range(self.hops):
-                        with torch.autograd.profiler.record_function(
-                            f"geo_processor_{j}_{_i}"
-                        ):
-                            dx = self.geo_processors[j](x_encoding_inter) / self.hops
-                            x_encoding_inter = x_encoding_inter + dx
-                    x_encoding_inter = self.geo_processor_out[j](x_encoding_inter)
-
-                    x_encoding.append(x_encoding_inter)
-
-            # current_stream.
+                mapping, k_short = self.bq_warp[j](x, p_grid)
+                x_encoding_inter = self.geo_conv_out[j](k_short, p_grid)
+                # Propagate information in the geometry enclosed BBox
+                for _ in range(self.hops):
+                    dx = self.geo_processors[j](x_encoding_inter) / self.hops
+                    x_encoding_inter = x_encoding_inter + dx
+                x_encoding_inter = self.geo_processor_out[j](x_encoding_inter)
+                x_encoding.append(x_encoding_inter)
             x_encoding = torch.cat(x_encoding, dim=1)
 
         if self.geo_encoding_type == "both" or self.geo_encoding_type == "sdf":
@@ -1666,7 +1659,6 @@ def calculate_solution(
         return_volume_neighbors=False,
     ):
         """Function to approximate solution sampling the neighborhood information"""
-
         if eval_mode == "volume":
             num_variables = self.num_variables_vol
             nn_basis = self.nn_basis_vol
@@ -1884,6 +1876,9 @@ def forward(self, data_dict, return_volume_neighbors=False):
         # Bounding box grid
         s_grid = data_dict["surf_grid"]
         sdf_surf_grid = data_dict["sdf_surf_grid"]
+        # Scaling factors
+        surf_max = data_dict["surface_min_max"][:, 1]
+        surf_min = data_dict["surface_min_max"][:, 0]
 
         # Parameters
         global_params_values = data_dict["global_params_values"]
@@ -1894,17 +1889,12 @@ def forward(self, data_dict, return_volume_neighbors=False):
             # Computational domain grid
             p_grid = data_dict["grid"]
             sdf_grid = data_dict["sdf_grid"]
-            if "volume_min_max" in data_dict.keys():
-                # Scaling factors
-                vol_max = data_dict["volume_min_max"][:, 1]
-                vol_min = data_dict["volume_min_max"][:, 0]
-
-                # Normalize based on computational domain
-                geo_centers_vol = (
-                    2.0 * (geo_centers - vol_min) / (vol_max - vol_min) - 1
-                )
-            else:
-                geo_centers_vol = geo_centers
+            # Scaling factors
+            vol_max = data_dict["volume_min_max"][:, 1]
+            vol_min = data_dict["volume_min_max"][:, 0]
+
+            # Normalize based on computational domain
+            geo_centers_vol = 2.0 * (geo_centers - vol_min) / (vol_max - vol_min) - 1
 
             encoding_g_vol = self.geo_rep_volume(geo_centers_vol, p_grid, sdf_grid)
 
@@ -1928,16 +1918,9 @@ def forward(self, data_dict, return_volume_neighbors=False):
 
         if self.output_features_surf is not None:
             # Represent geometry on bounding box
-            if "surface_min_max" in data_dict.keys():
-                # Scaling factors
-                surf_max = data_dict["surface_min_max"][:, 1]
-                surf_min = data_dict["surface_min_max"][:, 0]
-                geo_centers_surf = (
-                    2.0 * (geo_centers - surf_min) / (surf_max - surf_min) - 1
-                )
-            else:
-                geo_centers_surf = geo_centers
-
+            geo_centers_surf = (
+                2.0 * (geo_centers - surf_min) / (surf_max - surf_min) - 1
+            )
             encoding_g_surf = self.geo_rep_surface(
                 geo_centers_surf, s_grid, sdf_surf_grid
             )

From 378218f53e7a786fec7e00a4f24ffe5faa57cfac Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Tue, 23 Sep 2025 18:39:01 +0000
Subject: [PATCH 43/98] Fix minor errors in the datapipe

---
 physicsnemo/datapipes/cae/domino_datapipe2.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py
index 781402a5b3..53abe95822 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe2.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe2.py
@@ -494,7 +494,8 @@ def process_surface(
                 )
 
             # Select out the sampled points for non-neighbor arrays:
-            surface_fields = surface_fields[idx_surface]
+            if surface_fields is not None:
+                surface_fields = surface_fields[idx_surface]
             pos_normals_com_surface = pos_normals_com_surface[idx_surface]
             # Subsample the normals and sizes:
             surface_normals = surface_normals[idx_surface]
@@ -606,12 +607,10 @@ def process_volume(
         if self.config.sampling:
             # Generate a series of idx to sample the volume
             # without replacement
-
             volume_coordinates_sampled, idx_volume = shuffle_array(
                 volume_coordinates, self.config.volume_points_sample
             )
             volume_coordinates_sampled = volume_coordinates[idx_volume]
-
             # In case too few points are in the sampled data (because the
             # inputs were too few), pad the outputs:
             if volume_coordinates_sampled.shape[0] < self.config.volume_points_sample:
@@ -619,6 +618,7 @@ def process_volume(
                     self.config.volume_points_sample
                     - volume_coordinates_sampled.shape[0]
                 )
+
                 volume_coordinates_sampled = torch.nn.functional.pad(
                     volume_coordinates_sampled,
                     (0, 0, 0, 0, 0, padding_size),
@@ -1122,17 +1122,15 @@ def create_domino_dataset(
     device_mesh: torch.distributed.DeviceMesh | None = None,
     placements: dict[str, torch.distributed.tensor.Placement] | None = None,
 ):
+    model_type = cfg.model.model_type
     if phase == "train":
         input_path = cfg.data.input_dir
-        model_type = cfg.model.model_type
         dataloader_cfg = cfg.train.dataloader
     elif phase == "val":
         input_path = cfg.data.input_dir_val
-        model_type = cfg.model.model_type
         dataloader_cfg = cfg.val.dataloader
     elif phase == "test":
         input_path = cfg.eval.test_path
-        model_type = "inference"
         dataloader_cfg = None
     else:
         raise ValueError(f"Invalid phase {phase}")

From b0bd877d6d338842692728dc22360a9908847e85 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Tue, 23 Sep 2025 19:09:34 +0000
Subject: [PATCH 44/98] Move several components of the recipe to the
 deprecation bin.

---
 .../external_aerodynamics/domino/src/deprecated/README.md    | 5 +++++
 .../domino/src/{ => deprecated}/openfoam_datapipe.py         | 0
 .../domino/src/{ => deprecated}/retraining.py                | 0
 3 files changed, 5 insertions(+)
 create mode 100644 examples/cfd/external_aerodynamics/domino/src/deprecated/README.md
 rename examples/cfd/external_aerodynamics/domino/src/{ => deprecated}/openfoam_datapipe.py (100%)
 rename examples/cfd/external_aerodynamics/domino/src/{ => deprecated}/retraining.py (100%)

diff --git a/examples/cfd/external_aerodynamics/domino/src/deprecated/README.md b/examples/cfd/external_aerodynamics/domino/src/deprecated/README.md
new file mode 100644
index 0000000000..9124353b7e
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/domino/src/deprecated/README.md
@@ -0,0 +1,5 @@
+# DoMINO Deprecation
+
+The files in this folder have been deprecated as of the physicsnemo 25.11 release -
+they are no longer officially supported.  They are kept here only as a reference,
+and may be removed in a future release.
diff --git a/examples/cfd/external_aerodynamics/domino/src/openfoam_datapipe.py b/examples/cfd/external_aerodynamics/domino/src/deprecated/openfoam_datapipe.py
similarity index 100%
rename from examples/cfd/external_aerodynamics/domino/src/openfoam_datapipe.py
rename to examples/cfd/external_aerodynamics/domino/src/deprecated/openfoam_datapipe.py
diff --git a/examples/cfd/external_aerodynamics/domino/src/retraining.py b/examples/cfd/external_aerodynamics/domino/src/deprecated/retraining.py
similarity index 100%
rename from examples/cfd/external_aerodynamics/domino/src/retraining.py
rename to examples/cfd/external_aerodynamics/domino/src/deprecated/retraining.py

From 614710e4dadecf8004490a85ed6c88354afff53f Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Tue, 23 Sep 2025 19:11:20 +0000
Subject: [PATCH 45/98] Move and rename inference scripts

---
 .../domino/src/deprecated/inference_on_stl.py | 1586 +++++++++++++
 .../domino/src/inference_on_stl.py            | 2072 +++++------------
 .../domino/src/inference_on_stl2.py           |  634 -----
 3 files changed, 2146 insertions(+), 2146 deletions(-)
 create mode 100644 examples/cfd/external_aerodynamics/domino/src/deprecated/inference_on_stl.py
 delete mode 100644 examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py

diff --git a/examples/cfd/external_aerodynamics/domino/src/deprecated/inference_on_stl.py b/examples/cfd/external_aerodynamics/domino/src/deprecated/inference_on_stl.py
new file mode 100644
index 0000000000..a85cc7df86
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/domino/src/deprecated/inference_on_stl.py
@@ -0,0 +1,1586 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This code defines a standalone distributed inference pipeline the DoMINO model.
+This inference pipeline can be used to evaluate the model given an STL and
+an inflow speed. The pre-trained model checkpoint can be specified in this script
+or inferred from the config file. The results are calculated on a point cloud
+sampled in the volume around the STL and on the surface of the STL. They are stored
+in a dictionary, which can be written out for visualization.
+"""
+
+import os
+import time
+
+import hydra, re
+from hydra import compose, initialize
+from hydra.utils import to_absolute_path
+from omegaconf import DictConfig, OmegaConf
+
+import numpy as np
+import torch
+
+from physicsnemo.models.domino.model import DoMINO
+from physicsnemo.utils.domino.utils import (
+    unnormalize,
+    create_directory,
+    nd_interpolator,
+    get_filenames,
+    write_to_vtp,
+)
+from torch.cuda.amp import autocast
+from torch.nn.parallel import DistributedDataParallel
+from physicsnemo.distributed import DistributedManager
+
+from numpy.typing import NDArray
+from typing import Any, Iterable, List, Literal, Mapping, Optional, Union, Callable
+import warp as wp
+from pathlib import Path
+import pandas as pd
+import matplotlib.pyplot as plt
+import pyvista as pv
+
+try:
+    from physicsnemo.sym.geometry.tessellation import Tessellation
+
+    SYM_AVAILABLE = True
+except ImportError:
+    SYM_AVAILABLE = False
+
+
+def combine_stls(stl_path, stl_files):
+    meshes = []
+    combined_mesh = pv.PolyData()
+    for file in stl_files:
+        if ".stl" in file and "single_solid" not in file:
+            stl_file_path = os.path.join(stl_path, file)
+            reader = pv.get_reader(stl_file_path)
+            mesh_stl = reader.read()
+            combined_mesh = combined_mesh.merge(mesh_stl)
+            # meshes.append(mesh_stl)
+            break
+    # combined_mesh = pv.merge(meshes)
+    return combined_mesh
+
+
+def plot(truth, prediction, var, save_path, axes_titles=None, plot_error=True):
+    if plot_error:
+        c = 3
+    else:
+        c = 2
+    fig, axes = plt.subplots(1, c, figsize=(15, 5))
+    error = truth - prediction
+    # Plot Truth
+    im = axes[0].imshow(
+        truth,
+        cmap="jet",
+        vmax=np.ma.masked_invalid(truth).max(),
+        vmin=np.ma.masked_invalid(truth).min(),
+    )
+    axes[0].axis("off")
+    cbar = fig.colorbar(im, ax=axes[0], orientation="vertical")
+    cbar.ax.tick_params(labelsize=12)
+    if axes_titles is None:
+        axes[0].set_title(f"{var} Truth")
+    else:
+        axes[0].set_title(axes_titles[0])
+
+    # Plot Predicted
+    im = axes[1].imshow(
+        prediction,
+        cmap="jet",
+        vmax=np.ma.masked_invalid(prediction).max(),
+        vmin=np.ma.masked_invalid(prediction).min(),
+    )
+    axes[1].axis("off")
+    cbar = fig.colorbar(im, ax=axes[1], orientation="vertical")
+    cbar.ax.tick_params(labelsize=12)
+    if axes_titles is None:
+        axes[1].set_title(f"{var} Predicted")
+    else:
+        axes[1].set_title(axes_titles[1])
+
+    if plot_error:
+        # Plot Error
+        im = axes[2].imshow(
+            error,
+            cmap="jet",
+            vmax=np.ma.masked_invalid(error).max(),
+            vmin=np.ma.masked_invalid(error).min(),
+        )
+        axes[2].axis("off")
+        cbar = fig.colorbar(im, ax=axes[2], orientation="vertical")
+        cbar.ax.tick_params(labelsize=12)
+        if axes_titles is None:
+            axes[2].set_title(f"{var} Error")
+        else:
+            axes[2].set_title(axes_titles[2])
+
+        MAE = np.mean(np.ma.masked_invalid((error)))
+
+        if MAE:
+            fig.suptitle(f"MAE {MAE}", fontsize=18, x=0.5)
+
+    plt.tight_layout()
+
+    path_to_save_path = os.path.join(save_path)
+    plt.savefig(path_to_save_path, bbox_inches="tight", pad_inches=0.1)
+    plt.close()
+
+
+@wp.kernel
+def _bvh_query_distance(
+    mesh: wp.uint64,
+    points: wp.array(dtype=wp.vec3f),
+    max_dist: wp.float32,
+    sdf: wp.array(dtype=wp.float32),
+    sdf_hit_point: wp.array(dtype=wp.vec3f),
+    sdf_hit_point_id: wp.array(dtype=wp.int32),
+):
+    """
+    Computes the signed distance from each point in the given array `points`
+    to the mesh represented by `mesh`,within the maximum distance `max_dist`,
+    and stores the result in the array `sdf`.
+
+    Parameters:
+        mesh (wp.uint64): The identifier of the mesh.
+        points (wp.array): An array of 3D points for which to compute the
+            signed distance.
+        max_dist (wp.float32): The maximum distance within which to search
+            for the closest point on the mesh.
+        sdf (wp.array): An array to store the computed signed distances.
+        sdf_hit_point (wp.array): An array to store the computed hit points.
+        sdf_hit_point_id (wp.array): An array to store the computed hit point ids.
+
+    Returns:
+        None
+    """
+    tid = wp.tid()
+
+    res = wp.mesh_query_point_sign_winding_number(mesh, points[tid], max_dist)
+
+    mesh_ = wp.mesh_get(mesh)
+
+    p0 = mesh_.points[mesh_.indices[3 * res.face + 0]]
+    p1 = mesh_.points[mesh_.indices[3 * res.face + 1]]
+    p2 = mesh_.points[mesh_.indices[3 * res.face + 2]]
+
+    p_closest = res.u * p0 + res.v * p1 + (1.0 - res.u - res.v) * p2
+
+    sdf[tid] = res.sign * wp.abs(wp.length(points[tid] - p_closest))
+    sdf_hit_point[tid] = p_closest
+    sdf_hit_point_id[tid] = res.face
+
+
+def signed_distance_field(
+    mesh_vertices: list[tuple[float, float, float]],
+    mesh_indices: NDArray[float],
+    input_points: list[tuple[float, float, float]],
+    max_dist: float = 1e8,
+    include_hit_points: bool = False,
+    include_hit_points_id: bool = False,
+    device: int = 0,
+) -> wp.array:
+    """
+    Computes the signed distance field (SDF) for a given mesh and input points.
+
+    Parameters:
+    ----------
+        mesh_vertices (list[tuple[float, float, float]]): List of vertices defining the mesh.
+        mesh_indices (list[tuple[int, int, int]]): List of indices defining the triangles of the mesh.
+        input_points (list[tuple[float, float, float]]): List of input points for which to compute the SDF.
+        max_dist (float, optional): Maximum distance within which to search for
+            the closest point on the mesh. Default is 1e8.
+        include_hit_points (bool, optional): Whether to include hit points in
+            the output. Default is False.
+        include_hit_points_id (bool, optional): Whether to include hit point
+            IDs in the output. Default is False.
+
+    Returns:
+    -------
+        wp.array: An array containing the computed signed distance field.
+
+    Example:
+    -------
+    >>> mesh_vertices = [(0, 0, 0), (1, 0, 0), (0, 1, 0)]
+    >>> mesh_indices = np.array((0, 1, 2))
+    >>> input_points = [(0.5, 0.5, 0.5)]
+    >>> signed_distance_field(mesh_vertices, mesh_indices, input_points).numpy()
+    Module ...
+    array([0.5], dtype=float32)
+    """
+
+    wp.init()
+    # mesh = wp.Mesh(
+    #     wp.array(mesh_vertices.cpu(), dtype=wp.vec3), wp.array(mesh_indices.cpu(), dtype=wp.int32)
+    # )
+    mesh = wp.Mesh(
+        wp.from_torch(mesh_vertices, dtype=wp.vec3),
+        wp.from_torch(mesh_indices, dtype=wp.int32),
+    )
+
+    sdf_points = wp.from_torch(input_points, dtype=wp.vec3)
+    sdf = wp.zeros(shape=sdf_points.shape, dtype=wp.float32)
+    sdf_hit_point = wp.zeros(shape=sdf_points.shape, dtype=wp.vec3f)
+    sdf_hit_point_id = wp.zeros(shape=sdf_points.shape, dtype=wp.int32)
+    wp.launch(
+        kernel=_bvh_query_distance,
+        dim=len(sdf_points),
+        inputs=[mesh.id, sdf_points, max_dist, sdf, sdf_hit_point, sdf_hit_point_id],
+    )
+    if include_hit_points and include_hit_points_id:
+        return (
+            wp.to_torch(sdf),
+            wp.to_torch(sdf_hit_point),
+            wp.to_torch(sdf_hit_point_id),
+        )
+    elif include_hit_points:
+        return (wp.to_torch(sdf), wp.to_torch(sdf_hit_point))
+    elif include_hit_points_id:
+        return (wp.to_torch(sdf), wp.to_torch(sdf_hit_point_id))
+    else:
+        return wp.to_torch(sdf)
+
+
+def shuffle_array_torch(surface_vertices, geometry_points, device):
+    idx = torch.unsqueeze(
+        torch.randperm(surface_vertices.shape[0])[:geometry_points], -1
+    ).to(device)
+    idx = idx.repeat(1, 3)
+    surface_sampled = torch.gather(surface_vertices, 0, idx)
+    return surface_sampled
+
+
+class inferenceDataPipe:
+    def __init__(
+        self,
+        device: int = 0,
+        grid_resolution: Optional[list] = [256, 96, 64],
+        normalize_coordinates: bool = False,
+        geom_points_sample: int = 300000,
+        positional_encoding: bool = False,
+        surface_vertices=None,
+        surface_indices=None,
+        surface_areas=None,
+        surface_centers=None,
+        use_sdf_basis=False,
+    ):
+        self.surface_vertices = surface_vertices
+        self.surface_indices = surface_indices
+        self.surface_areas = surface_areas
+        self.surface_centers = surface_centers
+        self.device = device
+        self.grid_resolution = grid_resolution
+        self.normalize_coordinates = normalize_coordinates
+        self.geom_points_sample = geom_points_sample
+        self.positional_encoding = positional_encoding
+        self.use_sdf_basis = use_sdf_basis
+        torch.manual_seed(int(42 + torch.cuda.current_device()))
+        self.data_dict = {}
+
+    def clear_dict(self):
+        del self.data_dict
+
+    def clear_volume_dict(self):
+        del self.data_dict["volume_mesh_centers"]
+        del self.data_dict["pos_enc_closest"]
+        del self.data_dict["pos_normals_com"]
+        del self.data_dict["sdf_nodes"]
+
+    def create_grid_torch(self, mx, mn, nres):
+        start_time = time.time()
+        dx = torch.linspace(mn[0], mx[0], nres[0], device=self.device)
+        dy = torch.linspace(mn[1], mx[1], nres[1], device=self.device)
+        dz = torch.linspace(mn[2], mx[2], nres[2], device=self.device)
+
+        xv, yv, zv = torch.meshgrid(dx, dy, dz, indexing="ij")
+        xv = torch.unsqueeze(xv, -1)
+        yv = torch.unsqueeze(yv, -1)
+        zv = torch.unsqueeze(zv, -1)
+        grid = torch.cat((xv, yv, zv), axis=-1)
+        return grid
+
+    def process_surface_mesh(self, bounding_box=None, bounding_box_surface=None):
+        # Use coarse mesh to calculate SDF
+        surface_vertices = self.surface_vertices
+        surface_indices = self.surface_indices
+        surface_areas = self.surface_areas
+        surface_centers = self.surface_centers
+
+        start_time = time.time()
+
+        if bounding_box is None:
+            # Create a bounding box
+            s_max = torch.amax(surface_vertices, 0)
+            s_min = torch.amin(surface_vertices, 0)
+
+            c_max = s_max + (s_max - s_min) / 2
+            c_min = s_min - (s_max - s_min) / 2
+            c_min[2] = s_min[2]
+        else:
+            c_min = bounding_box[0]
+            c_max = bounding_box[1]
+
+        if bounding_box_surface is None:
+            # Create a bounding box
+            s_max = torch.amax(surface_vertices, 0)
+            s_min = torch.amin(surface_vertices, 0)
+
+            surf_max = s_max + (s_max - s_min) / 2
+            surf_min = s_min - (s_max - s_min) / 2
+            surf_min[2] = s_min[2]
+        else:
+            surf_min = bounding_box_surface[0]
+            surf_max = bounding_box_surface[1]
+
+        nx, ny, nz = self.grid_resolution
+
+        grid = self.create_grid_torch(c_max, c_min, self.grid_resolution)
+        grid_reshaped = torch.reshape(grid, (nx * ny * nz, 3))
+
+        # SDF on grid
+        sdf_grid = signed_distance_field(
+            surface_vertices, surface_indices, grid_reshaped, device=self.device
+        )
+        sdf_grid = torch.reshape(sdf_grid, (nx, ny, nz))
+
+        surface_areas = torch.unsqueeze(surface_areas, -1)
+        center_of_mass = torch.sum(surface_centers * surface_areas, 0) / torch.sum(
+            surface_areas
+        )
+
+        s_grid = self.create_grid_torch(surf_max, surf_min, self.grid_resolution)
+        surf_grid_reshaped = torch.reshape(s_grid, (nx * ny * nz, 3))
+
+        surf_sdf_grid = signed_distance_field(
+            surface_vertices, surface_indices, surf_grid_reshaped, device=self.device
+        )
+        surf_sdf_grid = torch.reshape(surf_sdf_grid, (nx, ny, nz))
+
+        if self.normalize_coordinates:
+            grid = 2.0 * (grid - c_min) / (c_max - c_min) - 1.0
+            s_grid = 2.0 * (s_grid - surf_min) / (surf_max - surf_min) - 1.0
+
+        surface_vertices = torch.unsqueeze(surface_vertices, 0)
+        grid = torch.unsqueeze(grid, 0)
+        s_grid = torch.unsqueeze(s_grid, 0)
+        sdf_grid = torch.unsqueeze(sdf_grid, 0)
+        surf_sdf_grid = torch.unsqueeze(surf_sdf_grid, 0)
+        max_min = [c_min, c_max]
+        surf_max_min = [surf_min, surf_max]
+        center_of_mass = center_of_mass
+
+        return (
+            surface_vertices,
+            grid,
+            sdf_grid,
+            max_min,
+            s_grid,
+            surf_sdf_grid,
+            surf_max_min,
+            center_of_mass,
+        )
+
+    def sample_stl_points(
+        self,
+        num_points,
+        stl_centers,
+        stl_area,
+        stl_normals,
+        max_min,
+        center_of_mass,
+        bounding_box=None,
+        stencil_size=7,
+    ):
+        if bounding_box is not None:
+            c_max = bounding_box[1]
+            c_min = bounding_box[0]
+        else:
+            c_min = max_min[0]
+            c_max = max_min[1]
+
+        start_time = time.time()
+
+        nx, ny, nz = self.grid_resolution
+
+        idx = np.arange(stl_centers.shape[0])
+        # np.random.shuffle(idx)
+        if num_points is not None:
+            idx = idx[:num_points]
+
+        surface_coordinates = stl_centers
+        surface_normals = stl_normals
+        surface_area = stl_area
+
+        if stencil_size > 1:
+            interp_func = KDTree(surface_coordinates)
+            dd, ii = interp_func.query(surface_coordinates, k=stencil_size)
+            surface_neighbors = surface_coordinates[ii]
+            surface_neighbors = surface_neighbors[:, 1:] + 1e-6
+            surface_neighbors_normals = surface_normals[ii]
+            surface_neighbors_normals = surface_neighbors_normals[:, 1:]
+            surface_neighbors_area = surface_area[ii]
+            surface_neighbors_area = surface_neighbors_area[:, 1:]
+        else:
+            surface_neighbors = np.expand_dims(surface_coordinates, 1) + 1e-6
+            surface_neighbors_normals = np.expand_dims(surface_normals, 1)
+            surface_neighbors_area = np.expand_dims(surface_area, 1)
+
+        surface_coordinates = torch.from_numpy(surface_coordinates).to(self.device)
+        surface_normals = torch.from_numpy(surface_normals).to(self.device)
+        surface_area = torch.from_numpy(surface_area).to(self.device)
+        surface_neighbors = torch.from_numpy(surface_neighbors).to(self.device)
+        surface_neighbors_normals = torch.from_numpy(surface_neighbors_normals).to(
+            self.device
+        )
+        surface_neighbors_area = torch.from_numpy(surface_neighbors_area).to(
+            self.device
+        )
+
+        pos_normals_com = surface_coordinates - center_of_mass
+
+        if self.normalize_coordinates:
+            surface_coordinates = (
+                2.0 * (surface_coordinates - c_min) / (c_max - c_min) - 1.0
+            )
+            surface_neighbors = (
+                2.0 * (surface_neighbors - c_min) / (c_max - c_min) - 1.0
+            )
+
+        surface_coordinates = surface_coordinates[idx]
+        surface_area = surface_area[idx]
+        surface_normals = surface_normals[idx]
+        pos_normals_com = pos_normals_com[idx]
+        surface_coordinates = torch.unsqueeze(surface_coordinates, 0)
+        surface_normals = torch.unsqueeze(surface_normals, 0)
+        surface_area = torch.unsqueeze(surface_area, 0)
+        pos_normals_com = torch.unsqueeze(pos_normals_com, 0)
+
+        surface_neighbors = surface_neighbors[idx]
+        surface_neighbors_normals = surface_neighbors_normals[idx]
+        surface_neighbors_area = surface_neighbors_area[idx]
+        surface_neighbors = torch.unsqueeze(surface_neighbors, 0)
+        surface_neighbors_normals = torch.unsqueeze(surface_neighbors_normals, 0)
+        surface_neighbors_area = torch.unsqueeze(surface_neighbors_area, 0)
+
+        scaling_factors = [c_max, c_min]
+
+        return (
+            surface_coordinates,
+            surface_neighbors,
+            surface_normals,
+            surface_neighbors_normals,
+            surface_area,
+            surface_neighbors_area,
+            pos_normals_com,
+            scaling_factors,
+            idx,
+        )
+
+    def sample_points_on_surface(
+        self,
+        num_points_surf,
+        max_min,
+        center_of_mass,
+        stl_path,
+        bounding_box=None,
+        stencil_size=7,
+    ):
+        if bounding_box is not None:
+            c_max = bounding_box[1]
+            c_min = bounding_box[0]
+        else:
+            c_min = max_min[0]
+            c_max = max_min[1]
+
+        start_time = time.time()
+
+        nx, ny, nz = self.grid_resolution
+
+        obj = Tessellation.from_stl(stl_path, airtight=False)
+
+        boundary = obj.sample_boundary(num_points_surf)
+        surface_coordinates = np.concatenate(
+            [
+                np.float32(boundary["x"]),
+                np.float32(boundary["y"]),
+                np.float32(boundary["z"]),
+            ],
+            axis=1,
+        )
+        surface_normals = np.concatenate(
+            [
+                np.float32(boundary["normal_x"]),
+                np.float32(boundary["normal_y"]),
+                np.float32(boundary["normal_z"]),
+            ],
+            axis=1,
+        )
+
+        surface_area = np.float32(boundary["area"])
+
+        interp_func = KDTree(surface_coordinates)
+        dd, ii = interp_func.query(surface_coordinates, k=stencil_size)
+        surface_neighbors = surface_coordinates[ii]
+        surface_neighbors = surface_neighbors[:, 1:]
+        surface_neighbors_normals = surface_normals[ii]
+        surface_neighbors_normals = surface_neighbors_normals[:, 1:]
+        surface_neighbors_area = surface_area[ii]
+        surface_neighbors_area = surface_neighbors_area[:, 1:]
+
+        surface_coordinates = torch.from_numpy(surface_coordinates).to(self.device)
+        surface_normals = torch.from_numpy(surface_normals).to(self.device)
+        surface_area = torch.from_numpy(surface_area).to(self.device)
+        surface_neighbors = torch.from_numpy(surface_neighbors).to(self.device)
+        surface_neighbors_normals = torch.from_numpy(surface_neighbors_normals).to(
+            self.device
+        )
+        surface_neighbors_area = torch.from_numpy(surface_neighbors_area).to(
+            self.device
+        )
+
+        pos_normals_com = surface_coordinates - center_of_mass
+
+        if self.normalize_coordinates:
+            surface_coordinates = (
+                2.0 * (surface_coordinates - c_min) / (c_max - c_min) - 1.0
+            )
+
+        surface_coordinates = torch.unsqueeze(surface_coordinates, 0)
+        surface_normals = torch.unsqueeze(surface_normals, 0)
+        surface_area = torch.unsqueeze(surface_area, 0)
+        pos_normals_com = torch.unsqueeze(pos_normals_com, 0)
+
+        surface_neighbors = torch.unsqueeze(surface_neighbors, 0)
+        surface_neighbors_normals = torch.unsqueeze(surface_neighbors_normals, 0)
+        surface_neighbors_area = torch.unsqueeze(surface_neighbors_area, 0)
+
+        scaling_factors = [c_max, c_min]
+
+        return (
+            surface_coordinates,
+            surface_neighbors,
+            surface_normals,
+            surface_neighbors_normals,
+            surface_area,
+            surface_neighbors_area,
+            pos_normals_com,
+            scaling_factors,
+        )
+
+    def sample_points_in_volume(
+        self, num_points_vol, max_min, center_of_mass, bounding_box=None
+    ):
+        if bounding_box is not None:
+            c_max = bounding_box[1]
+            c_min = bounding_box[0]
+        else:
+            c_min = max_min[0]
+            c_max = max_min[1]
+
+        start_time = time.time()
+
+        nx, ny, nz = self.grid_resolution
+        for k in range(10):
+            if k > 0:
+                num_pts_vol = num_points_vol - int(volume_coordinates.shape[0] / 2)
+            else:
+                num_pts_vol = int(1.25 * num_points_vol)
+
+            volume_coordinates_sub = (c_max - c_min) * torch.rand(
+                num_pts_vol, 3, device=self.device, dtype=torch.float32
+            ) + c_min
+
+            sdf_nodes, sdf_node_closest_point = signed_distance_field(
+                self.surface_vertices,
+                self.surface_indices,
+                volume_coordinates_sub,
+                include_hit_points=True,
+                device=self.device,
+            )
+            sdf_nodes = torch.unsqueeze(sdf_nodes, -1)
+
+            idx = torch.unsqueeze(torch.where((sdf_nodes > 0))[0], -1)
+            idx = idx.repeat(1, volume_coordinates_sub.shape[1])
+            if k == 0:
+                volume_coordinates = torch.gather(volume_coordinates_sub, 0, idx)
+            else:
+                volume_coordinates_1 = torch.gather(volume_coordinates_sub, 0, idx)
+                volume_coordinates = torch.cat(
+                    (volume_coordinates, volume_coordinates_1), axis=0
+                )
+
+            if volume_coordinates.shape[0] > num_points_vol:
+                volume_coordinates = volume_coordinates[:num_points_vol]
+                break
+
+        sdf_nodes, sdf_node_closest_point = signed_distance_field(
+            self.surface_vertices,
+            self.surface_indices,
+            volume_coordinates,
+            include_hit_points=True,
+            device=self.device,
+        )
+        sdf_nodes = torch.unsqueeze(sdf_nodes, -1)
+
+        pos_normals_closest = volume_coordinates - sdf_node_closest_point
+        pos_normals_com = volume_coordinates - center_of_mass
+
+        if self.normalize_coordinates:
+            volume_coordinates = (
+                2.0 * (volume_coordinates - c_min) / (c_max - c_min) - 1.0
+            )
+
+        volume_coordinates = torch.unsqueeze(volume_coordinates, 0)
+        pos_normals_com = torch.unsqueeze(pos_normals_com, 0)
+
+        if self.use_sdf_basis:
+            pos_normals_closest = torch.unsqueeze(pos_normals_closest, 0)
+            sdf_nodes = torch.unsqueeze(sdf_nodes, 0)
+
+        scaling_factors = [c_max, c_min]
+        return (
+            volume_coordinates,
+            pos_normals_com,
+            pos_normals_closest,
+            sdf_nodes,
+            scaling_factors,
+        )
+
+
+class dominoInference:
+    def __init__(
+        self,
+        cfg: DictConfig,
+        dist: None,
+        cached_geo_encoding: bool = False,
+    ):
+        self.cfg = cfg
+        self.dist = dist
+        self.stream_velocity = None
+        self.stencil_size = None
+        self.stl_path = None
+        self.stl_vertices = None
+        self.stl_centers = None
+        self.surface_areas = None
+        self.mesh_indices_flattened = None
+        self.length_scale = 1.0
+        if self.dist is None:
+            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        else:
+            self.device = self.dist.device
+
+        self.air_density = torch.full((1, 1), 1.205, dtype=torch.float32).to(
+            self.device
+        )
+        (
+            self.num_vol_vars,
+            self.num_surf_vars,
+            self.num_global_features,
+        ) = self.get_num_variables()
+        self.model = None
+        self.grid_resolution = torch.tensor(self.cfg.model.interp_res).to(self.device)
+        self.vol_factors = None
+        self.bounding_box_min_max = None
+        self.bounding_box_surface_min_max = None
+        self.center_of_mass = None
+        self.grid = None
+        self.geometry_encoding = None
+        self.geometry_encoding_surface = None
+        self.cached_geo_encoding = cached_geo_encoding
+        self.out_dict = {}
+
+    def get_geometry_encoding(self):
+        return self.geometry_encoding
+
+    def get_geometry_encoding_surface(self):
+        return self.geometry_encoding_surface
+
+    def get_out_dict(self):
+        return self.out_dict
+
+    def clear_out_dict(self):
+        self.out_dict.clear()
+
+    def initialize_data_processor(self):
+        self.ifp = inferenceDataPipe(
+            device=self.device,
+            surface_vertices=self.stl_vertices,
+            surface_indices=self.mesh_indices_flattened,
+            surface_areas=self.surface_areas,
+            surface_centers=self.stl_centers,
+            grid_resolution=self.grid_resolution,
+            normalize_coordinates=True,
+            geom_points_sample=300000,
+            positional_encoding=False,
+            use_sdf_basis=self.cfg.model.use_sdf_in_basis_func,
+        )
+
+    def load_bounding_box(self):
+        if (
+            self.cfg.data.bounding_box.min is not None
+            and self.cfg.data.bounding_box.max is not None
+        ):
+            c_min = torch.from_numpy(
+                np.array(self.cfg.data.bounding_box.min, dtype=np.float32)
+            ).to(self.device)
+            c_max = torch.from_numpy(
+                np.array(self.cfg.data.bounding_box.max, dtype=np.float32)
+            ).to(self.device)
+            self.bounding_box_min_max = [c_min, c_max]
+
+        if (
+            self.cfg.data.bounding_box_surface.min is not None
+            and self.cfg.data.bounding_box_surface.max is not None
+        ):
+            c_min = torch.from_numpy(
+                np.array(self.cfg.data.bounding_box_surface.min, dtype=np.float32)
+            ).to(self.device)
+            c_max = torch.from_numpy(
+                np.array(self.cfg.data.bounding_box_surface.max, dtype=np.float32)
+            ).to(self.device)
+            self.bounding_box_surface_min_max = [c_min, c_max]
+
+    def load_volume_scaling_factors(self):
+        scaling_param_path = self.cfg.eval.scaling_param_path
+        vol_factors_path = os.path.join(
+            scaling_param_path, "volume_scaling_factors.npy"
+        )
+
+        vol_factors = np.load(vol_factors_path, allow_pickle=True)
+        vol_factors = torch.from_numpy(vol_factors).to(self.device)
+
+        return vol_factors
+
+    def load_surface_scaling_factors(self):
+        scaling_param_path = self.cfg.eval.scaling_param_path
+        surf_factors_path = os.path.join(
+            scaling_param_path, "surface_scaling_factors.npy"
+        )
+
+        surf_factors = np.load(surf_factors_path, allow_pickle=True)
+        surf_factors = torch.from_numpy(surf_factors).to(self.device)
+
+        return surf_factors
+
+    def read_stl(self):
+        stl_files = get_filenames(self.stl_path)
+        mesh_stl = combine_stls(self.stl_path, stl_files)
+        if self.cfg.eval.refine_stl:
+            mesh_stl = mesh_stl.subdivide(
+                nsub=2, subfilter="linear"
+            )  # .smooth(n_iter=20)
+        stl_vertices = mesh_stl.points
+        length_scale = np.amax(np.amax(stl_vertices, 0) - np.amin(stl_vertices, 0))
+        stl_centers = mesh_stl.cell_centers().points
+        # Assuming triangular elements
+        stl_faces = np.array(mesh_stl.faces).reshape((-1, 4))[:, 1:]
+        mesh_indices_flattened = stl_faces.flatten()
+
+        surface_areas = mesh_stl.compute_cell_sizes(
+            length=False, area=True, volume=False
+        )
+        surface_areas = np.array(surface_areas.cell_data["Area"])
+
+        surface_normals = np.array(mesh_stl.cell_normals, dtype=np.float32)
+
+        self.stl_vertices = torch.from_numpy(np.float32(stl_vertices)).to(self.device)
+        self.stl_centers = torch.from_numpy(np.float32(stl_centers)).to(self.device)
+        self.surface_areas = torch.from_numpy(np.float32(surface_areas)).to(self.device)
+        self.stl_normals = -1.0 * torch.from_numpy(np.float32(surface_normals)).to(
+            self.device
+        )
+        self.mesh_indices_flattened = torch.from_numpy(
+            np.int32(mesh_indices_flattened)
+        ).to(self.device)
+        self.length_scale = length_scale
+        self.mesh_stl = mesh_stl
+
+    def read_stl_trimesh(
+        self, stl_vertices, stl_faces, stl_centers, surface_normals, surface_areas
+    ):
+        mesh_indices_flattened = stl_faces.flatten()
+        length_scale = np.amax(np.amax(stl_vertices, 0) - np.amin(stl_vertices, 0))
+        self.stl_vertices = torch.from_numpy(stl_vertices).to(self.device)
+        self.stl_centers = torch.from_numpy(stl_centers).to(self.device)
+        self.stl_normals = -1.0 * torch.from_numpy(surface_normals).to(self.device)
+        self.surface_areas = torch.from_numpy(surface_areas).to(self.device)
+        self.mesh_indices_flattened = torch.from_numpy(
+            np.int32(mesh_indices_flattened)
+        ).to(self.device)
+        self.length_scale = length_scale
+
+    def get_num_variables(self):
+        volume_variable_names = list(self.cfg.variables.volume.solution.keys())
+        num_vol_vars = 0
+        for j in volume_variable_names:
+            if self.cfg.variables.volume.solution[j] == "vector":
+                num_vol_vars += 3
+            else:
+                num_vol_vars += 1
+
+        surface_variable_names = list(self.cfg.variables.surface.solution.keys())
+        num_surf_vars = 0
+        for j in surface_variable_names:
+            if self.cfg.variables.surface.solution[j] == "vector":
+                num_surf_vars += 3
+            else:
+                num_surf_vars += 1
+
+        num_global_features = 0
+        global_params_names = list(cfg.variables.global_parameters.keys())
+        for param in global_params_names:
+            if cfg.variables.global_parameters[param].type == "vector":
+                num_global_features += len(
+                    cfg.variables.global_parameters[param].reference
+                )
+            elif cfg.variables.global_parameters[param].type == "scalar":
+                num_global_features += 1
+            else:
+                raise ValueError(f"Unknown global parameter type")
+
+        return num_vol_vars, num_surf_vars, num_global_features
+
+    def initialize_model(self, model_path):
+        model = (
+            DoMINO(
+                input_features=3,
+                output_features_vol=self.num_vol_vars,
+                output_features_surf=self.num_surf_vars,
+                global_features=self.num_global_features,
+                model_parameters=self.cfg.model,
+            )
+            .to(self.device)
+            .eval()
+        )
+        model = torch.compile(model, disable=True)
+
+        checkpoint_iter = torch.load(
+            to_absolute_path(model_path), map_location=self.dist.device
+        )
+
+        model.load_state_dict(checkpoint_iter)
+
+        if self.dist is not None:
+            if self.dist.world_size > 1:
+                model = DistributedDataParallel(
+                    model,
+                    device_ids=[self.dist.local_rank],
+                    output_device=self.dist.device,
+                    broadcast_buffers=self.dist.broadcast_buffers,
+                    find_unused_parameters=self.dist.find_unused_parameters,
+                    gradient_as_bucket_view=True,
+                    static_graph=True,
+                )
+
+        self.model = model
+        self.vol_factors = self.load_volume_scaling_factors()
+        self.surf_factors = self.load_surface_scaling_factors()
+        self.load_bounding_box()
+
+    def set_stream_velocity(self, stream_velocity):
+        self.stream_velocity = torch.full(
+            (1, 1), stream_velocity, dtype=torch.float32
+        ).to(self.device)
+
+    def set_stencil_size(self, stencil_size):
+        self.stencil_size = stencil_size
+
+    def set_air_density(self, air_density):
+        self.air_density = torch.full((1, 1), air_density, dtype=torch.float32).to(
+            self.device
+        )
+
+    def set_stl_path(self, filename):
+        self.stl_path = filename
+
+    @torch.no_grad()
+    def compute_geo_encoding(self, cached_geom_path=None):
+        start_time = time.time()
+
+        if not self.cached_geo_encoding:
+            (
+                surface_vertices,
+                grid,
+                sdf_grid,
+                max_min,
+                s_grid,
+                surf_sdf_grid,
+                surf_max_min,
+                center_of_mass,
+            ) = self.ifp.process_surface_mesh(
+                self.bounding_box_min_max, self.bounding_box_surface_min_max
+            )
+            if self.bounding_box_min_max is None:
+                self.bounding_box_min_max = max_min
+            if self.bounding_box_surface_min_max is None:
+                self.bounding_box_surface_min_max = surf_max_min
+            self.center_of_mass = center_of_mass
+            self.grid = grid
+            self.s_grid = s_grid
+            self.sdf_grid = sdf_grid
+            self.surf_sdf_grid = surf_sdf_grid
+            self.out_dict["sdf"] = sdf_grid
+
+            geo_encoding, geo_encoding_surface = self.calculate_geometry_encoding(
+                surface_vertices, grid, sdf_grid, s_grid, surf_sdf_grid, self.model
+            )
+        else:
+            out_dict_cached = torch.load(cached_geom_path, map_location=self.device)
+            self.bounding_box_min_max = out_dict_cached["bounding_box_min_max"]
+            self.grid = out_dict_cached["grid"]
+            self.sdf_grid = out_dict_cached["sdf_grid"]
+            self.center_of_mass = out_dict_cached["com"]
+            geo_encoding = out_dict_cached["geo_encoding"]
+            geo_encoding_surface = out_dict_cached["geo_encoding_surface"]
+            self.out_dict["sdf"] = self.sdf_grid
+        torch.cuda.synchronize()
+        print("Time taken for geo encoding = %f" % (time.time() - start_time))
+
+        self.geometry_encoding = geo_encoding
+        self.geometry_encoding_surface = geo_encoding_surface
+
+    def compute_forces(self):
+        pressure = self.out_dict["pressure_surface"]
+        wall_shear = self.out_dict["wall-shear-stress"]
+        # sampling_indices = self.out_dict["sampling_indices"]
+
+        surface_normals = self.stl_normals[self.sampling_indices]
+        surface_areas = self.surface_areas[self.sampling_indices]
+
+        drag_force = torch.sum(
+            pressure[0, :, 0] * surface_normals[:, 0] * surface_areas
+            - wall_shear[0, :, 0] * surface_areas
+        )
+        lift_force = torch.sum(
+            pressure[0, :, 0] * surface_normals[:, 2] * surface_areas
+            - wall_shear[0, :, 2] * surface_areas
+        )
+
+        self.out_dict["drag_force"] = drag_force
+        self.out_dict["lift_force"] = lift_force
+
+    @torch.inference_mode()
+    def compute_surface_solutions(self, num_sample_points=None, plot_solutions=False):
+        total_time = 0.0
+        start_event = torch.cuda.Event(enable_timing=True)
+        end_event = torch.cuda.Event(enable_timing=True)
+
+        geo_encoding = self.geometry_encoding_surface
+        j = 0
+
+        with autocast(enabled=True):
+            start_event.record()
+            (
+                surface_mesh_centers,
+                surface_neighbors,
+                surface_normals,
+                surface_neighbors_normals,
+                surface_areas,
+                surface_neighbors_areas,
+                pos_normals_com,
+                surf_scaling_factors,
+                sampling_indices,
+            ) = self.ifp.sample_stl_points(
+                num_sample_points,
+                self.stl_centers.cpu().numpy(),
+                self.surface_areas.cpu().numpy(),
+                self.stl_normals.cpu().numpy(),
+                max_min=self.bounding_box_surface_min_max,
+                center_of_mass=self.center_of_mass,
+                stencil_size=self.stencil_size,
+            )
+            end_event.record()
+            end_event.synchronize()
+            cur_time = start_event.elapsed_time(end_event) / 1000.0
+            print(f"sample_points_in_surface time (s): {cur_time:.4f}")
+            # vol_coordinates_all.append(volume_mesh_centers)
+            surface_coordinates_all = surface_mesh_centers
+
+            inner_time = time.time()
+            start_event.record()
+            if num_sample_points == None:
+                point_batch_size = 512_000
+                num_points = surface_coordinates_all.shape[1]
+                subdomain_points = int(np.floor(num_points / point_batch_size))
+                surface_solutions = torch.zeros(1, num_points, self.num_surf_vars).to(
+                    self.device
+                )
+                for p in range(subdomain_points + 1):
+                    start_idx = p * point_batch_size
+                    end_idx = (p + 1) * point_batch_size
+                    surface_solutions_batch = self.compute_solution_on_surface(
+                        geo_encoding,
+                        surface_mesh_centers[:, start_idx:end_idx],
+                        surface_neighbors[:, start_idx:end_idx],
+                        surface_normals[:, start_idx:end_idx],
+                        surface_neighbors_normals[:, start_idx:end_idx],
+                        surface_areas[:, start_idx:end_idx],
+                        surface_neighbors_areas[:, start_idx:end_idx],
+                        pos_normals_com[:, start_idx:end_idx],
+                        self.s_grid,
+                        self.model,
+                        inlet_velocity=self.stream_velocity,
+                        air_density=self.air_density,
+                    )
+                    surface_solutions[:, start_idx:end_idx] = surface_solutions_batch
+            else:
+                point_batch_size = 512_000
+                num_points = num_sample_points
+                subdomain_points = int(np.floor(num_points / point_batch_size))
+                surface_solutions = torch.zeros(1, num_points, self.num_surf_vars).to(
+                    self.device
+                )
+                for p in range(subdomain_points + 1):
+                    start_idx = p * point_batch_size
+                    end_idx = (p + 1) * point_batch_size
+                    surface_solutions_batch = self.compute_solution_on_surface(
+                        geo_encoding,
+                        surface_mesh_centers[:, start_idx:end_idx],
+                        surface_neighbors[:, start_idx:end_idx],
+                        surface_normals[:, start_idx:end_idx],
+                        surface_neighbors_normals[:, start_idx:end_idx],
+                        surface_areas[:, start_idx:end_idx],
+                        surface_neighbors_areas[:, start_idx:end_idx],
+                        pos_normals_com[:, start_idx:end_idx],
+                        self.s_grid,
+                        self.model,
+                        inlet_velocity=self.stream_velocity,
+                        air_density=self.air_density,
+                    )
+                    # print(torch.amax(surface_solutions_batch, (0, 1)), torch.amin(surface_solutions_batch, (0, 1)))
+                    surface_solutions[:, start_idx:end_idx] = surface_solutions_batch
+
+            # print(surface_solutions.shape)
+            end_event.record()
+            end_event.synchronize()
+            cur_time = start_event.elapsed_time(end_event) / 1000.0
+            print(f"compute_solution time (s): {cur_time:.4f}")
+            total_time += float(time.time() - inner_time)
+            surface_solutions_all = surface_solutions
+            print(
+                "Time taken for compute solution on surface for=%f, %f"
+                % (time.time() - inner_time, torch.cuda.utilization(self.device))
+            )
+        cmax = surf_scaling_factors[0]
+        cmin = surf_scaling_factors[1]
+
+        surface_coordinates_all = torch.reshape(
+            surface_coordinates_all, (1, num_points, 3)
+        )
+        surface_solutions_all = torch.reshape(surface_solutions_all, (1, num_points, 4))
+
+        if self.surf_factors is not None:
+            surface_solutions_all = unnormalize(
+                surface_solutions_all, self.surf_factors[0], self.surf_factors[1]
+            )
+
+        self.out_dict["surface_coordinates"] = (
+            0.5 * (surface_coordinates_all + 1.0) * (cmax - cmin) + cmin
+        )
+        self.out_dict["pressure_surface"] = (
+            surface_solutions_all[:, :, :1]
+            * self.stream_velocity**2.0
+            * self.air_density
+        )
+        self.out_dict["wall-shear-stress"] = (
+            surface_solutions_all[:, :, 1:4]
+            * self.stream_velocity**2.0
+            * self.air_density
+        )
+        self.sampling_indices = sampling_indices
+
+    @torch.inference_mode()
+    def compute_volume_solutions(self, num_sample_points, plot_solutions=False):
+        total_time = 0.0
+        start_event = torch.cuda.Event(enable_timing=True)
+        end_event = torch.cuda.Event(enable_timing=True)
+
+        geo_encoding = self.geometry_encoding
+        j = 0
+
+        # Compute volume
+        point_batch_size = 512_000
+        num_points = num_sample_points
+        subdomain_points = int(np.floor(num_points / point_batch_size))
+        volume_solutions = torch.zeros(1, num_points, self.num_vol_vars).to(self.device)
+        volume_coordinates = torch.zeros(1, num_points, 3).to(self.device)
+
+        for p in range(subdomain_points + 1):
+            start_idx = p * point_batch_size
+            end_idx = (p + 1) * point_batch_size
+            if end_idx > num_points:
+                point_batch_size = num_points - start_idx
+                end_idx = num_points
+
+            with autocast(enabled=True):
+                inner_time = time.time()
+                start_event.record()
+                (
+                    volume_mesh_centers,
+                    pos_normals_com,
+                    pos_normals_closest,
+                    sdf_nodes,
+                    scaling_factors,
+                ) = self.ifp.sample_points_in_volume(
+                    num_points_vol=point_batch_size,
+                    max_min=self.bounding_box_min_max,
+                    center_of_mass=self.center_of_mass,
+                )
+                end_event.record()
+                end_event.synchronize()
+                cur_time = start_event.elapsed_time(end_event) / 1000.0
+                print(f"sample_points_in_volume time (s): {cur_time:.4f}")
+
+                volume_coordinates[:, start_idx:end_idx] = volume_mesh_centers
+
+                start_event.record()
+
+                volume_solutions_batch = self.compute_solution_in_volume(
+                    geo_encoding,
+                    volume_mesh_centers,
+                    sdf_nodes,
+                    pos_normals_closest,
+                    pos_normals_com,
+                    self.grid,
+                    self.model,
+                    use_sdf_basis=self.cfg.model.use_sdf_in_basis_func,
+                    inlet_velocity=self.stream_velocity,
+                    air_density=self.air_density,
+                )
+                volume_solutions[:, start_idx:end_idx] = volume_solutions_batch
+                end_event.record()
+                end_event.synchronize()
+                cur_time = start_event.elapsed_time(end_event) / 1000.0
+                print(f"compute_solution time (s): {cur_time:.4f}")
+                total_time += float(time.time() - inner_time)
+                # volume_solutions_all = volume_solutions
+                print(
+                    "Time taken for compute solution in volume for =%f"
+                    % (time.time() - inner_time)
+                )
+                # print("Points processed:", end_idx)
+            print("Total time measured = %f" % total_time)
+            print("Points processed:", end_idx)
+
+        cmax = scaling_factors[0]
+        cmin = scaling_factors[1]
+        volume_coordinates_all = volume_coordinates
+        volume_solutions_all = volume_solutions
+
+        cmax = scaling_factors[0]
+        cmin = scaling_factors[1]
+
+        volume_coordinates_all = torch.reshape(
+            volume_coordinates_all, (1, num_sample_points, 3)
+        )
+        volume_solutions_all = torch.reshape(
+            volume_solutions_all, (1, num_sample_points, self.num_vol_vars)
+        )
+
+        if self.vol_factors is not None:
+            volume_solutions_all = unnormalize(
+                volume_solutions_all, self.vol_factors[0], self.vol_factors[1]
+            )
+
+        self.out_dict["coordinates"] = (
+            0.5 * (volume_coordinates_all + 1.0) * (cmax - cmin) + cmin
+        )
+        self.out_dict["velocity"] = (
+            volume_solutions_all[:, :, :3] * self.stream_velocity
+        )
+        self.out_dict["pressure"] = (
+            volume_solutions_all[:, :, 3:4]
+            * self.stream_velocity**2.0
+            * self.air_density
+        )
+        # self.out_dict["turbulent-kinetic-energy"] = (
+        #     volume_solutions_all[:, :, 4:5]
+        #     * self.stream_velocity**2.0
+        #     * self.air_density
+        # )
+        # self.out_dict["turbulent-viscosity"] = (
+        #     volume_solutions_all[:, :, 5:] * self.stream_velocity * self.length_scale
+        # )
+        self.out_dict["bounding_box_dims"] = torch.vstack(self.bounding_box_min_max)
+
+        if plot_solutions:
+            print("Plotting solutions")
+            plot_save_path = os.path.join(self.cfg.output, "plots/contours/")
+            create_directory(plot_save_path)
+
+            p_grid = 0.5 * (self.grid + 1.0) * (cmax - cmin) + cmin
+            p_grid = p_grid.cpu().numpy()
+            sdf_grid = self.sdf_grid.cpu().numpy()
+            volume_coordinates_all = (
+                0.5 * (volume_coordinates_all + 1.0) * (cmax - cmin) + cmin
+            )
+            volume_solutions_all[:, :, :3] = (
+                volume_solutions_all[:, :, :3] * self.stream_velocity
+            )
+            volume_solutions_all[:, :, 3:4] = (
+                volume_solutions_all[:, :, 3:4]
+                * self.stream_velocity**2.0
+                * self.air_density
+            )
+            # volume_solutions_all[:, :, 4:5] = (
+            #     volume_solutions_all[:, :, 4:5]
+            #     * self.stream_velocity**2.0
+            #     * self.air_density
+            # )
+            # volume_solutions_all[:, :, 5] = (
+            #     volume_solutions_all[:, :, 5] * self.stream_velocity * self.length_scale
+            # )
+            volume_coordinates_all = volume_coordinates_all.cpu().numpy()
+            volume_solutions_all = volume_solutions_all.cpu().numpy()
+
+            # ND interpolation on a grid
+            prediction_grid = nd_interpolator(
+                volume_coordinates_all, volume_solutions_all[0], p_grid[0]
+            )
+            nx, ny, nz, vars = prediction_grid.shape
+            idx = np.where(sdf_grid[0] < 0.0)
+            prediction_grid[idx] = float("inf")
+            axes_titles = ["y/4 plane", "y/2 plane"]
+
+            plot(
+                prediction_grid[:, int(ny / 4), :, 0],
+                prediction_grid[:, int(ny / 2), :, 0],
+                var="x-vel",
+                save_path=plot_save_path + f"x-vel-midplane_{self.stream_velocity}.png",
+                axes_titles=axes_titles,
+                plot_error=False,
+            )
+            plot(
+                prediction_grid[:, int(ny / 4), :, 1],
+                prediction_grid[:, int(ny / 2), :, 1],
+                var="y-vel",
+                save_path=plot_save_path + f"y-vel-midplane_{self.stream_velocity}.png",
+                axes_titles=axes_titles,
+                plot_error=False,
+            )
+            plot(
+                prediction_grid[:, int(ny / 4), :, 2],
+                prediction_grid[:, int(ny / 2), :, 2],
+                var="z-vel",
+                save_path=plot_save_path + f"z-vel-midplane_{self.stream_velocity}.png",
+                axes_titles=axes_titles,
+                plot_error=False,
+            )
+            plot(
+                prediction_grid[:, int(ny / 4), :, 3],
+                prediction_grid[:, int(ny / 2), :, 3],
+                var="pres",
+                save_path=plot_save_path + f"pres-midplane_{self.stream_velocity}.png",
+                axes_titles=axes_titles,
+                plot_error=False,
+            )
+            # plot(
+            #     prediction_grid[:, int(ny / 4), :, 4],
+            #     prediction_grid[:, int(ny / 2), :, 4],
+            #     var="tke",
+            #     save_path=plot_save_path + f"tke-midplane_{self.stream_velocity}.png",
+            #     axes_titles=axes_titles,
+            #     plot_error=False,
+            # )
+            # plot(
+            #     prediction_grid[:, int(ny / 4), :, 5],
+            #     prediction_grid[:, int(ny / 2), :, 5],
+            #     var="nut",
+            #     save_path=plot_save_path + f"nut-midplane_{self.stream_velocity}.png",
+            #     axes_titles=axes_titles,
+            #     plot_error=False,
+            # )
+
+    def cold_start(self, cached_geom_path=None):
+        print("Cold start")
+        self.compute_geo_encoding(cached_geom_path)
+        self.compute_volume_solutions(num_sample_points=10)
+        self.clear_out_dict()
+
+    @torch.no_grad()
+    def calculate_geometry_encoding(
+        self, geo_centers, p_grid, sdf_grid, s_grid, sdf_surf_grid, model
+    ):
+        vol_min = self.bounding_box_min_max[0]
+        vol_max = self.bounding_box_min_max[1]
+        surf_min = self.bounding_box_surface_min_max[0]
+        surf_max = self.bounding_box_surface_min_max[1]
+
+        geo_centers_vol = 2.0 * (geo_centers - vol_min) / (vol_max - vol_min) - 1
+        if self.dist.world_size == 1:
+            encoding_g_vol = model.geo_rep_volume(geo_centers_vol, p_grid, sdf_grid)
+        else:
+            encoding_g_vol = model.module.geo_rep_volume(
+                geo_centers_vol, p_grid, sdf_grid
+            )
+
+        geo_centers_surf = 2.0 * (geo_centers - surf_min) / (surf_max - surf_min) - 1
+
+        if self.dist.world_size == 1:
+            encoding_g_surf = model.geo_rep_surface(
+                geo_centers_surf, s_grid, sdf_surf_grid
+            )
+        else:
+            encoding_g_surf = model.module.geo_rep_surface(
+                geo_centers_surf, s_grid, sdf_surf_grid
+            )
+
+        if self.dist.world_size == 1:
+            encoding_g_surf1 = model.geo_rep_surface1(
+                geo_centers_surf, s_grid, sdf_surf_grid
+            )
+        else:
+            encoding_g_surf1 = model.module.geo_rep_surface1(
+                geo_centers_surf, s_grid, sdf_surf_grid
+            )
+
+        geo_encoding = 0.5 * encoding_g_surf1 + 0.5 * encoding_g_vol
+        geo_encoding_surface = 0.5 * encoding_g_surf
+        return geo_encoding, geo_encoding_surface
+
+    @torch.no_grad()
+    def compute_solution_on_surface(
+        self,
+        geo_encoding,
+        surface_mesh_centers,
+        surface_mesh_neighbors,
+        surface_normals,
+        surface_neighbors_normals,
+        surface_areas,
+        surface_neighbors_areas,
+        pos_normals_com,
+        s_grid,
+        model,
+        inlet_velocity,
+        air_density,
+    ):
+        """
+        Global parameters: For this particular case, the model was trained on single velocity/density values
+        across all simulations. Hence, global_params_values and global_params_reference are the same.
+        """
+        global_params_values = torch.cat(
+            (inlet_velocity, air_density), axis=1
+        )  # (1, 2)
+        global_params_values = torch.unsqueeze(global_params_values, -1)  # (1, 2, 1)
+
+        global_params_reference = torch.cat(
+            (inlet_velocity, air_density), axis=1
+        )  # (1, 2)
+        global_params_reference = torch.unsqueeze(
+            global_params_reference, -1
+        )  # (1, 2, 1)
+
+        if self.dist.world_size == 1:
+            geo_encoding_local = model.geo_encoding_local(
+                geo_encoding, surface_mesh_centers, s_grid, mode="surface"
+            )
+        else:
+            geo_encoding_local = model.module.geo_encoding_local(
+                geo_encoding, surface_mesh_centers, s_grid, mode="surface"
+            )
+
+        pos_encoding = pos_normals_com
+        surface_areas = torch.unsqueeze(surface_areas, -1)
+        surface_neighbors_areas = torch.unsqueeze(surface_neighbors_areas, -1)
+
+        if self.dist.world_size == 1:
+            pos_encoding = model.position_encoder(pos_encoding, eval_mode="surface")
+            tpredictions_batch = model.calculate_solution_with_neighbors(
+                surface_mesh_centers,
+                geo_encoding_local,
+                pos_encoding,
+                surface_mesh_neighbors,
+                surface_normals,
+                surface_neighbors_normals,
+                surface_areas,
+                surface_neighbors_areas,
+                global_params_values,
+                global_params_reference,
+            )
+        else:
+            pos_encoding = model.module.position_encoder(
+                pos_encoding, eval_mode="surface"
+            )
+            tpredictions_batch = model.module.calculate_solution_with_neighbors(
+                surface_mesh_centers,
+                geo_encoding_local,
+                pos_encoding,
+                surface_mesh_neighbors,
+                surface_normals,
+                surface_neighbors_normals,
+                surface_areas,
+                surface_neighbors_areas,
+                global_params_values,
+                global_params_reference,
+            )
+
+        return tpredictions_batch
+
+    @torch.no_grad()
+    def compute_solution_in_volume(
+        self,
+        geo_encoding,
+        volume_mesh_centers,
+        sdf_nodes,
+        pos_enc_closest,
+        pos_normals_com,
+        p_grid,
+        model,
+        use_sdf_basis,
+        inlet_velocity,
+        air_density,
+    ):
+        ## Global parameters
+        global_params_values = torch.cat(
+            (inlet_velocity, air_density), axis=1
+        )  # (1, 2)
+        global_params_values = torch.unsqueeze(global_params_values, -1)  # (1, 2, 1)
+
+        global_params_reference = torch.cat(
+            (inlet_velocity, air_density), axis=1
+        )  # (1, 2)
+        global_params_reference = torch.unsqueeze(
+            global_params_reference, -1
+        )  # (1, 2, 1)
+
+        if self.dist.world_size == 1:
+            geo_encoding_local = model.geo_encoding_local(
+                geo_encoding, volume_mesh_centers, p_grid, mode="volume"
+            )
+        else:
+            geo_encoding_local = model.module.geo_encoding_local(
+                geo_encoding, volume_mesh_centers, p_grid, mode="volume"
+            )
+        if use_sdf_basis:
+            pos_encoding = torch.cat(
+                (sdf_nodes, pos_enc_closest, pos_normals_com), axis=-1
+            )
+        else:
+            pos_encoding = pos_normals_com
+
+        if self.dist.world_size == 1:
+            pos_encoding = model.position_encoder(pos_encoding, eval_mode="volume")
+            tpredictions_batch = model.calculate_solution(
+                volume_mesh_centers,
+                geo_encoding_local,
+                pos_encoding,
+                global_params_values,
+                global_params_reference,
+                num_sample_points=self.stencil_size,
+                eval_mode="volume",
+            )
+        else:
+            pos_encoding = model.module.position_encoder(
+                pos_encoding, eval_mode="volume"
+            )
+            tpredictions_batch = model.module.calculate_solution(
+                volume_mesh_centers,
+                geo_encoding_local,
+                pos_encoding,
+                global_params_values,
+                global_params_reference,
+                num_sample_points=self.stencil_size,
+                eval_mode="volume",
+            )
+        return tpredictions_batch
+
+
+if __name__ == "__main__":
+    OmegaConf.register_new_resolver("eval", eval)
+    with initialize(version_base="1.3", config_path="conf"):
+        cfg = compose(config_name="config")
+
+    DistributedManager.initialize()
+    dist = DistributedManager()
+
+    if dist.world_size > 1:
+        torch.distributed.barrier()
+
+    input_path = cfg.eval.test_path
+    dirnames = get_filenames(input_path)
+    dev_id = torch.cuda.current_device()
+    num_files = int(len(dirnames) / 8)
+    dirnames_per_gpu = dirnames[int(num_files * dev_id) : int(num_files * (dev_id + 1))]
+
+    domino = dominoInference(cfg, dist, False)
+    domino.initialize_model(
+        model_path="/lustre/models/DoMINO.0.7.pt"
+    )  ## Replace the model path with location of the trained model
+
+    for count, dirname in enumerate(dirnames_per_gpu):
+        # print(f"Processing file {dirname}")
+        filepath = os.path.join(input_path, dirname)
+
+        STREAM_VELOCITY = 30.0
+        AIR_DENSITY = 1.205
+
+        # Neighborhood points sampled for evaluation, tradeoff between accuracy and speed
+        STENCIL_SIZE = (
+            7  # Higher stencil size -> more accuracy but more evaluation time
+        )
+
+        domino.set_stl_path(filepath)
+        domino.set_stream_velocity(STREAM_VELOCITY)
+        domino.set_stencil_size(STENCIL_SIZE)
+
+        domino.read_stl()
+
+        domino.initialize_data_processor()
+
+        # Calculate geometry encoding
+        domino.compute_geo_encoding()
+
+        # Calculate volume solutions
+        domino.compute_volume_solutions(
+            num_sample_points=10_256_000, plot_solutions=False
+        )
+
+        # Calculate surface solutions
+        domino.compute_surface_solutions()
+        domino.compute_forces()
+        out_dict = domino.get_out_dict()
+
+        print(
+            "Dirname:",
+            dirname,
+            "Drag:",
+            out_dict["drag_force"],
+            "Lift:",
+            out_dict["lift_force"],
+        )
+        vtp_path = f"/lustre/snidhan/physicsnemo-work/domino-global-param-runs/stl-results/pred_{dirname}_4.vtp"
+        domino.mesh_stl.save(vtp_path)
+        reader = vtk.vtkXMLPolyDataReader()
+        reader.SetFileName(f"{vtp_path}")
+        reader.Update()
+        polydata_surf = reader.GetOutput()
+
+        surfParam_vtk = numpy_support.numpy_to_vtk(
+            out_dict["pressure_surface"][0].cpu().numpy()
+        )
+        surfParam_vtk.SetName(f"Pressure")
+        polydata_surf.GetCellData().AddArray(surfParam_vtk)
+
+        surfParam_vtk = numpy_support.numpy_to_vtk(
+            out_dict["wall-shear-stress"][0].cpu().numpy()
+        )
+        surfParam_vtk.SetName(f"Wall-shear-stress")
+        polydata_surf.GetCellData().AddArray(surfParam_vtk)
+
+        write_to_vtp(polydata_surf, vtp_path)
+        exit()
diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
index a85cc7df86..3c6acc3ccd 100644
--- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
+++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
@@ -15,1572 +15,620 @@
 # limitations under the License.
 
 """
-This code defines a standalone distributed inference pipeline the DoMINO model.
-This inference pipeline can be used to evaluate the model given an STL and
-an inflow speed. The pre-trained model checkpoint can be specified in this script
-or inferred from the config file. The results are calculated on a point cloud
-sampled in the volume around the STL and on the surface of the STL. They are stored
-in a dictionary, which can be written out for visualization.
+This code shows how to use a trained DoMINO model, with it's corresponding
+preprocessing pipeline, to infer values on and around an STL mesh file.
+
+This script uses the meshes from the DrivaerML dataset, however, the logic
+is largely the same.  As an overview:
+- Load the model
+- Set up the preprocessor
+- Loop over meshes
+- In each mesh, sample random points on the surface, volume, or both
+- Preprocess the points and run them through the model
+- Process the STL mesh centers, too
+- Collect the results and return
+- Save the results to file.
 """
 
-import os
 import time
+import os
+import re
+from typing import Literal, Any
 
-import hydra, re
-from hydra import compose, initialize
+import apex
+import numpy as np
+import hydra
 from hydra.utils import to_absolute_path
 from omegaconf import DictConfig, OmegaConf
-
-import numpy as np
 import torch
 
-from physicsnemo.models.domino.model import DoMINO
-from physicsnemo.utils.domino.utils import (
-    unnormalize,
-    create_directory,
-    nd_interpolator,
-    get_filenames,
-    write_to_vtp,
-)
-from torch.cuda.amp import autocast
-from torch.nn.parallel import DistributedDataParallel
-from physicsnemo.distributed import DistributedManager
-
-from numpy.typing import NDArray
-from typing import Any, Iterable, List, Literal, Mapping, Optional, Union, Callable
-import warp as wp
-from pathlib import Path
-import pandas as pd
-import matplotlib.pyplot as plt
-import pyvista as pv
-
-try:
-    from physicsnemo.sym.geometry.tessellation import Tessellation
-
-    SYM_AVAILABLE = True
-except ImportError:
-    SYM_AVAILABLE = False
-
-
-def combine_stls(stl_path, stl_files):
-    meshes = []
-    combined_mesh = pv.PolyData()
-    for file in stl_files:
-        if ".stl" in file and "single_solid" not in file:
-            stl_file_path = os.path.join(stl_path, file)
-            reader = pv.get_reader(stl_file_path)
-            mesh_stl = reader.read()
-            combined_mesh = combined_mesh.merge(mesh_stl)
-            # meshes.append(mesh_stl)
-            break
-    # combined_mesh = pv.merge(meshes)
-    return combined_mesh
-
-
-def plot(truth, prediction, var, save_path, axes_titles=None, plot_error=True):
-    if plot_error:
-        c = 3
-    else:
-        c = 2
-    fig, axes = plt.subplots(1, c, figsize=(15, 5))
-    error = truth - prediction
-    # Plot Truth
-    im = axes[0].imshow(
-        truth,
-        cmap="jet",
-        vmax=np.ma.masked_invalid(truth).max(),
-        vmin=np.ma.masked_invalid(truth).min(),
-    )
-    axes[0].axis("off")
-    cbar = fig.colorbar(im, ax=axes[0], orientation="vertical")
-    cbar.ax.tick_params(labelsize=12)
-    if axes_titles is None:
-        axes[0].set_title(f"{var} Truth")
-    else:
-        axes[0].set_title(axes_titles[0])
-
-    # Plot Predicted
-    im = axes[1].imshow(
-        prediction,
-        cmap="jet",
-        vmax=np.ma.masked_invalid(prediction).max(),
-        vmin=np.ma.masked_invalid(prediction).min(),
-    )
-    axes[1].axis("off")
-    cbar = fig.colorbar(im, ax=axes[1], orientation="vertical")
-    cbar.ax.tick_params(labelsize=12)
-    if axes_titles is None:
-        axes[1].set_title(f"{var} Predicted")
-    else:
-        axes[1].set_title(axes_titles[1])
-
-    if plot_error:
-        # Plot Error
-        im = axes[2].imshow(
-            error,
-            cmap="jet",
-            vmax=np.ma.masked_invalid(error).max(),
-            vmin=np.ma.masked_invalid(error).min(),
-        )
-        axes[2].axis("off")
-        cbar = fig.colorbar(im, ax=axes[2], orientation="vertical")
-        cbar.ax.tick_params(labelsize=12)
-        if axes_titles is None:
-            axes[2].set_title(f"{var} Error")
-        else:
-            axes[2].set_title(axes_titles[2])
-
-        MAE = np.mean(np.ma.masked_invalid((error)))
-
-        if MAE:
-            fig.suptitle(f"MAE {MAE}", fontsize=18, x=0.5)
-
-    plt.tight_layout()
-
-    path_to_save_path = os.path.join(save_path)
-    plt.savefig(path_to_save_path, bbox_inches="tight", pad_inches=0.1)
-    plt.close()
-
-
-@wp.kernel
-def _bvh_query_distance(
-    mesh: wp.uint64,
-    points: wp.array(dtype=wp.vec3f),
-    max_dist: wp.float32,
-    sdf: wp.array(dtype=wp.float32),
-    sdf_hit_point: wp.array(dtype=wp.vec3f),
-    sdf_hit_point_id: wp.array(dtype=wp.int32),
-):
-    """
-    Computes the signed distance from each point in the given array `points`
-    to the mesh represented by `mesh`,within the maximum distance `max_dist`,
-    and stores the result in the array `sdf`.
-
-    Parameters:
-        mesh (wp.uint64): The identifier of the mesh.
-        points (wp.array): An array of 3D points for which to compute the
-            signed distance.
-        max_dist (wp.float32): The maximum distance within which to search
-            for the closest point on the mesh.
-        sdf (wp.array): An array to store the computed signed distances.
-        sdf_hit_point (wp.array): An array to store the computed hit points.
-        sdf_hit_point_id (wp.array): An array to store the computed hit point ids.
-
-    Returns:
-        None
-    """
-    tid = wp.tid()
-
-    res = wp.mesh_query_point_sign_winding_number(mesh, points[tid], max_dist)
-
-    mesh_ = wp.mesh_get(mesh)
-
-    p0 = mesh_.points[mesh_.indices[3 * res.face + 0]]
-    p1 = mesh_.points[mesh_.indices[3 * res.face + 1]]
-    p2 = mesh_.points[mesh_.indices[3 * res.face + 2]]
-
-    p_closest = res.u * p0 + res.v * p1 + (1.0 - res.u - res.v) * p2
-
-    sdf[tid] = res.sign * wp.abs(wp.length(points[tid] - p_closest))
-    sdf_hit_point[tid] = p_closest
-    sdf_hit_point_id[tid] = res.face
-
-
-def signed_distance_field(
-    mesh_vertices: list[tuple[float, float, float]],
-    mesh_indices: NDArray[float],
-    input_points: list[tuple[float, float, float]],
-    max_dist: float = 1e8,
-    include_hit_points: bool = False,
-    include_hit_points_id: bool = False,
-    device: int = 0,
-) -> wp.array:
-    """
-    Computes the signed distance field (SDF) for a given mesh and input points.
-
-    Parameters:
-    ----------
-        mesh_vertices (list[tuple[float, float, float]]): List of vertices defining the mesh.
-        mesh_indices (list[tuple[int, int, int]]): List of indices defining the triangles of the mesh.
-        input_points (list[tuple[float, float, float]]): List of input points for which to compute the SDF.
-        max_dist (float, optional): Maximum distance within which to search for
-            the closest point on the mesh. Default is 1e8.
-        include_hit_points (bool, optional): Whether to include hit points in
-            the output. Default is False.
-        include_hit_points_id (bool, optional): Whether to include hit point
-            IDs in the output. Default is False.
-
-    Returns:
-    -------
-        wp.array: An array containing the computed signed distance field.
-
-    Example:
-    -------
-    >>> mesh_vertices = [(0, 0, 0), (1, 0, 0), (0, 1, 0)]
-    >>> mesh_indices = np.array((0, 1, 2))
-    >>> input_points = [(0.5, 0.5, 0.5)]
-    >>> signed_distance_field(mesh_vertices, mesh_indices, input_points).numpy()
-    Module ...
-    array([0.5], dtype=float32)
-    """
-
-    wp.init()
-    # mesh = wp.Mesh(
-    #     wp.array(mesh_vertices.cpu(), dtype=wp.vec3), wp.array(mesh_indices.cpu(), dtype=wp.int32)
-    # )
-    mesh = wp.Mesh(
-        wp.from_torch(mesh_vertices, dtype=wp.vec3),
-        wp.from_torch(mesh_indices, dtype=wp.int32),
-    )
-
-    sdf_points = wp.from_torch(input_points, dtype=wp.vec3)
-    sdf = wp.zeros(shape=sdf_points.shape, dtype=wp.float32)
-    sdf_hit_point = wp.zeros(shape=sdf_points.shape, dtype=wp.vec3f)
-    sdf_hit_point_id = wp.zeros(shape=sdf_points.shape, dtype=wp.int32)
-    wp.launch(
-        kernel=_bvh_query_distance,
-        dim=len(sdf_points),
-        inputs=[mesh.id, sdf_points, max_dist, sdf, sdf_hit_point, sdf_hit_point_id],
-    )
-    if include_hit_points and include_hit_points_id:
-        return (
-            wp.to_torch(sdf),
-            wp.to_torch(sdf_hit_point),
-            wp.to_torch(sdf_hit_point_id),
-        )
-    elif include_hit_points:
-        return (wp.to_torch(sdf), wp.to_torch(sdf_hit_point))
-    elif include_hit_points_id:
-        return (wp.to_torch(sdf), wp.to_torch(sdf_hit_point_id))
-    else:
-        return wp.to_torch(sdf)
-
-
-def shuffle_array_torch(surface_vertices, geometry_points, device):
-    idx = torch.unsqueeze(
-        torch.randperm(surface_vertices.shape[0])[:geometry_points], -1
-    ).to(device)
-    idx = idx.repeat(1, 3)
-    surface_sampled = torch.gather(surface_vertices, 0, idx)
-    return surface_sampled
-
-
-class inferenceDataPipe:
-    def __init__(
-        self,
-        device: int = 0,
-        grid_resolution: Optional[list] = [256, 96, 64],
-        normalize_coordinates: bool = False,
-        geom_points_sample: int = 300000,
-        positional_encoding: bool = False,
-        surface_vertices=None,
-        surface_indices=None,
-        surface_areas=None,
-        surface_centers=None,
-        use_sdf_basis=False,
-    ):
-        self.surface_vertices = surface_vertices
-        self.surface_indices = surface_indices
-        self.surface_areas = surface_areas
-        self.surface_centers = surface_centers
-        self.device = device
-        self.grid_resolution = grid_resolution
-        self.normalize_coordinates = normalize_coordinates
-        self.geom_points_sample = geom_points_sample
-        self.positional_encoding = positional_encoding
-        self.use_sdf_basis = use_sdf_basis
-        torch.manual_seed(int(42 + torch.cuda.current_device()))
-        self.data_dict = {}
-
-    def clear_dict(self):
-        del self.data_dict
-
-    def clear_volume_dict(self):
-        del self.data_dict["volume_mesh_centers"]
-        del self.data_dict["pos_enc_closest"]
-        del self.data_dict["pos_normals_com"]
-        del self.data_dict["sdf_nodes"]
-
-    def create_grid_torch(self, mx, mn, nres):
-        start_time = time.time()
-        dx = torch.linspace(mn[0], mx[0], nres[0], device=self.device)
-        dy = torch.linspace(mn[1], mx[1], nres[1], device=self.device)
-        dz = torch.linspace(mn[2], mx[2], nres[2], device=self.device)
-
-        xv, yv, zv = torch.meshgrid(dx, dy, dz, indexing="ij")
-        xv = torch.unsqueeze(xv, -1)
-        yv = torch.unsqueeze(yv, -1)
-        zv = torch.unsqueeze(zv, -1)
-        grid = torch.cat((xv, yv, zv), axis=-1)
-        return grid
-
-    def process_surface_mesh(self, bounding_box=None, bounding_box_surface=None):
-        # Use coarse mesh to calculate SDF
-        surface_vertices = self.surface_vertices
-        surface_indices = self.surface_indices
-        surface_areas = self.surface_areas
-        surface_centers = self.surface_centers
-
-        start_time = time.time()
-
-        if bounding_box is None:
-            # Create a bounding box
-            s_max = torch.amax(surface_vertices, 0)
-            s_min = torch.amin(surface_vertices, 0)
-
-            c_max = s_max + (s_max - s_min) / 2
-            c_min = s_min - (s_max - s_min) / 2
-            c_min[2] = s_min[2]
-        else:
-            c_min = bounding_box[0]
-            c_max = bounding_box[1]
-
-        if bounding_box_surface is None:
-            # Create a bounding box
-            s_max = torch.amax(surface_vertices, 0)
-            s_min = torch.amin(surface_vertices, 0)
-
-            surf_max = s_max + (s_max - s_min) / 2
-            surf_min = s_min - (s_max - s_min) / 2
-            surf_min[2] = s_min[2]
-        else:
-            surf_min = bounding_box_surface[0]
-            surf_max = bounding_box_surface[1]
-
-        nx, ny, nz = self.grid_resolution
-
-        grid = self.create_grid_torch(c_max, c_min, self.grid_resolution)
-        grid_reshaped = torch.reshape(grid, (nx * ny * nz, 3))
-
-        # SDF on grid
-        sdf_grid = signed_distance_field(
-            surface_vertices, surface_indices, grid_reshaped, device=self.device
-        )
-        sdf_grid = torch.reshape(sdf_grid, (nx, ny, nz))
-
-        surface_areas = torch.unsqueeze(surface_areas, -1)
-        center_of_mass = torch.sum(surface_centers * surface_areas, 0) / torch.sum(
-            surface_areas
-        )
+DISABLE_RMM = os.environ.get("DISABLE_RMM", False)
+if not DISABLE_RMM:
+    import rmm
+    from rmm.allocators.torch import rmm_torch_allocator
 
-        s_grid = self.create_grid_torch(surf_max, surf_min, self.grid_resolution)
-        surf_grid_reshaped = torch.reshape(s_grid, (nx * ny * nz, 3))
+    rmm.reinitialize(pool_allocator=True)
+    torch.cuda.memory.change_current_allocator(rmm_torch_allocator)
 
-        surf_sdf_grid = signed_distance_field(
-            surface_vertices, surface_indices, surf_grid_reshaped, device=self.device
-        )
-        surf_sdf_grid = torch.reshape(surf_sdf_grid, (nx, ny, nz))
-
-        if self.normalize_coordinates:
-            grid = 2.0 * (grid - c_min) / (c_max - c_min) - 1.0
-            s_grid = 2.0 * (s_grid - surf_min) / (surf_max - surf_min) - 1.0
-
-        surface_vertices = torch.unsqueeze(surface_vertices, 0)
-        grid = torch.unsqueeze(grid, 0)
-        s_grid = torch.unsqueeze(s_grid, 0)
-        sdf_grid = torch.unsqueeze(sdf_grid, 0)
-        surf_sdf_grid = torch.unsqueeze(surf_sdf_grid, 0)
-        max_min = [c_min, c_max]
-        surf_max_min = [surf_min, surf_max]
-        center_of_mass = center_of_mass
-
-        return (
-            surface_vertices,
-            grid,
-            sdf_grid,
-            max_min,
-            s_grid,
-            surf_sdf_grid,
-            surf_max_min,
-            center_of_mass,
-        )
-
-    def sample_stl_points(
-        self,
-        num_points,
-        stl_centers,
-        stl_area,
-        stl_normals,
-        max_min,
-        center_of_mass,
-        bounding_box=None,
-        stencil_size=7,
-    ):
-        if bounding_box is not None:
-            c_max = bounding_box[1]
-            c_min = bounding_box[0]
-        else:
-            c_min = max_min[0]
-            c_max = max_min[1]
-
-        start_time = time.time()
-
-        nx, ny, nz = self.grid_resolution
-
-        idx = np.arange(stl_centers.shape[0])
-        # np.random.shuffle(idx)
-        if num_points is not None:
-            idx = idx[:num_points]
-
-        surface_coordinates = stl_centers
-        surface_normals = stl_normals
-        surface_area = stl_area
-
-        if stencil_size > 1:
-            interp_func = KDTree(surface_coordinates)
-            dd, ii = interp_func.query(surface_coordinates, k=stencil_size)
-            surface_neighbors = surface_coordinates[ii]
-            surface_neighbors = surface_neighbors[:, 1:] + 1e-6
-            surface_neighbors_normals = surface_normals[ii]
-            surface_neighbors_normals = surface_neighbors_normals[:, 1:]
-            surface_neighbors_area = surface_area[ii]
-            surface_neighbors_area = surface_neighbors_area[:, 1:]
-        else:
-            surface_neighbors = np.expand_dims(surface_coordinates, 1) + 1e-6
-            surface_neighbors_normals = np.expand_dims(surface_normals, 1)
-            surface_neighbors_area = np.expand_dims(surface_area, 1)
-
-        surface_coordinates = torch.from_numpy(surface_coordinates).to(self.device)
-        surface_normals = torch.from_numpy(surface_normals).to(self.device)
-        surface_area = torch.from_numpy(surface_area).to(self.device)
-        surface_neighbors = torch.from_numpy(surface_neighbors).to(self.device)
-        surface_neighbors_normals = torch.from_numpy(surface_neighbors_normals).to(
-            self.device
-        )
-        surface_neighbors_area = torch.from_numpy(surface_neighbors_area).to(
-            self.device
-        )
-
-        pos_normals_com = surface_coordinates - center_of_mass
-
-        if self.normalize_coordinates:
-            surface_coordinates = (
-                2.0 * (surface_coordinates - c_min) / (c_max - c_min) - 1.0
-            )
-            surface_neighbors = (
-                2.0 * (surface_neighbors - c_min) / (c_max - c_min) - 1.0
-            )
-
-        surface_coordinates = surface_coordinates[idx]
-        surface_area = surface_area[idx]
-        surface_normals = surface_normals[idx]
-        pos_normals_com = pos_normals_com[idx]
-        surface_coordinates = torch.unsqueeze(surface_coordinates, 0)
-        surface_normals = torch.unsqueeze(surface_normals, 0)
-        surface_area = torch.unsqueeze(surface_area, 0)
-        pos_normals_com = torch.unsqueeze(pos_normals_com, 0)
-
-        surface_neighbors = surface_neighbors[idx]
-        surface_neighbors_normals = surface_neighbors_normals[idx]
-        surface_neighbors_area = surface_neighbors_area[idx]
-        surface_neighbors = torch.unsqueeze(surface_neighbors, 0)
-        surface_neighbors_normals = torch.unsqueeze(surface_neighbors_normals, 0)
-        surface_neighbors_area = torch.unsqueeze(surface_neighbors_area, 0)
-
-        scaling_factors = [c_max, c_min]
-
-        return (
-            surface_coordinates,
-            surface_neighbors,
-            surface_normals,
-            surface_neighbors_normals,
-            surface_area,
-            surface_neighbors_area,
-            pos_normals_com,
-            scaling_factors,
-            idx,
-        )
-
-    def sample_points_on_surface(
-        self,
-        num_points_surf,
-        max_min,
-        center_of_mass,
-        stl_path,
-        bounding_box=None,
-        stencil_size=7,
-    ):
-        if bounding_box is not None:
-            c_max = bounding_box[1]
-            c_min = bounding_box[0]
-        else:
-            c_min = max_min[0]
-            c_max = max_min[1]
-
-        start_time = time.time()
-
-        nx, ny, nz = self.grid_resolution
-
-        obj = Tessellation.from_stl(stl_path, airtight=False)
-
-        boundary = obj.sample_boundary(num_points_surf)
-        surface_coordinates = np.concatenate(
-            [
-                np.float32(boundary["x"]),
-                np.float32(boundary["y"]),
-                np.float32(boundary["z"]),
-            ],
-            axis=1,
-        )
-        surface_normals = np.concatenate(
-            [
-                np.float32(boundary["normal_x"]),
-                np.float32(boundary["normal_y"]),
-                np.float32(boundary["normal_z"]),
-            ],
-            axis=1,
-        )
-
-        surface_area = np.float32(boundary["area"])
-
-        interp_func = KDTree(surface_coordinates)
-        dd, ii = interp_func.query(surface_coordinates, k=stencil_size)
-        surface_neighbors = surface_coordinates[ii]
-        surface_neighbors = surface_neighbors[:, 1:]
-        surface_neighbors_normals = surface_normals[ii]
-        surface_neighbors_normals = surface_neighbors_normals[:, 1:]
-        surface_neighbors_area = surface_area[ii]
-        surface_neighbors_area = surface_neighbors_area[:, 1:]
-
-        surface_coordinates = torch.from_numpy(surface_coordinates).to(self.device)
-        surface_normals = torch.from_numpy(surface_normals).to(self.device)
-        surface_area = torch.from_numpy(surface_area).to(self.device)
-        surface_neighbors = torch.from_numpy(surface_neighbors).to(self.device)
-        surface_neighbors_normals = torch.from_numpy(surface_neighbors_normals).to(
-            self.device
-        )
-        surface_neighbors_area = torch.from_numpy(surface_neighbors_area).to(
-            self.device
-        )
-
-        pos_normals_com = surface_coordinates - center_of_mass
-
-        if self.normalize_coordinates:
-            surface_coordinates = (
-                2.0 * (surface_coordinates - c_min) / (c_max - c_min) - 1.0
-            )
-
-        surface_coordinates = torch.unsqueeze(surface_coordinates, 0)
-        surface_normals = torch.unsqueeze(surface_normals, 0)
-        surface_area = torch.unsqueeze(surface_area, 0)
-        pos_normals_com = torch.unsqueeze(pos_normals_com, 0)
-
-        surface_neighbors = torch.unsqueeze(surface_neighbors, 0)
-        surface_neighbors_normals = torch.unsqueeze(surface_neighbors_normals, 0)
-        surface_neighbors_area = torch.unsqueeze(surface_neighbors_area, 0)
-
-        scaling_factors = [c_max, c_min]
-
-        return (
-            surface_coordinates,
-            surface_neighbors,
-            surface_normals,
-            surface_neighbors_normals,
-            surface_area,
-            surface_neighbors_area,
-            pos_normals_com,
-            scaling_factors,
-        )
-
-    def sample_points_in_volume(
-        self, num_points_vol, max_min, center_of_mass, bounding_box=None
-    ):
-        if bounding_box is not None:
-            c_max = bounding_box[1]
-            c_min = bounding_box[0]
-        else:
-            c_min = max_min[0]
-            c_max = max_min[1]
-
-        start_time = time.time()
-
-        nx, ny, nz = self.grid_resolution
-        for k in range(10):
-            if k > 0:
-                num_pts_vol = num_points_vol - int(volume_coordinates.shape[0] / 2)
-            else:
-                num_pts_vol = int(1.25 * num_points_vol)
-
-            volume_coordinates_sub = (c_max - c_min) * torch.rand(
-                num_pts_vol, 3, device=self.device, dtype=torch.float32
-            ) + c_min
-
-            sdf_nodes, sdf_node_closest_point = signed_distance_field(
-                self.surface_vertices,
-                self.surface_indices,
-                volume_coordinates_sub,
-                include_hit_points=True,
-                device=self.device,
-            )
-            sdf_nodes = torch.unsqueeze(sdf_nodes, -1)
-
-            idx = torch.unsqueeze(torch.where((sdf_nodes > 0))[0], -1)
-            idx = idx.repeat(1, volume_coordinates_sub.shape[1])
-            if k == 0:
-                volume_coordinates = torch.gather(volume_coordinates_sub, 0, idx)
-            else:
-                volume_coordinates_1 = torch.gather(volume_coordinates_sub, 0, idx)
-                volume_coordinates = torch.cat(
-                    (volume_coordinates, volume_coordinates_1), axis=0
-                )
+import torchinfo
+import torch.distributed as dist
+from torch.amp import GradScaler, autocast
+from torch.nn.parallel import DistributedDataParallel
+from torch.utils.data import DataLoader
+from torch.utils.data.distributed import DistributedSampler
+from torch.utils.tensorboard import SummaryWriter
+from nvtx import annotate as nvtx_annotate
+import torch.cuda.nvtx as nvtx
 
-            if volume_coordinates.shape[0] > num_points_vol:
-                volume_coordinates = volume_coordinates[:num_points_vol]
-                break
+from physicsnemo.distributed import DistributedManager
+from physicsnemo.launch.utils import load_checkpoint, save_checkpoint
+from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper
 
-        sdf_nodes, sdf_node_closest_point = signed_distance_field(
-            self.surface_vertices,
-            self.surface_indices,
-            volume_coordinates,
-            include_hit_points=True,
-            device=self.device,
-        )
-        sdf_nodes = torch.unsqueeze(sdf_nodes, -1)
+from physicsnemo.datapipes.cae.domino_datapipe2 import (
+    DoMINODataPipe,
+    create_domino_dataset,
+)
+from physicsnemo.datapipes.cae.drivaer_ml_dataset import (
+    DrivaerMLDataset,
+)
 
-        pos_normals_closest = volume_coordinates - sdf_node_closest_point
-        pos_normals_com = volume_coordinates - center_of_mass
+from physicsnemo.models.domino.model import DoMINO
+from physicsnemo.utils.domino.utils import sample_points_on_mesh
 
-        if self.normalize_coordinates:
-            volume_coordinates = (
-                2.0 * (volume_coordinates - c_min) / (c_max - c_min) - 1.0
-            )
+from utils import ScalingFactors
 
-        volume_coordinates = torch.unsqueeze(volume_coordinates, 0)
-        pos_normals_com = torch.unsqueeze(pos_normals_com, 0)
+# This is included for GPU memory tracking:
+from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo
+import time
 
-        if self.use_sdf_basis:
-            pos_normals_closest = torch.unsqueeze(pos_normals_closest, 0)
-            sdf_nodes = torch.unsqueeze(sdf_nodes, 0)
 
-        scaling_factors = [c_max, c_min]
-        return (
-            volume_coordinates,
-            pos_normals_com,
-            pos_normals_closest,
-            sdf_nodes,
-            scaling_factors,
-        )
+# Initialize NVML
+nvmlInit()
 
 
-class dominoInference:
-    def __init__(
-        self,
-        cfg: DictConfig,
-        dist: None,
-        cached_geo_encoding: bool = False,
-    ):
-        self.cfg = cfg
-        self.dist = dist
-        self.stream_velocity = None
-        self.stencil_size = None
-        self.stl_path = None
-        self.stl_vertices = None
-        self.stl_centers = None
-        self.surface_areas = None
-        self.mesh_indices_flattened = None
-        self.length_scale = 1.0
-        if self.dist is None:
-            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        else:
-            self.device = self.dist.device
+from physicsnemo.utils.profiling import profile, Profiler
 
-        self.air_density = torch.full((1, 1), 1.205, dtype=torch.float32).to(
-            self.device
-        )
-        (
-            self.num_vol_vars,
-            self.num_surf_vars,
-            self.num_global_features,
-        ) = self.get_num_variables()
-        self.model = None
-        self.grid_resolution = torch.tensor(self.cfg.model.interp_res).to(self.device)
-        self.vol_factors = None
-        self.bounding_box_min_max = None
-        self.bounding_box_surface_min_max = None
-        self.center_of_mass = None
-        self.grid = None
-        self.geometry_encoding = None
-        self.geometry_encoding_surface = None
-        self.cached_geo_encoding = cached_geo_encoding
-        self.out_dict = {}
-
-    def get_geometry_encoding(self):
-        return self.geometry_encoding
-
-    def get_geometry_encoding_surface(self):
-        return self.geometry_encoding_surface
-
-    def get_out_dict(self):
-        return self.out_dict
-
-    def clear_out_dict(self):
-        self.out_dict.clear()
-
-    def initialize_data_processor(self):
-        self.ifp = inferenceDataPipe(
-            device=self.device,
-            surface_vertices=self.stl_vertices,
-            surface_indices=self.mesh_indices_flattened,
-            surface_areas=self.surface_areas,
-            surface_centers=self.stl_centers,
-            grid_resolution=self.grid_resolution,
-            normalize_coordinates=True,
-            geom_points_sample=300000,
-            positional_encoding=False,
-            use_sdf_basis=self.cfg.model.use_sdf_in_basis_func,
-        )
 
-    def load_bounding_box(self):
-        if (
-            self.cfg.data.bounding_box.min is not None
-            and self.cfg.data.bounding_box.max is not None
-        ):
-            c_min = torch.from_numpy(
-                np.array(self.cfg.data.bounding_box.min, dtype=np.float32)
-            ).to(self.device)
-            c_max = torch.from_numpy(
-                np.array(self.cfg.data.bounding_box.max, dtype=np.float32)
-            ).to(self.device)
-            self.bounding_box_min_max = [c_min, c_max]
-
-        if (
-            self.cfg.data.bounding_box_surface.min is not None
-            and self.cfg.data.bounding_box_surface.max is not None
-        ):
-            c_min = torch.from_numpy(
-                np.array(self.cfg.data.bounding_box_surface.min, dtype=np.float32)
-            ).to(self.device)
-            c_max = torch.from_numpy(
-                np.array(self.cfg.data.bounding_box_surface.max, dtype=np.float32)
-            ).to(self.device)
-            self.bounding_box_surface_min_max = [c_min, c_max]
-
-    def load_volume_scaling_factors(self):
-        scaling_param_path = self.cfg.eval.scaling_param_path
-        vol_factors_path = os.path.join(
-            scaling_param_path, "volume_scaling_factors.npy"
-        )
+from loss import compute_loss_dict
+from utils import get_num_vars
 
-        vol_factors = np.load(vol_factors_path, allow_pickle=True)
-        vol_factors = torch.from_numpy(vol_factors).to(self.device)
 
-        return vol_factors
+def inference_on_single_stl(
+    stl_coordinates: torch.Tensor,
+    stl_faces: torch.Tensor,
+    model: DoMINO,
+    datapipe: DoMINODataPipe,
+    batch_size: int,
+    total_points: int,
+    gpu_handle: int | None = None,
+    logger: PythonLogger | None = None,
+):
+    """
+    Perform model inference on a single STL mesh.
 
-    def load_surface_scaling_factors(self):
-        scaling_param_path = self.cfg.eval.scaling_param_path
-        surf_factors_path = os.path.join(
-            scaling_param_path, "surface_scaling_factors.npy"
-        )
+    This function will take the input mesh + faces and
+    then sample the surface and volume to produce the model outputs
+    at `total_points` locations in batches of `batch_size`.
 
-        surf_factors = np.load(surf_factors_path, allow_pickle=True)
-        surf_factors = torch.from_numpy(surf_factors).to(self.device)
-
-        return surf_factors
-
-    def read_stl(self):
-        stl_files = get_filenames(self.stl_path)
-        mesh_stl = combine_stls(self.stl_path, stl_files)
-        if self.cfg.eval.refine_stl:
-            mesh_stl = mesh_stl.subdivide(
-                nsub=2, subfilter="linear"
-            )  # .smooth(n_iter=20)
-        stl_vertices = mesh_stl.points
-        length_scale = np.amax(np.amax(stl_vertices, 0) - np.amin(stl_vertices, 0))
-        stl_centers = mesh_stl.cell_centers().points
-        # Assuming triangular elements
-        stl_faces = np.array(mesh_stl.faces).reshape((-1, 4))[:, 1:]
-        mesh_indices_flattened = stl_faces.flatten()
-
-        surface_areas = mesh_stl.compute_cell_sizes(
-            length=False, area=True, volume=False
-        )
-        surface_areas = np.array(surface_areas.cell_data["Area"])
 
-        surface_normals = np.array(mesh_stl.cell_normals, dtype=np.float32)
 
-        self.stl_vertices = torch.from_numpy(np.float32(stl_vertices)).to(self.device)
-        self.stl_centers = torch.from_numpy(np.float32(stl_centers)).to(self.device)
-        self.surface_areas = torch.from_numpy(np.float32(surface_areas)).to(self.device)
-        self.stl_normals = -1.0 * torch.from_numpy(np.float32(surface_normals)).to(
-            self.device
-        )
-        self.mesh_indices_flattened = torch.from_numpy(
-            np.int32(mesh_indices_flattened)
-        ).to(self.device)
-        self.length_scale = length_scale
-        self.mesh_stl = mesh_stl
-
-    def read_stl_trimesh(
-        self, stl_vertices, stl_faces, stl_centers, surface_normals, surface_areas
-    ):
-        mesh_indices_flattened = stl_faces.flatten()
-        length_scale = np.amax(np.amax(stl_vertices, 0) - np.amin(stl_vertices, 0))
-        self.stl_vertices = torch.from_numpy(stl_vertices).to(self.device)
-        self.stl_centers = torch.from_numpy(stl_centers).to(self.device)
-        self.stl_normals = -1.0 * torch.from_numpy(surface_normals).to(self.device)
-        self.surface_areas = torch.from_numpy(surface_areas).to(self.device)
-        self.mesh_indices_flattened = torch.from_numpy(
-            np.int32(mesh_indices_flattened)
-        ).to(self.device)
-        self.length_scale = length_scale
-
-    def get_num_variables(self):
-        volume_variable_names = list(self.cfg.variables.volume.solution.keys())
-        num_vol_vars = 0
-        for j in volume_variable_names:
-            if self.cfg.variables.volume.solution[j] == "vector":
-                num_vol_vars += 3
-            else:
-                num_vol_vars += 1
-
-        surface_variable_names = list(self.cfg.variables.surface.solution.keys())
-        num_surf_vars = 0
-        for j in surface_variable_names:
-            if self.cfg.variables.surface.solution[j] == "vector":
-                num_surf_vars += 3
-            else:
-                num_surf_vars += 1
-
-        num_global_features = 0
-        global_params_names = list(cfg.variables.global_parameters.keys())
-        for param in global_params_names:
-            if cfg.variables.global_parameters[param].type == "vector":
-                num_global_features += len(
-                    cfg.variables.global_parameters[param].reference
+    Args:
+        stl_coordinates: The coordinates of the STL mesh.
+        stl_faces: The faces of the STL mesh.
+        model: The model to use for inference.
+        datapipe: The datapipe to use for preprocessing.
+        batch_size: The batch size to use for inference.
+        total_points: The total number of points to process.
+        gpu_handle: The GPU handle to use for inference.
+        logger: The logger to use for logging.
+    """
+    device = stl_coordinates.device
+    batch_start_time = time.perf_counter()
+    ######################################################
+    # The IO only reads in "stl_faces" and "stl_coordinates".
+    # "stl_areas" and "stl_centers" would be computed by
+    # pyvista on CPU - instead, we do it on the GPU
+    # right here.
+    ######################################################
+
+    # Center is a mean of the 3 vertices
+    triangle_vertices = stl_coordinates[stl_faces.reshape((-1, 3))]
+    stl_centers = triangle_vertices.mean(dim=-1)
+    ######################################################
+    # Area we compute from the cross product of two sides:
+    ######################################################
+    d1 = triangle_vertices[:, 1] - triangle_vertices[:, 0]
+    d2 = triangle_vertices[:, 2] - triangle_vertices[:, 0]
+    stl_mesh_normals = torch.linalg.cross(d1, d2, dim=1)
+    normals_norm = torch.linalg.norm(stl_mesh_normals, dim=1)
+    stl_mesh_normals = stl_mesh_normals / normals_norm.unsqueeze(1)
+    stl_areas = 0.5 * normals_norm
+
+    ######################################################
+    # For computing the points, we take those stl objects,
+    # sample in chunks of `batch_size` until we've
+    # accumulated `total_points` predictions.
+    ######################################################
+
+    batch_output_dict = {}
+    N = 2
+    total_points_processed = 0
+
+    # Use these lists to build up the output tensors:
+    surface_results = []
+    volume_results = []
+
+    while total_points_processed < total_points:
+        inner_loop_start_time = time.perf_counter()
+
+        ######################################################
+        # Create the dictionary as the preprocessing expects:
+        ######################################################
+        inference_dict = {
+            "stl_coordinates": stl_coordinates,
+            "stl_faces": stl_faces,
+            "stl_centers": stl_centers,
+            "stl_areas": stl_areas,
+        }
+
+        # If the surface data is part of the model, sample the surface:
+
+        if datapipe.model_type == "surface" or datapipe.model_type == "combined":
+            ######################################################
+            # This function will sample points on the STL surface
+            ######################################################
+            sampled_points, sampled_faces, sampled_areas, sampled_normals = (
+                sample_points_on_mesh(
+                    stl_coordinates,
+                    stl_faces,
+                    batch_size,
+                    mesh_normals=stl_mesh_normals,
+                    mesh_areas=stl_areas,
                 )
-            elif cfg.variables.global_parameters[param].type == "scalar":
-                num_global_features += 1
-            else:
-                raise ValueError(f"Unknown global parameter type")
-
-        return num_vol_vars, num_surf_vars, num_global_features
-
-    def initialize_model(self, model_path):
-        model = (
-            DoMINO(
-                input_features=3,
-                output_features_vol=self.num_vol_vars,
-                output_features_surf=self.num_surf_vars,
-                global_features=self.num_global_features,
-                model_parameters=self.cfg.model,
             )
-            .to(self.device)
-            .eval()
-        )
-        model = torch.compile(model, disable=True)
-
-        checkpoint_iter = torch.load(
-            to_absolute_path(model_path), map_location=self.dist.device
-        )
-
-        model.load_state_dict(checkpoint_iter)
-
-        if self.dist is not None:
-            if self.dist.world_size > 1:
-                model = DistributedDataParallel(
-                    model,
-                    device_ids=[self.dist.local_rank],
-                    output_device=self.dist.device,
-                    broadcast_buffers=self.dist.broadcast_buffers,
-                    find_unused_parameters=self.dist.find_unused_parameters,
-                    gradient_as_bucket_view=True,
-                    static_graph=True,
-                )
-
-        self.model = model
-        self.vol_factors = self.load_volume_scaling_factors()
-        self.surf_factors = self.load_surface_scaling_factors()
-        self.load_bounding_box()
 
-    def set_stream_velocity(self, stream_velocity):
-        self.stream_velocity = torch.full(
-            (1, 1), stream_velocity, dtype=torch.float32
-        ).to(self.device)
+            inference_dict["surface_mesh_centers"] = sampled_points
+            inference_dict["surface_normals"] = sampled_normals
+            inference_dict["surface_areas"] = sampled_areas
+            inference_dict["surface_faces"] = sampled_faces
 
-    def set_stencil_size(self, stencil_size):
-        self.stencil_size = stencil_size
+        # If the volume data is part of the model, sample the volume:
+        if datapipe.model_type == "volume" or datapipe.model_type == "combined":
+            ######################################################
+            # Build up volume points too with uniform sampling
+            # TODO - this doesn't filter points that are
+            # internal to the mesh
+            ######################################################
+            c_min = datapipe.config.bounding_box_dims[1]
+            c_max = datapipe.config.bounding_box_dims[0]
 
-    def set_air_density(self, air_density):
-        self.air_density = torch.full((1, 1), air_density, dtype=torch.float32).to(
-            self.device
-        )
+            sampled_volume_points = (c_max - c_min) * torch.rand(
+                batch_size, 3, device=device, dtype=torch.float32
+            ) + c_min
 
-    def set_stl_path(self, filename):
-        self.stl_path = filename
-
-    @torch.no_grad()
-    def compute_geo_encoding(self, cached_geom_path=None):
-        start_time = time.time()
-
-        if not self.cached_geo_encoding:
-            (
-                surface_vertices,
-                grid,
-                sdf_grid,
-                max_min,
-                s_grid,
-                surf_sdf_grid,
-                surf_max_min,
-                center_of_mass,
-            ) = self.ifp.process_surface_mesh(
-                self.bounding_box_min_max, self.bounding_box_surface_min_max
-            )
-            if self.bounding_box_min_max is None:
-                self.bounding_box_min_max = max_min
-            if self.bounding_box_surface_min_max is None:
-                self.bounding_box_surface_min_max = surf_max_min
-            self.center_of_mass = center_of_mass
-            self.grid = grid
-            self.s_grid = s_grid
-            self.sdf_grid = sdf_grid
-            self.surf_sdf_grid = surf_sdf_grid
-            self.out_dict["sdf"] = sdf_grid
-
-            geo_encoding, geo_encoding_surface = self.calculate_geometry_encoding(
-                surface_vertices, grid, sdf_grid, s_grid, surf_sdf_grid, self.model
-            )
+            inference_dict["volume_mesh_centers"] = (sampled_volume_points,)
+
+        ######################################################
+        # Pre-process the data with the datapipe:
+        ######################################################
+        preprocessed_data = datapipe.process_data(inference_dict)
+
+        if datapipe.model_type == "volume" or datapipe.model_type == "combined":
+            ######################################################
+            # Use the sign of the volume SDF to filter out points
+            # That are inside the STL mesh
+            ######################################################
+            sdf_nodes = preprocessed_data["sdf_nodes"]
+            valid_volume_idx = sdf_nodes > 0
+            preprocessed_data["volume_mesh_centers"] = preprocessed_data[
+                "volume_mesh_centers"
+            ][valid_volume_idx]
+
+        ######################################################
+        # Add a batch dimension to the data_dict
+        # (normally this is added in __getitem__ of the datapipe)
+        ######################################################
+        preprocessed_data = {k: v.unsqueeze(0) for k, v in preprocessed_data.items()}
+
+        ######################################################
+        # Forward pass through the model:
+        ######################################################
+        with torch.no_grad():
+            output_vol, output_surf = model(preprocessed_data)
+
+        ######################################################
+        # unnormalize the outputs with the datapipe
+        # Whatever settings are configured for normalizing the
+        # output fields - even though we don't have ground
+        # truth here - are reused to undo that for the predictions
+        ######################################################
+        output_vol, output_surf = datapipe.unscale_model_outputs(
+            output_vol, output_surf
+        )
+
+        surface_results.append(output_surf)
+        volume_results.append(output_vol)
+
+        total_points_processed += batch_size
+
+        current_loop_time = time.perf_counter()
+
+        logging_string = f"Device {device} processed {total_points_processed} points of {total_points}\n"
+        if gpu_handle is not None:
+            gpu_info = nvmlDeviceGetMemoryInfo(gpu_handle)
+            gpu_memory_used = gpu_info.used / (1024**3)
+            logging_string += f"  GPU memory used: {gpu_memory_used:.3f} Gb\n"
+
+        logging_string += f"  Time taken since batch start: {current_loop_time - batch_start_time:.2f} seconds\n"
+        logging_string += f"  iteration throughput: {batch_size / (current_loop_time - inner_loop_start_time):.1f} points per second\n"
+        logging_string += f"  Batch mean throughput: {total_points_processed / (current_loop_time - batch_start_time):.1f} points per second.\n"
+
+        if logger is not None:
+            logger.info(logging_string)
         else:
-            out_dict_cached = torch.load(cached_geom_path, map_location=self.device)
-            self.bounding_box_min_max = out_dict_cached["bounding_box_min_max"]
-            self.grid = out_dict_cached["grid"]
-            self.sdf_grid = out_dict_cached["sdf_grid"]
-            self.center_of_mass = out_dict_cached["com"]
-            geo_encoding = out_dict_cached["geo_encoding"]
-            geo_encoding_surface = out_dict_cached["geo_encoding_surface"]
-            self.out_dict["sdf"] = self.sdf_grid
-        torch.cuda.synchronize()
-        print("Time taken for geo encoding = %f" % (time.time() - start_time))
-
-        self.geometry_encoding = geo_encoding
-        self.geometry_encoding_surface = geo_encoding_surface
-
-    def compute_forces(self):
-        pressure = self.out_dict["pressure_surface"]
-        wall_shear = self.out_dict["wall-shear-stress"]
-        # sampling_indices = self.out_dict["sampling_indices"]
-
-        surface_normals = self.stl_normals[self.sampling_indices]
-        surface_areas = self.surface_areas[self.sampling_indices]
-
-        drag_force = torch.sum(
-            pressure[0, :, 0] * surface_normals[:, 0] * surface_areas
-            - wall_shear[0, :, 0] * surface_areas
-        )
-        lift_force = torch.sum(
-            pressure[0, :, 0] * surface_normals[:, 2] * surface_areas
-            - wall_shear[0, :, 2] * surface_areas
-        )
+            print(logging_string)
+
+    ######################################################
+    # Here at the end, get the values for the stl centers
+    # by updating the previous inference dict
+    # Only do this if the surface is part of the computation
+    # Comments are shorter here - it's a condensed version
+    # of the above logic.
+    ######################################################
+    if datapipe.model_type == "surface" or datapipe.model_type == "combined":
+        stl_inference_dict = {
+            "stl_coordinates": stl_coordinates,
+            "stl_faces": stl_faces,
+            "stl_centers": stl_centers,
+            "stl_areas": stl_areas,
+        }
+        inference_dict["surface_mesh_centers"] = stl_centers
+        inference_dict["surface_normals"] = stl_mesh_normals
+        inference_dict["surface_areas"] = stl_areas
+        inference_dict["surface_faces"] = stl_faces
+
+        # Just reuse the previous volume samples here if needed:
+        if datapipe.model_type == "combined":
+            inference_dict["volume_mesh_centers"] = sampled_volume_points
+
+        # Preprocess:
+        preprocessed_data = datapipe.process_data(inference_dict)
+
+        # Pull out the invalid volume points again, if needed:
+        if datapipe.model_type == "combined":
+            sdf_nodes = preprocessed_data["sdf_nodes"]
+            valid_volume_idx = sdf_nodes > 0
+            preprocessed_data["volume_mesh_centers"] = preprocessed_data[
+                "volume_mesh_centers"
+            ][valid_volume_idx]
+
+        # Run the model forward:
+        with torch.no_grad():
+            preprocessed_data = {
+                k: v.unsqueeze(0) for k, v in preprocessed_data.items()
+            }
+            _, output_surf = model(preprocessed_data)
+
+        # Unnormalize the outputs:
+        _, stl_center_results = datapipe.unscale_model_outputs(None, output_surf)
 
-        self.out_dict["drag_force"] = drag_force
-        self.out_dict["lift_force"] = lift_force
-
-    @torch.inference_mode()
-    def compute_surface_solutions(self, num_sample_points=None, plot_solutions=False):
-        total_time = 0.0
-        start_event = torch.cuda.Event(enable_timing=True)
-        end_event = torch.cuda.Event(enable_timing=True)
-
-        geo_encoding = self.geometry_encoding_surface
-        j = 0
-
-        with autocast(enabled=True):
-            start_event.record()
-            (
-                surface_mesh_centers,
-                surface_neighbors,
-                surface_normals,
-                surface_neighbors_normals,
-                surface_areas,
-                surface_neighbors_areas,
-                pos_normals_com,
-                surf_scaling_factors,
-                sampling_indices,
-            ) = self.ifp.sample_stl_points(
-                num_sample_points,
-                self.stl_centers.cpu().numpy(),
-                self.surface_areas.cpu().numpy(),
-                self.stl_normals.cpu().numpy(),
-                max_min=self.bounding_box_surface_min_max,
-                center_of_mass=self.center_of_mass,
-                stencil_size=self.stencil_size,
-            )
-            end_event.record()
-            end_event.synchronize()
-            cur_time = start_event.elapsed_time(end_event) / 1000.0
-            print(f"sample_points_in_surface time (s): {cur_time:.4f}")
-            # vol_coordinates_all.append(volume_mesh_centers)
-            surface_coordinates_all = surface_mesh_centers
-
-            inner_time = time.time()
-            start_event.record()
-            if num_sample_points == None:
-                point_batch_size = 512_000
-                num_points = surface_coordinates_all.shape[1]
-                subdomain_points = int(np.floor(num_points / point_batch_size))
-                surface_solutions = torch.zeros(1, num_points, self.num_surf_vars).to(
-                    self.device
-                )
-                for p in range(subdomain_points + 1):
-                    start_idx = p * point_batch_size
-                    end_idx = (p + 1) * point_batch_size
-                    surface_solutions_batch = self.compute_solution_on_surface(
-                        geo_encoding,
-                        surface_mesh_centers[:, start_idx:end_idx],
-                        surface_neighbors[:, start_idx:end_idx],
-                        surface_normals[:, start_idx:end_idx],
-                        surface_neighbors_normals[:, start_idx:end_idx],
-                        surface_areas[:, start_idx:end_idx],
-                        surface_neighbors_areas[:, start_idx:end_idx],
-                        pos_normals_com[:, start_idx:end_idx],
-                        self.s_grid,
-                        self.model,
-                        inlet_velocity=self.stream_velocity,
-                        air_density=self.air_density,
-                    )
-                    surface_solutions[:, start_idx:end_idx] = surface_solutions_batch
-            else:
-                point_batch_size = 512_000
-                num_points = num_sample_points
-                subdomain_points = int(np.floor(num_points / point_batch_size))
-                surface_solutions = torch.zeros(1, num_points, self.num_surf_vars).to(
-                    self.device
-                )
-                for p in range(subdomain_points + 1):
-                    start_idx = p * point_batch_size
-                    end_idx = (p + 1) * point_batch_size
-                    surface_solutions_batch = self.compute_solution_on_surface(
-                        geo_encoding,
-                        surface_mesh_centers[:, start_idx:end_idx],
-                        surface_neighbors[:, start_idx:end_idx],
-                        surface_normals[:, start_idx:end_idx],
-                        surface_neighbors_normals[:, start_idx:end_idx],
-                        surface_areas[:, start_idx:end_idx],
-                        surface_neighbors_areas[:, start_idx:end_idx],
-                        pos_normals_com[:, start_idx:end_idx],
-                        self.s_grid,
-                        self.model,
-                        inlet_velocity=self.stream_velocity,
-                        air_density=self.air_density,
-                    )
-                    # print(torch.amax(surface_solutions_batch, (0, 1)), torch.amin(surface_solutions_batch, (0, 1)))
-                    surface_solutions[:, start_idx:end_idx] = surface_solutions_batch
-
-            # print(surface_solutions.shape)
-            end_event.record()
-            end_event.synchronize()
-            cur_time = start_event.elapsed_time(end_event) / 1000.0
-            print(f"compute_solution time (s): {cur_time:.4f}")
-            total_time += float(time.time() - inner_time)
-            surface_solutions_all = surface_solutions
-            print(
-                "Time taken for compute solution on surface for=%f, %f"
-                % (time.time() - inner_time, torch.cuda.utilization(self.device))
-            )
-        cmax = surf_scaling_factors[0]
-        cmin = surf_scaling_factors[1]
+    else:
+        stl_center_results = None
+
+    # Stack up the results into one big tensor for surface and volume:
+    if all([s is not None for s in surface_results]):
+        surface_results = torch.cat(surface_results, dim=1)
+    if all([v is not None for v in volume_results]):
+        volume_results = torch.cat(volume_results, dim=0)
+
+    return stl_center_results, surface_results, volume_results
+
+
+def inference_epoch(
+    dataset: DrivaerMLDataset,
+    sampler: DistributedSampler,
+    datapipe: DoMINODataPipe,
+    model: DoMINO,
+    gpu_handle: int,
+    logger: PythonLogger,
+    batch_size: int = 24_000,
+    total_points: int = 1_024_000,
+):
+    ######################################################
+    # Inference can run in a distributed way by coordinating
+    # the indices for each rank, which the sampler does
+    ######################################################
+
+    # Convert the indices right to a list:
+    epoch_indices = list(sampler)
+
+    ######################################################
+    # Assuming here there are more than two target meshes
+    # This will get the IO pipe running in the background
+    # While we process a dataset.
+    ######################################################
+    dataset.preload(epoch_indices[0])
+    dataset.preload(epoch_indices[1])
+
+    for i_batch, epoch_index in enumerate(epoch_indices):
+        batch_start_time = time.perf_counter()
+        ######################################################
+        # Put another example in the preload queue while this
+        # batch is processed
+        ######################################################
+        data_loading_start = time.perf_counter()
+        if i_batch + 2 < len(epoch_indices):
+            # Preload next next
+            dataset.preload(epoch_indices[i_batch + 2])
+
+        ######################################################
+        # Get the data for this index:
+        ######################################################
+        sample_batched = dataset[epoch_index]
+        dataloading_time = time.perf_counter() - data_loading_start
+
+        logger.info(
+            f"Batch {i_batch} data loading time: {dataloading_time:.3f} seconds"
+        )
+
+        procesing_time_start = time.perf_counter()
+        stl_center_resulst, surface_results, volume_results = inference_on_single_stl(
+            sample_batched["stl_coordinates"],
+            sample_batched["stl_faces"],
+            model,
+            datapipe,
+            batch_size,
+            total_points,
+            gpu_handle,
+            logger,
+        )
+
+        ######################################################
+        # Peel off pressure, velocity, nut, shear, etc.
+        # Also compute drag, lift forces.
+        ######################################################
+        # TODO
+        # TODO
+        # TODO
+        # TODO
+        # TODO
+        # TODO
+        # TODO
+
+        procesing_time_end = time.perf_counter()
+        logger.info(
+            f"Batch {i_batch} GPU processing time: {procesing_time_end - procesing_time_start:.3f} seconds"
+        )
+
+        output_start_time = time.perf_counter()
+        ######################################################
+        # Save the outputs to file:
+        ######################################################
+        # TODO
+        # TODO
+        # TODO
+        # TODO
+        # TODO
+        # TODO
+        output_end_time = time.perf_counter()
+        logger.info(
+            f"Batch {i_batch} output time: {output_end_time - output_start_time:.3f} seconds"
+        )
+
+
+@hydra.main(version_base="1.3", config_path="conf", config_name="config")
+def main(cfg: DictConfig) -> None:
+    ######################################################
+    # initialize distributed manager
+    ######################################################
+    DistributedManager.initialize()
+    dist = DistributedManager()
 
-        surface_coordinates_all = torch.reshape(
-            surface_coordinates_all, (1, num_points, 3)
-        )
-        surface_solutions_all = torch.reshape(surface_solutions_all, (1, num_points, 4))
+    ######################################################
+    # Initialize NVML
+    ######################################################
+    nvmlInit()
+    gpu_handle = nvmlDeviceGetHandleByIndex(dist.device.index)
 
-        if self.surf_factors is not None:
-            surface_solutions_all = unnormalize(
-                surface_solutions_all, self.surf_factors[0], self.surf_factors[1]
-            )
+    ######################################################
+    # Initialize logger
+    ######################################################
 
-        self.out_dict["surface_coordinates"] = (
-            0.5 * (surface_coordinates_all + 1.0) * (cmax - cmin) + cmin
-        )
-        self.out_dict["pressure_surface"] = (
-            surface_solutions_all[:, :, :1]
-            * self.stream_velocity**2.0
-            * self.air_density
-        )
-        self.out_dict["wall-shear-stress"] = (
-            surface_solutions_all[:, :, 1:4]
-            * self.stream_velocity**2.0
-            * self.air_density
-        )
-        self.sampling_indices = sampling_indices
-
-    @torch.inference_mode()
-    def compute_volume_solutions(self, num_sample_points, plot_solutions=False):
-        total_time = 0.0
-        start_event = torch.cuda.Event(enable_timing=True)
-        end_event = torch.cuda.Event(enable_timing=True)
-
-        geo_encoding = self.geometry_encoding
-        j = 0
-
-        # Compute volume
-        point_batch_size = 512_000
-        num_points = num_sample_points
-        subdomain_points = int(np.floor(num_points / point_batch_size))
-        volume_solutions = torch.zeros(1, num_points, self.num_vol_vars).to(self.device)
-        volume_coordinates = torch.zeros(1, num_points, 3).to(self.device)
-
-        for p in range(subdomain_points + 1):
-            start_idx = p * point_batch_size
-            end_idx = (p + 1) * point_batch_size
-            if end_idx > num_points:
-                point_batch_size = num_points - start_idx
-                end_idx = num_points
-
-            with autocast(enabled=True):
-                inner_time = time.time()
-                start_event.record()
-                (
-                    volume_mesh_centers,
-                    pos_normals_com,
-                    pos_normals_closest,
-                    sdf_nodes,
-                    scaling_factors,
-                ) = self.ifp.sample_points_in_volume(
-                    num_points_vol=point_batch_size,
-                    max_min=self.bounding_box_min_max,
-                    center_of_mass=self.center_of_mass,
-                )
-                end_event.record()
-                end_event.synchronize()
-                cur_time = start_event.elapsed_time(end_event) / 1000.0
-                print(f"sample_points_in_volume time (s): {cur_time:.4f}")
-
-                volume_coordinates[:, start_idx:end_idx] = volume_mesh_centers
-
-                start_event.record()
-
-                volume_solutions_batch = self.compute_solution_in_volume(
-                    geo_encoding,
-                    volume_mesh_centers,
-                    sdf_nodes,
-                    pos_normals_closest,
-                    pos_normals_com,
-                    self.grid,
-                    self.model,
-                    use_sdf_basis=self.cfg.model.use_sdf_in_basis_func,
-                    inlet_velocity=self.stream_velocity,
-                    air_density=self.air_density,
-                )
-                volume_solutions[:, start_idx:end_idx] = volume_solutions_batch
-                end_event.record()
-                end_event.synchronize()
-                cur_time = start_event.elapsed_time(end_event) / 1000.0
-                print(f"compute_solution time (s): {cur_time:.4f}")
-                total_time += float(time.time() - inner_time)
-                # volume_solutions_all = volume_solutions
-                print(
-                    "Time taken for compute solution in volume for =%f"
-                    % (time.time() - inner_time)
-                )
-                # print("Points processed:", end_idx)
-            print("Total time measured = %f" % total_time)
-            print("Points processed:", end_idx)
+    logger = PythonLogger("Train")
+    logger = RankZeroLoggingWrapper(logger, dist)
 
-        cmax = scaling_factors[0]
-        cmin = scaling_factors[1]
-        volume_coordinates_all = volume_coordinates
-        volume_solutions_all = volume_solutions
+    logger.info(f"Config summary:\n{OmegaConf.to_yaml(cfg, sort_keys=True)}")
 
-        cmax = scaling_factors[0]
-        cmin = scaling_factors[1]
+    ######################################################
+    # Get scaling factors
+    # Likely, you want to reuse the scaling factors from training.
+    ######################################################
+    pickle_path = os.path.join(cfg.output) + "/scaling_factors/scaling_factors.pkl"
 
-        volume_coordinates_all = torch.reshape(
-            volume_coordinates_all, (1, num_sample_points, 3)
-        )
-        volume_solutions_all = torch.reshape(
-            volume_solutions_all, (1, num_sample_points, self.num_vol_vars)
+    try:
+        scaling_factors = ScalingFactors.load(pickle_path)
+        logger.info(f"Scaling factors loaded from: {pickle_path}")
+    except FileNotFoundError:
+        raise FileNotFoundError(
+            f"Scaling factors not found at: {pickle_path}; please run compute_statistics.py to compute them."
         )
 
-        if self.vol_factors is not None:
-            volume_solutions_all = unnormalize(
-                volume_solutions_all, self.vol_factors[0], self.vol_factors[1]
-            )
-
-        self.out_dict["coordinates"] = (
-            0.5 * (volume_coordinates_all + 1.0) * (cmax - cmin) + cmin
-        )
-        self.out_dict["velocity"] = (
-            volume_solutions_all[:, :, :3] * self.stream_velocity
-        )
-        self.out_dict["pressure"] = (
-            volume_solutions_all[:, :, 3:4]
-            * self.stream_velocity**2.0
-            * self.air_density
-        )
-        # self.out_dict["turbulent-kinetic-energy"] = (
-        #     volume_solutions_all[:, :, 4:5]
-        #     * self.stream_velocity**2.0
-        #     * self.air_density
-        # )
-        # self.out_dict["turbulent-viscosity"] = (
-        #     volume_solutions_all[:, :, 5:] * self.stream_velocity * self.length_scale
-        # )
-        self.out_dict["bounding_box_dims"] = torch.vstack(self.bounding_box_min_max)
-
-        if plot_solutions:
-            print("Plotting solutions")
-            plot_save_path = os.path.join(self.cfg.output, "plots/contours/")
-            create_directory(plot_save_path)
-
-            p_grid = 0.5 * (self.grid + 1.0) * (cmax - cmin) + cmin
-            p_grid = p_grid.cpu().numpy()
-            sdf_grid = self.sdf_grid.cpu().numpy()
-            volume_coordinates_all = (
-                0.5 * (volume_coordinates_all + 1.0) * (cmax - cmin) + cmin
-            )
-            volume_solutions_all[:, :, :3] = (
-                volume_solutions_all[:, :, :3] * self.stream_velocity
-            )
-            volume_solutions_all[:, :, 3:4] = (
-                volume_solutions_all[:, :, 3:4]
-                * self.stream_velocity**2.0
-                * self.air_density
-            )
-            # volume_solutions_all[:, :, 4:5] = (
-            #     volume_solutions_all[:, :, 4:5]
-            #     * self.stream_velocity**2.0
-            #     * self.air_density
-            # )
-            # volume_solutions_all[:, :, 5] = (
-            #     volume_solutions_all[:, :, 5] * self.stream_velocity * self.length_scale
-            # )
-            volume_coordinates_all = volume_coordinates_all.cpu().numpy()
-            volume_solutions_all = volume_solutions_all.cpu().numpy()
-
-            # ND interpolation on a grid
-            prediction_grid = nd_interpolator(
-                volume_coordinates_all, volume_solutions_all[0], p_grid[0]
-            )
-            nx, ny, nz, vars = prediction_grid.shape
-            idx = np.where(sdf_grid[0] < 0.0)
-            prediction_grid[idx] = float("inf")
-            axes_titles = ["y/4 plane", "y/2 plane"]
-
-            plot(
-                prediction_grid[:, int(ny / 4), :, 0],
-                prediction_grid[:, int(ny / 2), :, 0],
-                var="x-vel",
-                save_path=plot_save_path + f"x-vel-midplane_{self.stream_velocity}.png",
-                axes_titles=axes_titles,
-                plot_error=False,
-            )
-            plot(
-                prediction_grid[:, int(ny / 4), :, 1],
-                prediction_grid[:, int(ny / 2), :, 1],
-                var="y-vel",
-                save_path=plot_save_path + f"y-vel-midplane_{self.stream_velocity}.png",
-                axes_titles=axes_titles,
-                plot_error=False,
-            )
-            plot(
-                prediction_grid[:, int(ny / 4), :, 2],
-                prediction_grid[:, int(ny / 2), :, 2],
-                var="z-vel",
-                save_path=plot_save_path + f"z-vel-midplane_{self.stream_velocity}.png",
-                axes_titles=axes_titles,
-                plot_error=False,
-            )
-            plot(
-                prediction_grid[:, int(ny / 4), :, 3],
-                prediction_grid[:, int(ny / 2), :, 3],
-                var="pres",
-                save_path=plot_save_path + f"pres-midplane_{self.stream_velocity}.png",
-                axes_titles=axes_titles,
-                plot_error=False,
-            )
-            # plot(
-            #     prediction_grid[:, int(ny / 4), :, 4],
-            #     prediction_grid[:, int(ny / 2), :, 4],
-            #     var="tke",
-            #     save_path=plot_save_path + f"tke-midplane_{self.stream_velocity}.png",
-            #     axes_titles=axes_titles,
-            #     plot_error=False,
-            # )
-            # plot(
-            #     prediction_grid[:, int(ny / 4), :, 5],
-            #     prediction_grid[:, int(ny / 2), :, 5],
-            #     var="nut",
-            #     save_path=plot_save_path + f"nut-midplane_{self.stream_velocity}.png",
-            #     axes_titles=axes_titles,
-            #     plot_error=False,
-            # )
-
-    def cold_start(self, cached_geom_path=None):
-        print("Cold start")
-        self.compute_geo_encoding(cached_geom_path)
-        self.compute_volume_solutions(num_sample_points=10)
-        self.clear_out_dict()
-
-    @torch.no_grad()
-    def calculate_geometry_encoding(
-        self, geo_centers, p_grid, sdf_grid, s_grid, sdf_surf_grid, model
-    ):
-        vol_min = self.bounding_box_min_max[0]
-        vol_max = self.bounding_box_min_max[1]
-        surf_min = self.bounding_box_surface_min_max[0]
-        surf_max = self.bounding_box_surface_min_max[1]
-
-        geo_centers_vol = 2.0 * (geo_centers - vol_min) / (vol_max - vol_min) - 1
-        if self.dist.world_size == 1:
-            encoding_g_vol = model.geo_rep_volume(geo_centers_vol, p_grid, sdf_grid)
-        else:
-            encoding_g_vol = model.module.geo_rep_volume(
-                geo_centers_vol, p_grid, sdf_grid
-            )
+    vol_factors = scaling_factors.mean["volume_fields"]
+    surf_factors = scaling_factors.mean["surface_fields"]
 
-        geo_centers_surf = 2.0 * (geo_centers - surf_min) / (surf_max - surf_min) - 1
+    ######################################################
+    # Configure the model
+    ######################################################
+    model_type = cfg.model.model_type
+    num_vol_vars, num_surf_vars, num_global_features = get_num_vars(cfg, model_type)
 
-        if self.dist.world_size == 1:
-            encoding_g_surf = model.geo_rep_surface(
-                geo_centers_surf, s_grid, sdf_surf_grid
-            )
-        else:
-            encoding_g_surf = model.module.geo_rep_surface(
-                geo_centers_surf, s_grid, sdf_surf_grid
-            )
-
-        if self.dist.world_size == 1:
-            encoding_g_surf1 = model.geo_rep_surface1(
-                geo_centers_surf, s_grid, sdf_surf_grid
-            )
-        else:
-            encoding_g_surf1 = model.module.geo_rep_surface1(
-                geo_centers_surf, s_grid, sdf_surf_grid
-            )
-
-        geo_encoding = 0.5 * encoding_g_surf1 + 0.5 * encoding_g_vol
-        geo_encoding_surface = 0.5 * encoding_g_surf
-        return geo_encoding, geo_encoding_surface
-
-    @torch.no_grad()
-    def compute_solution_on_surface(
-        self,
-        geo_encoding,
-        surface_mesh_centers,
-        surface_mesh_neighbors,
-        surface_normals,
-        surface_neighbors_normals,
-        surface_areas,
-        surface_neighbors_areas,
-        pos_normals_com,
-        s_grid,
-        model,
-        inlet_velocity,
-        air_density,
-    ):
-        """
-        Global parameters: For this particular case, the model was trained on single velocity/density values
-        across all simulations. Hence, global_params_values and global_params_reference are the same.
-        """
-        global_params_values = torch.cat(
-            (inlet_velocity, air_density), axis=1
-        )  # (1, 2)
-        global_params_values = torch.unsqueeze(global_params_values, -1)  # (1, 2, 1)
-
-        global_params_reference = torch.cat(
-            (inlet_velocity, air_density), axis=1
-        )  # (1, 2)
-        global_params_reference = torch.unsqueeze(
-            global_params_reference, -1
-        )  # (1, 2, 1)
-
-        if self.dist.world_size == 1:
-            geo_encoding_local = model.geo_encoding_local(
-                geo_encoding, surface_mesh_centers, s_grid, mode="surface"
-            )
-        else:
-            geo_encoding_local = model.module.geo_encoding_local(
-                geo_encoding, surface_mesh_centers, s_grid, mode="surface"
-            )
+    if model_type == "combined" or model_type == "surface":
+        surface_variable_names = list(cfg.variables.surface.solution.keys())
+    else:
+        surface_variable_names = []
 
-        pos_encoding = pos_normals_com
-        surface_areas = torch.unsqueeze(surface_areas, -1)
-        surface_neighbors_areas = torch.unsqueeze(surface_neighbors_areas, -1)
-
-        if self.dist.world_size == 1:
-            pos_encoding = model.position_encoder(pos_encoding, eval_mode="surface")
-            tpredictions_batch = model.calculate_solution_with_neighbors(
-                surface_mesh_centers,
-                geo_encoding_local,
-                pos_encoding,
-                surface_mesh_neighbors,
-                surface_normals,
-                surface_neighbors_normals,
-                surface_areas,
-                surface_neighbors_areas,
-                global_params_values,
-                global_params_reference,
-            )
-        else:
-            pos_encoding = model.module.position_encoder(
-                pos_encoding, eval_mode="surface"
-            )
-            tpredictions_batch = model.module.calculate_solution_with_neighbors(
-                surface_mesh_centers,
-                geo_encoding_local,
-                pos_encoding,
-                surface_mesh_neighbors,
-                surface_normals,
-                surface_neighbors_normals,
-                surface_areas,
-                surface_neighbors_areas,
-                global_params_values,
-                global_params_reference,
-            )
+    if model_type == "combined" or model_type == "volume":
+        volume_variable_names = list(cfg.variables.volume.solution.keys())
+    else:
+        volume_variable_names = []
+
+    ######################################################
+    # Check that the sample size is equal.
+    # unequal samples could be done but they aren't, here.s
+    ######################################################
+    if cfg.model.model_type == "combined":
+        if cfg.model.volume_points_sample != cfg.model.surface_points_sample:
+            raise ValueError(
+                "Volume and surface points sample must be equal for combined model"
+            )
+
+    # Get the number of sample points:
+    sample_points = (
+        cfg.model.surface_points_sample
+        if cfg.model.model_type == "surface"
+        else cfg.model.volume_points_sample
+    )
 
-        return tpredictions_batch
-
-    @torch.no_grad()
-    def compute_solution_in_volume(
-        self,
-        geo_encoding,
-        volume_mesh_centers,
-        sdf_nodes,
-        pos_enc_closest,
-        pos_normals_com,
-        p_grid,
-        model,
-        use_sdf_basis,
-        inlet_velocity,
-        air_density,
-    ):
-        ## Global parameters
-        global_params_values = torch.cat(
-            (inlet_velocity, air_density), axis=1
-        )  # (1, 2)
-        global_params_values = torch.unsqueeze(global_params_values, -1)  # (1, 2, 1)
-
-        global_params_reference = torch.cat(
-            (inlet_velocity, air_density), axis=1
-        )  # (1, 2)
-        global_params_reference = torch.unsqueeze(
-            global_params_reference, -1
-        )  # (1, 2, 1)
-
-        if self.dist.world_size == 1:
-            geo_encoding_local = model.geo_encoding_local(
-                geo_encoding, volume_mesh_centers, p_grid, mode="volume"
-            )
-        else:
-            geo_encoding_local = model.module.geo_encoding_local(
-                geo_encoding, volume_mesh_centers, p_grid, mode="volume"
-            )
-        if use_sdf_basis:
-            pos_encoding = torch.cat(
-                (sdf_nodes, pos_enc_closest, pos_normals_com), axis=-1
-            )
-        else:
-            pos_encoding = pos_normals_com
-
-        if self.dist.world_size == 1:
-            pos_encoding = model.position_encoder(pos_encoding, eval_mode="volume")
-            tpredictions_batch = model.calculate_solution(
-                volume_mesh_centers,
-                geo_encoding_local,
-                pos_encoding,
-                global_params_values,
-                global_params_reference,
-                num_sample_points=self.stencil_size,
-                eval_mode="volume",
-            )
-        else:
-            pos_encoding = model.module.position_encoder(
-                pos_encoding, eval_mode="volume"
-            )
-            tpredictions_batch = model.module.calculate_solution(
-                volume_mesh_centers,
-                geo_encoding_local,
-                pos_encoding,
-                global_params_values,
-                global_params_reference,
-                num_sample_points=self.stencil_size,
-                eval_mode="volume",
-            )
-        return tpredictions_batch
+    ######################################################
+    # If the batch size doesn't evenly divide
+    # the num points, that's ok.  But print a warning
+    # that the total points will get tweaked.
+    ######################################################
+    if cfg.eval.num_points % sample_points != 0:
+        logger.warning(
+            f"Batch size {sample_points} doesn't evenly divide num points {cfg.eval.num_points}."
+        )
+        logger.warning(
+            f"Total points will be rounded up to {((cfg.eval.num_points // sample_points) + 1) * sample_points}."
+        )
+
+    ######################################################
+    # Configure the dataset
+    # We are applying preprocessing in a separate step
+    # for this - so the dataset and datapipe are separate
+    ######################################################
+
+    # Override the model type
+    # For the inference pipeline, we adjust the tooling a little for the data.
+    # We use only a bare STL dataset that will read the mesh coordinates
+    # and triangle definitions.  We'll compute the centers and normals
+    # on the GPU (instead of on the CPU, as pyvista would do) and
+    # then we can sample from that mesh on the GPU.
+    test_dataset = DrivaerMLDataset(
+        data_dir=cfg.eval.test_path,
+        keys_to_read=[
+            "stl_coordinates",
+            "stl_faces",
+        ],
+        output_device=dist.device,
+    )
 
+    # Volumetric data will be generated on the fly on the GPU.
+
+    ######################################################
+    # Configure the datapipe
+    # We _won't_ iterate over the datapipe, however, we can use the
+    # datapipe processing tools on the sampled surface and
+    # volume points with the same preprocessing.
+    # It also is used to un-normalize the model outputs.
+    ######################################################
+    overrides = {}
+    if hasattr(cfg.data, "gpu_preprocessing"):
+        overrides["gpu_preprocessing"] = cfg.data.gpu_preprocessing
+
+    if hasattr(cfg.data, "gpu_output"):
+        overrides["gpu_output"] = cfg.data.gpu_output
+
+    test_datapipe = DoMINODataPipe(
+        None,
+        phase="test",
+        grid_resolution=cfg.model.interp_res,
+        volume_variables=volume_variable_names,
+        surface_variables=surface_variable_names,
+        normalize_coordinates=True,
+        sampling=False,
+        sample_in_bbox=True,
+        volume_points_sample=None,
+        surface_points_sample=None,
+        geom_points_sample=None,
+        positional_encoding=cfg.model.positional_encoding,
+        volume_factors=vol_factors,
+        surface_factors=surf_factors,
+        scaling_type=cfg.model.normalization,
+        model_type=model_type,
+        bounding_box_dims=cfg.data.bounding_box,
+        bounding_box_dims_surf=cfg.data.bounding_box_surface,
+        num_surface_neighbors=cfg.model.num_neighbors_surface,
+        resample_surfaces=cfg.model.resampling_surface_mesh.resample,
+        resampling_points=cfg.model.resampling_surface_mesh.points,
+        surface_sampling_algorithm=cfg.model.surface_sampling_algorithm,
+        **overrides,
+    )
 
-if __name__ == "__main__":
-    OmegaConf.register_new_resolver("eval", eval)
-    with initialize(version_base="1.3", config_path="conf"):
-        cfg = compose(config_name="config")
+    ######################################################
+    # The sampler is used in multi-gpu inference to
+    # coordinate the batches used for each rank.
+    ######################################################
+    test_sampler = DistributedSampler(
+        test_dataset,
+        num_replicas=dist.world_size,
+        rank=dist.rank,
+        **cfg.train.sampler,
+    )
 
-    DistributedManager.initialize()
-    dist = DistributedManager()
+    ######################################################
+    # Configure the model
+    # and move it to the device.
+    ######################################################
+    model = DoMINO(
+        input_features=3,
+        output_features_vol=num_vol_vars,
+        output_features_surf=num_surf_vars,
+        global_features=num_global_features,
+        model_parameters=cfg.model,
+    ).to(dist.device)
+    # model = torch.compile(model, fullgraph=True, dynamic=True)  # TODO make this configurable
+
+    # Print model summary (structure and parmeter count).
+    logger.info(f"Model summary:\n{torchinfo.summary(model, verbose=0, depth=2)}\n")
 
     if dist.world_size > 1:
         torch.distributed.barrier()
 
-    input_path = cfg.eval.test_path
-    dirnames = get_filenames(input_path)
-    dev_id = torch.cuda.current_device()
-    num_files = int(len(dirnames) / 8)
-    dirnames_per_gpu = dirnames[int(num_files * dev_id) : int(num_files * (dev_id + 1))]
-
-    domino = dominoInference(cfg, dist, False)
-    domino.initialize_model(
-        model_path="/lustre/models/DoMINO.0.7.pt"
-    )  ## Replace the model path with location of the trained model
-
-    for count, dirname in enumerate(dirnames_per_gpu):
-        # print(f"Processing file {dirname}")
-        filepath = os.path.join(input_path, dirname)
-
-        STREAM_VELOCITY = 30.0
-        AIR_DENSITY = 1.205
-
-        # Neighborhood points sampled for evaluation, tradeoff between accuracy and speed
-        STENCIL_SIZE = (
-            7  # Higher stencil size -> more accuracy but more evaluation time
-        )
-
-        domino.set_stl_path(filepath)
-        domino.set_stream_velocity(STREAM_VELOCITY)
-        domino.set_stencil_size(STENCIL_SIZE)
-
-        domino.read_stl()
-
-        domino.initialize_data_processor()
-
-        # Calculate geometry encoding
-        domino.compute_geo_encoding()
-
-        # Calculate volume solutions
-        domino.compute_volume_solutions(
-            num_sample_points=10_256_000, plot_solutions=False
-        )
+    load_checkpoint(
+        to_absolute_path(cfg.resume_dir),
+        models=model,
+        device=dist.device,
+    )
 
-        # Calculate surface solutions
-        domino.compute_surface_solutions()
-        domino.compute_forces()
-        out_dict = domino.get_out_dict()
-
-        print(
-            "Dirname:",
-            dirname,
-            "Drag:",
-            out_dict["drag_force"],
-            "Lift:",
-            out_dict["lift_force"],
-        )
-        vtp_path = f"/lustre/snidhan/physicsnemo-work/domino-global-param-runs/stl-results/pred_{dirname}_4.vtp"
-        domino.mesh_stl.save(vtp_path)
-        reader = vtk.vtkXMLPolyDataReader()
-        reader.SetFileName(f"{vtp_path}")
-        reader.Update()
-        polydata_surf = reader.GetOutput()
-
-        surfParam_vtk = numpy_support.numpy_to_vtk(
-            out_dict["pressure_surface"][0].cpu().numpy()
-        )
-        surfParam_vtk.SetName(f"Pressure")
-        polydata_surf.GetCellData().AddArray(surfParam_vtk)
+    start_time = time.perf_counter()
+
+    # This controls what indices to use for each epoch.
+    test_sampler.set_epoch(0)
+
+    prof = Profiler()
+
+    model.eval()
+    epoch_start_time = time.perf_counter()
+    with prof:
+        inference_epoch(
+            dataset=test_dataset,
+            sampler=test_sampler,
+            datapipe=test_datapipe,
+            model=model,
+            logger=logger,
+            gpu_handle=gpu_handle,
+            batch_size=sample_points,
+            total_points=cfg.eval.num_points,
+        )
+    epoch_end_time = time.perf_counter()
+    logger.info(
+        f"Device {dist.device}, Epoch took {epoch_end_time - epoch_start_time:.3f} seconds"
+    )
 
-        surfParam_vtk = numpy_support.numpy_to_vtk(
-            out_dict["wall-shear-stress"][0].cpu().numpy()
-        )
-        surfParam_vtk.SetName(f"Wall-shear-stress")
-        polydata_surf.GetCellData().AddArray(surfParam_vtk)
 
-        write_to_vtp(polydata_surf, vtp_path)
-        exit()
+if __name__ == "__main__":
+    # Profiler().enable("torch")
+    # Profiler().initialize()
+    main()
+    # Profiler().finalize()
diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py
deleted file mode 100644
index 3c6acc3ccd..0000000000
--- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py
+++ /dev/null
@@ -1,634 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
-# SPDX-FileCopyrightText: All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-This code shows how to use a trained DoMINO model, with it's corresponding
-preprocessing pipeline, to infer values on and around an STL mesh file.
-
-This script uses the meshes from the DrivaerML dataset, however, the logic
-is largely the same.  As an overview:
-- Load the model
-- Set up the preprocessor
-- Loop over meshes
-- In each mesh, sample random points on the surface, volume, or both
-- Preprocess the points and run them through the model
-- Process the STL mesh centers, too
-- Collect the results and return
-- Save the results to file.
-"""
-
-import time
-import os
-import re
-from typing import Literal, Any
-
-import apex
-import numpy as np
-import hydra
-from hydra.utils import to_absolute_path
-from omegaconf import DictConfig, OmegaConf
-import torch
-
-DISABLE_RMM = os.environ.get("DISABLE_RMM", False)
-if not DISABLE_RMM:
-    import rmm
-    from rmm.allocators.torch import rmm_torch_allocator
-
-    rmm.reinitialize(pool_allocator=True)
-    torch.cuda.memory.change_current_allocator(rmm_torch_allocator)
-
-import torchinfo
-import torch.distributed as dist
-from torch.amp import GradScaler, autocast
-from torch.nn.parallel import DistributedDataParallel
-from torch.utils.data import DataLoader
-from torch.utils.data.distributed import DistributedSampler
-from torch.utils.tensorboard import SummaryWriter
-from nvtx import annotate as nvtx_annotate
-import torch.cuda.nvtx as nvtx
-
-from physicsnemo.distributed import DistributedManager
-from physicsnemo.launch.utils import load_checkpoint, save_checkpoint
-from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper
-
-from physicsnemo.datapipes.cae.domino_datapipe2 import (
-    DoMINODataPipe,
-    create_domino_dataset,
-)
-from physicsnemo.datapipes.cae.drivaer_ml_dataset import (
-    DrivaerMLDataset,
-)
-
-from physicsnemo.models.domino.model import DoMINO
-from physicsnemo.utils.domino.utils import sample_points_on_mesh
-
-from utils import ScalingFactors
-
-# This is included for GPU memory tracking:
-from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo
-import time
-
-
-# Initialize NVML
-nvmlInit()
-
-
-from physicsnemo.utils.profiling import profile, Profiler
-
-
-from loss import compute_loss_dict
-from utils import get_num_vars
-
-
-def inference_on_single_stl(
-    stl_coordinates: torch.Tensor,
-    stl_faces: torch.Tensor,
-    model: DoMINO,
-    datapipe: DoMINODataPipe,
-    batch_size: int,
-    total_points: int,
-    gpu_handle: int | None = None,
-    logger: PythonLogger | None = None,
-):
-    """
-    Perform model inference on a single STL mesh.
-
-    This function will take the input mesh + faces and
-    then sample the surface and volume to produce the model outputs
-    at `total_points` locations in batches of `batch_size`.
-
-
-
-    Args:
-        stl_coordinates: The coordinates of the STL mesh.
-        stl_faces: The faces of the STL mesh.
-        model: The model to use for inference.
-        datapipe: The datapipe to use for preprocessing.
-        batch_size: The batch size to use for inference.
-        total_points: The total number of points to process.
-        gpu_handle: The GPU handle to use for inference.
-        logger: The logger to use for logging.
-    """
-    device = stl_coordinates.device
-    batch_start_time = time.perf_counter()
-    ######################################################
-    # The IO only reads in "stl_faces" and "stl_coordinates".
-    # "stl_areas" and "stl_centers" would be computed by
-    # pyvista on CPU - instead, we do it on the GPU
-    # right here.
-    ######################################################
-
-    # Center is a mean of the 3 vertices
-    triangle_vertices = stl_coordinates[stl_faces.reshape((-1, 3))]
-    stl_centers = triangle_vertices.mean(dim=-1)
-    ######################################################
-    # Area we compute from the cross product of two sides:
-    ######################################################
-    d1 = triangle_vertices[:, 1] - triangle_vertices[:, 0]
-    d2 = triangle_vertices[:, 2] - triangle_vertices[:, 0]
-    stl_mesh_normals = torch.linalg.cross(d1, d2, dim=1)
-    normals_norm = torch.linalg.norm(stl_mesh_normals, dim=1)
-    stl_mesh_normals = stl_mesh_normals / normals_norm.unsqueeze(1)
-    stl_areas = 0.5 * normals_norm
-
-    ######################################################
-    # For computing the points, we take those stl objects,
-    # sample in chunks of `batch_size` until we've
-    # accumulated `total_points` predictions.
-    ######################################################
-
-    batch_output_dict = {}
-    N = 2
-    total_points_processed = 0
-
-    # Use these lists to build up the output tensors:
-    surface_results = []
-    volume_results = []
-
-    while total_points_processed < total_points:
-        inner_loop_start_time = time.perf_counter()
-
-        ######################################################
-        # Create the dictionary as the preprocessing expects:
-        ######################################################
-        inference_dict = {
-            "stl_coordinates": stl_coordinates,
-            "stl_faces": stl_faces,
-            "stl_centers": stl_centers,
-            "stl_areas": stl_areas,
-        }
-
-        # If the surface data is part of the model, sample the surface:
-
-        if datapipe.model_type == "surface" or datapipe.model_type == "combined":
-            ######################################################
-            # This function will sample points on the STL surface
-            ######################################################
-            sampled_points, sampled_faces, sampled_areas, sampled_normals = (
-                sample_points_on_mesh(
-                    stl_coordinates,
-                    stl_faces,
-                    batch_size,
-                    mesh_normals=stl_mesh_normals,
-                    mesh_areas=stl_areas,
-                )
-            )
-
-            inference_dict["surface_mesh_centers"] = sampled_points
-            inference_dict["surface_normals"] = sampled_normals
-            inference_dict["surface_areas"] = sampled_areas
-            inference_dict["surface_faces"] = sampled_faces
-
-        # If the volume data is part of the model, sample the volume:
-        if datapipe.model_type == "volume" or datapipe.model_type == "combined":
-            ######################################################
-            # Build up volume points too with uniform sampling
-            # TODO - this doesn't filter points that are
-            # internal to the mesh
-            ######################################################
-            c_min = datapipe.config.bounding_box_dims[1]
-            c_max = datapipe.config.bounding_box_dims[0]
-
-            sampled_volume_points = (c_max - c_min) * torch.rand(
-                batch_size, 3, device=device, dtype=torch.float32
-            ) + c_min
-
-            inference_dict["volume_mesh_centers"] = (sampled_volume_points,)
-
-        ######################################################
-        # Pre-process the data with the datapipe:
-        ######################################################
-        preprocessed_data = datapipe.process_data(inference_dict)
-
-        if datapipe.model_type == "volume" or datapipe.model_type == "combined":
-            ######################################################
-            # Use the sign of the volume SDF to filter out points
-            # That are inside the STL mesh
-            ######################################################
-            sdf_nodes = preprocessed_data["sdf_nodes"]
-            valid_volume_idx = sdf_nodes > 0
-            preprocessed_data["volume_mesh_centers"] = preprocessed_data[
-                "volume_mesh_centers"
-            ][valid_volume_idx]
-
-        ######################################################
-        # Add a batch dimension to the data_dict
-        # (normally this is added in __getitem__ of the datapipe)
-        ######################################################
-        preprocessed_data = {k: v.unsqueeze(0) for k, v in preprocessed_data.items()}
-
-        ######################################################
-        # Forward pass through the model:
-        ######################################################
-        with torch.no_grad():
-            output_vol, output_surf = model(preprocessed_data)
-
-        ######################################################
-        # unnormalize the outputs with the datapipe
-        # Whatever settings are configured for normalizing the
-        # output fields - even though we don't have ground
-        # truth here - are reused to undo that for the predictions
-        ######################################################
-        output_vol, output_surf = datapipe.unscale_model_outputs(
-            output_vol, output_surf
-        )
-
-        surface_results.append(output_surf)
-        volume_results.append(output_vol)
-
-        total_points_processed += batch_size
-
-        current_loop_time = time.perf_counter()
-
-        logging_string = f"Device {device} processed {total_points_processed} points of {total_points}\n"
-        if gpu_handle is not None:
-            gpu_info = nvmlDeviceGetMemoryInfo(gpu_handle)
-            gpu_memory_used = gpu_info.used / (1024**3)
-            logging_string += f"  GPU memory used: {gpu_memory_used:.3f} Gb\n"
-
-        logging_string += f"  Time taken since batch start: {current_loop_time - batch_start_time:.2f} seconds\n"
-        logging_string += f"  iteration throughput: {batch_size / (current_loop_time - inner_loop_start_time):.1f} points per second\n"
-        logging_string += f"  Batch mean throughput: {total_points_processed / (current_loop_time - batch_start_time):.1f} points per second.\n"
-
-        if logger is not None:
-            logger.info(logging_string)
-        else:
-            print(logging_string)
-
-    ######################################################
-    # Here at the end, get the values for the stl centers
-    # by updating the previous inference dict
-    # Only do this if the surface is part of the computation
-    # Comments are shorter here - it's a condensed version
-    # of the above logic.
-    ######################################################
-    if datapipe.model_type == "surface" or datapipe.model_type == "combined":
-        stl_inference_dict = {
-            "stl_coordinates": stl_coordinates,
-            "stl_faces": stl_faces,
-            "stl_centers": stl_centers,
-            "stl_areas": stl_areas,
-        }
-        inference_dict["surface_mesh_centers"] = stl_centers
-        inference_dict["surface_normals"] = stl_mesh_normals
-        inference_dict["surface_areas"] = stl_areas
-        inference_dict["surface_faces"] = stl_faces
-
-        # Just reuse the previous volume samples here if needed:
-        if datapipe.model_type == "combined":
-            inference_dict["volume_mesh_centers"] = sampled_volume_points
-
-        # Preprocess:
-        preprocessed_data = datapipe.process_data(inference_dict)
-
-        # Pull out the invalid volume points again, if needed:
-        if datapipe.model_type == "combined":
-            sdf_nodes = preprocessed_data["sdf_nodes"]
-            valid_volume_idx = sdf_nodes > 0
-            preprocessed_data["volume_mesh_centers"] = preprocessed_data[
-                "volume_mesh_centers"
-            ][valid_volume_idx]
-
-        # Run the model forward:
-        with torch.no_grad():
-            preprocessed_data = {
-                k: v.unsqueeze(0) for k, v in preprocessed_data.items()
-            }
-            _, output_surf = model(preprocessed_data)
-
-        # Unnormalize the outputs:
-        _, stl_center_results = datapipe.unscale_model_outputs(None, output_surf)
-
-    else:
-        stl_center_results = None
-
-    # Stack up the results into one big tensor for surface and volume:
-    if all([s is not None for s in surface_results]):
-        surface_results = torch.cat(surface_results, dim=1)
-    if all([v is not None for v in volume_results]):
-        volume_results = torch.cat(volume_results, dim=0)
-
-    return stl_center_results, surface_results, volume_results
-
-
-def inference_epoch(
-    dataset: DrivaerMLDataset,
-    sampler: DistributedSampler,
-    datapipe: DoMINODataPipe,
-    model: DoMINO,
-    gpu_handle: int,
-    logger: PythonLogger,
-    batch_size: int = 24_000,
-    total_points: int = 1_024_000,
-):
-    ######################################################
-    # Inference can run in a distributed way by coordinating
-    # the indices for each rank, which the sampler does
-    ######################################################
-
-    # Convert the indices right to a list:
-    epoch_indices = list(sampler)
-
-    ######################################################
-    # Assuming here there are more than two target meshes
-    # This will get the IO pipe running in the background
-    # While we process a dataset.
-    ######################################################
-    dataset.preload(epoch_indices[0])
-    dataset.preload(epoch_indices[1])
-
-    for i_batch, epoch_index in enumerate(epoch_indices):
-        batch_start_time = time.perf_counter()
-        ######################################################
-        # Put another example in the preload queue while this
-        # batch is processed
-        ######################################################
-        data_loading_start = time.perf_counter()
-        if i_batch + 2 < len(epoch_indices):
-            # Preload next next
-            dataset.preload(epoch_indices[i_batch + 2])
-
-        ######################################################
-        # Get the data for this index:
-        ######################################################
-        sample_batched = dataset[epoch_index]
-        dataloading_time = time.perf_counter() - data_loading_start
-
-        logger.info(
-            f"Batch {i_batch} data loading time: {dataloading_time:.3f} seconds"
-        )
-
-        procesing_time_start = time.perf_counter()
-        stl_center_resulst, surface_results, volume_results = inference_on_single_stl(
-            sample_batched["stl_coordinates"],
-            sample_batched["stl_faces"],
-            model,
-            datapipe,
-            batch_size,
-            total_points,
-            gpu_handle,
-            logger,
-        )
-
-        ######################################################
-        # Peel off pressure, velocity, nut, shear, etc.
-        # Also compute drag, lift forces.
-        ######################################################
-        # TODO
-        # TODO
-        # TODO
-        # TODO
-        # TODO
-        # TODO
-        # TODO
-
-        procesing_time_end = time.perf_counter()
-        logger.info(
-            f"Batch {i_batch} GPU processing time: {procesing_time_end - procesing_time_start:.3f} seconds"
-        )
-
-        output_start_time = time.perf_counter()
-        ######################################################
-        # Save the outputs to file:
-        ######################################################
-        # TODO
-        # TODO
-        # TODO
-        # TODO
-        # TODO
-        # TODO
-        output_end_time = time.perf_counter()
-        logger.info(
-            f"Batch {i_batch} output time: {output_end_time - output_start_time:.3f} seconds"
-        )
-
-
-@hydra.main(version_base="1.3", config_path="conf", config_name="config")
-def main(cfg: DictConfig) -> None:
-    ######################################################
-    # initialize distributed manager
-    ######################################################
-    DistributedManager.initialize()
-    dist = DistributedManager()
-
-    ######################################################
-    # Initialize NVML
-    ######################################################
-    nvmlInit()
-    gpu_handle = nvmlDeviceGetHandleByIndex(dist.device.index)
-
-    ######################################################
-    # Initialize logger
-    ######################################################
-
-    logger = PythonLogger("Train")
-    logger = RankZeroLoggingWrapper(logger, dist)
-
-    logger.info(f"Config summary:\n{OmegaConf.to_yaml(cfg, sort_keys=True)}")
-
-    ######################################################
-    # Get scaling factors
-    # Likely, you want to reuse the scaling factors from training.
-    ######################################################
-    pickle_path = os.path.join(cfg.output) + "/scaling_factors/scaling_factors.pkl"
-
-    try:
-        scaling_factors = ScalingFactors.load(pickle_path)
-        logger.info(f"Scaling factors loaded from: {pickle_path}")
-    except FileNotFoundError:
-        raise FileNotFoundError(
-            f"Scaling factors not found at: {pickle_path}; please run compute_statistics.py to compute them."
-        )
-
-    vol_factors = scaling_factors.mean["volume_fields"]
-    surf_factors = scaling_factors.mean["surface_fields"]
-
-    ######################################################
-    # Configure the model
-    ######################################################
-    model_type = cfg.model.model_type
-    num_vol_vars, num_surf_vars, num_global_features = get_num_vars(cfg, model_type)
-
-    if model_type == "combined" or model_type == "surface":
-        surface_variable_names = list(cfg.variables.surface.solution.keys())
-    else:
-        surface_variable_names = []
-
-    if model_type == "combined" or model_type == "volume":
-        volume_variable_names = list(cfg.variables.volume.solution.keys())
-    else:
-        volume_variable_names = []
-
-    ######################################################
-    # Check that the sample size is equal.
-    # unequal samples could be done but they aren't, here.s
-    ######################################################
-    if cfg.model.model_type == "combined":
-        if cfg.model.volume_points_sample != cfg.model.surface_points_sample:
-            raise ValueError(
-                "Volume and surface points sample must be equal for combined model"
-            )
-
-    # Get the number of sample points:
-    sample_points = (
-        cfg.model.surface_points_sample
-        if cfg.model.model_type == "surface"
-        else cfg.model.volume_points_sample
-    )
-
-    ######################################################
-    # If the batch size doesn't evenly divide
-    # the num points, that's ok.  But print a warning
-    # that the total points will get tweaked.
-    ######################################################
-    if cfg.eval.num_points % sample_points != 0:
-        logger.warning(
-            f"Batch size {sample_points} doesn't evenly divide num points {cfg.eval.num_points}."
-        )
-        logger.warning(
-            f"Total points will be rounded up to {((cfg.eval.num_points // sample_points) + 1) * sample_points}."
-        )
-
-    ######################################################
-    # Configure the dataset
-    # We are applying preprocessing in a separate step
-    # for this - so the dataset and datapipe are separate
-    ######################################################
-
-    # Override the model type
-    # For the inference pipeline, we adjust the tooling a little for the data.
-    # We use only a bare STL dataset that will read the mesh coordinates
-    # and triangle definitions.  We'll compute the centers and normals
-    # on the GPU (instead of on the CPU, as pyvista would do) and
-    # then we can sample from that mesh on the GPU.
-    test_dataset = DrivaerMLDataset(
-        data_dir=cfg.eval.test_path,
-        keys_to_read=[
-            "stl_coordinates",
-            "stl_faces",
-        ],
-        output_device=dist.device,
-    )
-
-    # Volumetric data will be generated on the fly on the GPU.
-
-    ######################################################
-    # Configure the datapipe
-    # We _won't_ iterate over the datapipe, however, we can use the
-    # datapipe processing tools on the sampled surface and
-    # volume points with the same preprocessing.
-    # It also is used to un-normalize the model outputs.
-    ######################################################
-    overrides = {}
-    if hasattr(cfg.data, "gpu_preprocessing"):
-        overrides["gpu_preprocessing"] = cfg.data.gpu_preprocessing
-
-    if hasattr(cfg.data, "gpu_output"):
-        overrides["gpu_output"] = cfg.data.gpu_output
-
-    test_datapipe = DoMINODataPipe(
-        None,
-        phase="test",
-        grid_resolution=cfg.model.interp_res,
-        volume_variables=volume_variable_names,
-        surface_variables=surface_variable_names,
-        normalize_coordinates=True,
-        sampling=False,
-        sample_in_bbox=True,
-        volume_points_sample=None,
-        surface_points_sample=None,
-        geom_points_sample=None,
-        positional_encoding=cfg.model.positional_encoding,
-        volume_factors=vol_factors,
-        surface_factors=surf_factors,
-        scaling_type=cfg.model.normalization,
-        model_type=model_type,
-        bounding_box_dims=cfg.data.bounding_box,
-        bounding_box_dims_surf=cfg.data.bounding_box_surface,
-        num_surface_neighbors=cfg.model.num_neighbors_surface,
-        resample_surfaces=cfg.model.resampling_surface_mesh.resample,
-        resampling_points=cfg.model.resampling_surface_mesh.points,
-        surface_sampling_algorithm=cfg.model.surface_sampling_algorithm,
-        **overrides,
-    )
-
-    ######################################################
-    # The sampler is used in multi-gpu inference to
-    # coordinate the batches used for each rank.
-    ######################################################
-    test_sampler = DistributedSampler(
-        test_dataset,
-        num_replicas=dist.world_size,
-        rank=dist.rank,
-        **cfg.train.sampler,
-    )
-
-    ######################################################
-    # Configure the model
-    # and move it to the device.
-    ######################################################
-    model = DoMINO(
-        input_features=3,
-        output_features_vol=num_vol_vars,
-        output_features_surf=num_surf_vars,
-        global_features=num_global_features,
-        model_parameters=cfg.model,
-    ).to(dist.device)
-    # model = torch.compile(model, fullgraph=True, dynamic=True)  # TODO make this configurable
-
-    # Print model summary (structure and parmeter count).
-    logger.info(f"Model summary:\n{torchinfo.summary(model, verbose=0, depth=2)}\n")
-
-    if dist.world_size > 1:
-        torch.distributed.barrier()
-
-    load_checkpoint(
-        to_absolute_path(cfg.resume_dir),
-        models=model,
-        device=dist.device,
-    )
-
-    start_time = time.perf_counter()
-
-    # This controls what indices to use for each epoch.
-    test_sampler.set_epoch(0)
-
-    prof = Profiler()
-
-    model.eval()
-    epoch_start_time = time.perf_counter()
-    with prof:
-        inference_epoch(
-            dataset=test_dataset,
-            sampler=test_sampler,
-            datapipe=test_datapipe,
-            model=model,
-            logger=logger,
-            gpu_handle=gpu_handle,
-            batch_size=sample_points,
-            total_points=cfg.eval.num_points,
-        )
-    epoch_end_time = time.perf_counter()
-    logger.info(
-        f"Device {dist.device}, Epoch took {epoch_end_time - epoch_start_time:.3f} seconds"
-    )
-
-
-if __name__ == "__main__":
-    # Profiler().enable("torch")
-    # Profiler().initialize()
-    main()
-    # Profiler().finalize()

From f172ce6f61beabc9f4baa4f9571f73277ef06961 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Tue, 23 Sep 2025 20:11:56 +0000
Subject: [PATCH 46/98] Update train, inference, and config files.

---
 .../domino/src/benchmark_dataloader.py        |   2 +-
 .../domino/src/conf/config.yaml               |  13 +-
 .../domino/src/inference_on_stl.py            | 224 ++++++++++--------
 .../external_aerodynamics/domino/src/train.py |  60 ++---
 4 files changed, 160 insertions(+), 139 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
index f24e0ffe16..62a41d3383 100644
--- a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
+++ b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
@@ -173,7 +173,7 @@ def main(cfg: DictConfig) -> None:
         placements=placements,
     )
     train_sampler = DistributedSampler(
-        train_dataset, num_replicas=dist.world_size, rank=dist.rank
+        train_dataset, num_replicas=data_mesh.size(), rank=data_mesh.get_local_rank()
     )
 
     # train_dataloader = DataLoader(
diff --git a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
index 84256a0d97..73a720f343 100644
--- a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
+++ b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
@@ -81,6 +81,9 @@ data: # Input directory for training and validation data
     max: [4.5, 1.2, 1.3]
   gpu_preprocessing: true
   gpu_output: true
+  normalize_coordinates: true
+  sample_in_bbox: true
+  sampling: true
 
 # ┌───────────────────────────────────────────┐
 # │          Domain Parallelism Settings      │
@@ -177,7 +180,8 @@ train: # Training configurable parameters
   checkpoint_interval: 1
   dataloader:
     batch_size: 1
-    pin_memory: false # if the preprocessing is outputing GPU data, set this to false
+    preload_depth: 2
+    pin_memory: True # if the preprocessing is outputing GPU data, set this to false
   sampler:
     shuffle: true
     drop_last: false
@@ -191,7 +195,8 @@ train: # Training configurable parameters
 val: # Validation configurable parameters
   dataloader:
     batch_size: 1
-    pin_memory: false # if the preprocessing is outputing GPU data, set this to false
+    preload_depth: 1
+    pin_memory: true # if the preprocessing is outputing GPU data, set this to false
   sampler:
     shuffle: true
     drop_last: false
@@ -205,4 +210,6 @@ eval: # Testing configurable parameters
   checkpoint_name: DoMINO.0.455.pt # Name of checkpoint to select from saved checkpoints
   scaling_param_path: /user/scaling_params
   refine_stl: False # Automatically refine STL during inference
-  stencil_size: 7 # Stencil size for evaluating surface and volume model
+  #TODO -  This was hardcoded anyways, remove it.
+  # stencil_size: 7 # Stencil size for evaluating surface and volume model
+  num_points: 1_240_000 # Number of points to sample on surface and volume per batch
diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
index 3c6acc3ccd..edeefba937 100644
--- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
+++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
@@ -75,7 +75,7 @@
 from physicsnemo.models.domino.model import DoMINO
 from physicsnemo.utils.domino.utils import sample_points_on_mesh
 
-from utils import ScalingFactors
+from utils import ScalingFactors, get_keys_to_read, coordinate_distributed_environment
 
 # This is included for GPU memory tracking:
 from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo
@@ -92,10 +92,47 @@
 from loss import compute_loss_dict
 from utils import get_num_vars
 
+def reject_interior_volume_points(preprocessed_data: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
+    """
+    Reject volume points that are inside the STL mesh.
+    """
+    ######################################################
+    # Use the sign of the volume SDF to filter out points
+    # That are inside the STL mesh
+    ######################################################
+    sdf_nodes = preprocessed_data["sdf_nodes"]
+    # The sfd_nodes tensor typically has shape (n_vol_points, 1)
+    valid_volume_idx = sdf_nodes > 0
+    # So remove it if it's there:
+    valid_volume_idx = valid_volume_idx.squeeze(-1)
+    # Apply this selection to all the volume points:
+    for key in ["volume_mesh_centers", "sdf_nodes", "pos_volume_closest", "pos_volume_center_of_mass"]:
+        preprocessed_data[key] = preprocessed_data[key][valid_volume_idx]
+        
+    return preprocessed_data
+
+def sample_volume_points(c_min: torch.Tensor, c_max: torch.Tensor, n_points: int, device: torch.device, eps: float = 1e-7) -> torch.Tensor:
+    """
+    Generate a set of random points interior to the specified bounding box.
+    
+    Args:
+        c_min: The minimum coordinate of the bounding box.
+        c_max: The maximum coordinate of the bounding box.
+        n_points: The number of points to sample.
+        device: The device to sample the points on.
+        eps: The small edge factor to shift away from the lower bound.
+    """
+    # We use a small edge factor to shift away from the lower bound,
+    # which can, in some cases, be exactly on the border.
+    uniform_points = torch.rand(n_points, 3, device=device, dtype=torch.float32)*(1-2*eps) + eps
+    sampled_volume_points = (c_max - c_min) * uniform_points + c_min
+    return sampled_volume_points
 
 def inference_on_single_stl(
     stl_coordinates: torch.Tensor,
     stl_faces: torch.Tensor,
+    global_params_values: torch.Tensor,
+    global_params_reference: torch.Tensor,
     model: DoMINO,
     datapipe: DoMINODataPipe,
     batch_size: int,
@@ -115,6 +152,8 @@ def inference_on_single_stl(
     Args:
         stl_coordinates: The coordinates of the STL mesh.
         stl_faces: The faces of the STL mesh.
+        global_params_values: The values of the global parameters.
+        global_params_reference: The reference values of the global parameters.
         model: The model to use for inference.
         datapipe: The datapipe to use for preprocessing.
         batch_size: The batch size to use for inference.
@@ -169,6 +208,8 @@ def inference_on_single_stl(
             "stl_faces": stl_faces,
             "stl_centers": stl_centers,
             "stl_areas": stl_areas,
+            "global_params_values": global_params_values,
+            "global_params_reference": global_params_reference,
         }
 
         # If the surface data is part of the model, sample the surface:
@@ -196,17 +237,13 @@ def inference_on_single_stl(
         if datapipe.model_type == "volume" or datapipe.model_type == "combined":
             ######################################################
             # Build up volume points too with uniform sampling
-            # TODO - this doesn't filter points that are
-            # internal to the mesh
             ######################################################
             c_min = datapipe.config.bounding_box_dims[1]
             c_max = datapipe.config.bounding_box_dims[0]
+            inference_dict["volume_mesh_centers"] = sample_volume_points(
+                c_min, c_max, batch_size, device,
+            )
 
-            sampled_volume_points = (c_max - c_min) * torch.rand(
-                batch_size, 3, device=device, dtype=torch.float32
-            ) + c_min
-
-            inference_dict["volume_mesh_centers"] = (sampled_volume_points,)
 
         ######################################################
         # Pre-process the data with the datapipe:
@@ -214,15 +251,7 @@ def inference_on_single_stl(
         preprocessed_data = datapipe.process_data(inference_dict)
 
         if datapipe.model_type == "volume" or datapipe.model_type == "combined":
-            ######################################################
-            # Use the sign of the volume SDF to filter out points
-            # That are inside the STL mesh
-            ######################################################
-            sdf_nodes = preprocessed_data["sdf_nodes"]
-            valid_volume_idx = sdf_nodes > 0
-            preprocessed_data["volume_mesh_centers"] = preprocessed_data[
-                "volume_mesh_centers"
-            ][valid_volume_idx]
+            preprocessed_data = reject_interior_volume_points(preprocessed_data)
 
         ######################################################
         # Add a batch dimension to the data_dict
@@ -276,32 +305,34 @@ def inference_on_single_stl(
     # of the above logic.
     ######################################################
     if datapipe.model_type == "surface" or datapipe.model_type == "combined":
-        stl_inference_dict = {
+
+        inference_dict = {
             "stl_coordinates": stl_coordinates,
             "stl_faces": stl_faces,
             "stl_centers": stl_centers,
             "stl_areas": stl_areas,
+            "global_params_values": global_params_values,
+            "global_params_reference": global_params_reference,
         }
         inference_dict["surface_mesh_centers"] = stl_centers
         inference_dict["surface_normals"] = stl_mesh_normals
         inference_dict["surface_areas"] = stl_areas
         inference_dict["surface_faces"] = stl_faces
 
-        # Just reuse the previous volume samples here if needed:
         if datapipe.model_type == "combined":
-            inference_dict["volume_mesh_centers"] = sampled_volume_points
+            c_min = datapipe.config.bounding_box_dims[1]
+            c_max = datapipe.config.bounding_box_dims[0]
+            inference_dict["volume_mesh_centers"] = sample_volume_points(
+                c_min, c_max, stl_centers.shape[0], device,
+            )
 
         # Preprocess:
         preprocessed_data = datapipe.process_data(inference_dict)
 
         # Pull out the invalid volume points again, if needed:
-        if datapipe.model_type == "combined":
-            sdf_nodes = preprocessed_data["sdf_nodes"]
-            valid_volume_idx = sdf_nodes > 0
-            preprocessed_data["volume_mesh_centers"] = preprocessed_data[
-                "volume_mesh_centers"
-            ][valid_volume_idx]
-
+        if datapipe.model_type == "combined" or datapipe.model_type == "volume":
+            preprocessed_data = reject_interior_volume_points(preprocessed_data)
+            
         # Run the model forward:
         with torch.no_grad():
             preprocessed_data = {
@@ -316,18 +347,21 @@ def inference_on_single_stl(
         stl_center_results = None
 
     # Stack up the results into one big tensor for surface and volume:
-    if all([s is not None for s in surface_results]):
+    if len(surface_results) > 0 and all([s is not None for s in surface_results]):
         surface_results = torch.cat(surface_results, dim=1)
-    if all([v is not None for v in volume_results]):
-        volume_results = torch.cat(volume_results, dim=0)
+    else:
+        surface_results = None
+    if len(volume_results) > 0 and all([v is not None for v in volume_results]):
+        volume_results = torch.cat(volume_results, dim=1)
+    else:
+        volume_results = None
 
     return stl_center_results, surface_results, volume_results
 
 
 def inference_epoch(
-    dataset: DrivaerMLDataset,
+    dataloader: DrivaerMLDataset,
     sampler: DistributedSampler,
-    datapipe: DoMINODataPipe,
     model: DoMINO,
     gpu_handle: int,
     logger: PythonLogger,
@@ -339,44 +373,29 @@ def inference_epoch(
     # the indices for each rank, which the sampler does
     ######################################################
 
-    # Convert the indices right to a list:
-    epoch_indices = list(sampler)
-
-    ######################################################
-    # Assuming here there are more than two target meshes
-    # This will get the IO pipe running in the background
-    # While we process a dataset.
-    ######################################################
-    dataset.preload(epoch_indices[0])
-    dataset.preload(epoch_indices[1])
-
-    for i_batch, epoch_index in enumerate(epoch_indices):
-        batch_start_time = time.perf_counter()
-        ######################################################
-        # Put another example in the preload queue while this
-        # batch is processed
-        ######################################################
-        data_loading_start = time.perf_counter()
-        if i_batch + 2 < len(epoch_indices):
-            # Preload next next
-            dataset.preload(epoch_indices[i_batch + 2])
+    batch_start_time = time.perf_counter()
+    
+    # N.B. - iterating over the dataset directly here.
+    # That's because we need to sample on the STL and volume and
+    # that means we'll preprocess after that.
+    for i_batch, sample_batched in enumerate(dataloader.dataset):
+        
+        
+        dataloading_time = time.perf_counter() - batch_start_time
 
-        ######################################################
-        # Get the data for this index:
-        ######################################################
-        sample_batched = dataset[epoch_index]
-        dataloading_time = time.perf_counter() - data_loading_start
 
         logger.info(
             f"Batch {i_batch} data loading time: {dataloading_time:.3f} seconds"
         )
 
         procesing_time_start = time.perf_counter()
-        stl_center_resulst, surface_results, volume_results = inference_on_single_stl(
+        stl_center_results, surface_results, volume_results = inference_on_single_stl(
             sample_batched["stl_coordinates"],
             sample_batched["stl_faces"],
+            sample_batched["global_params_values"],
+            sample_batched["global_params_reference"],
             model,
-            datapipe,
+            dataloader,
             batch_size,
             total_points,
             gpu_handle,
@@ -399,6 +418,10 @@ def inference_epoch(
         logger.info(
             f"Batch {i_batch} GPU processing time: {procesing_time_end - procesing_time_start:.3f} seconds"
         )
+        logger.info(
+            f"Batch {i_batch} stl points: {stl_center_results.shape[1]}"
+        )
+
 
         output_start_time = time.perf_counter()
         ######################################################
@@ -414,6 +437,8 @@ def inference_epoch(
         logger.info(
             f"Batch {i_batch} output time: {output_end_time - output_start_time:.3f} seconds"
         )
+        
+        batch_start_time = time.perf_counter()
 
 
 @hydra.main(version_base="1.3", config_path="conf", config_name="config")
@@ -423,6 +448,10 @@ def main(cfg: DictConfig) -> None:
     ######################################################
     DistributedManager.initialize()
     dist = DistributedManager()
+    
+    # DoMINO supports domain parallel training and inference.  This function helps coordinate
+    # how to set that up, if needed.
+    domain_mesh, data_mesh, placements = coordinate_distributed_environment(cfg)
 
     ######################################################
     # Initialize NVML
@@ -507,21 +536,28 @@ def main(cfg: DictConfig) -> None:
     # We are applying preprocessing in a separate step
     # for this - so the dataset and datapipe are separate
     ######################################################
-
+    
+    # This helper function is to determine which keys to read from the data
+    # (and which to use default values for, if they aren't present - like
+    # air_density, for example)
+    keys_to_read, keys_to_read_if_available = get_keys_to_read(
+        cfg, model_type, get_ground_truth=True
+    )
     # Override the model type
     # For the inference pipeline, we adjust the tooling a little for the data.
     # We use only a bare STL dataset that will read the mesh coordinates
     # and triangle definitions.  We'll compute the centers and normals
     # on the GPU (instead of on the CPU, as pyvista would do) and
     # then we can sample from that mesh on the GPU.
-    test_dataset = DrivaerMLDataset(
-        data_dir=cfg.eval.test_path,
-        keys_to_read=[
-            "stl_coordinates",
-            "stl_faces",
-        ],
-        output_device=dist.device,
-    )
+    # test_dataset = DrivaerMLDataset(
+    #     data_dir=cfg.eval.test_path,
+    #     keys_to_read=[
+    #         "stl_coordinates",
+    #         "stl_faces",
+    #     ],
+    #     keys_to_read_if_available=keys_to_read_if_available,
+    #     output_device=dist.device,
+    # )
 
     # Volumetric data will be generated on the fly on the GPU.
 
@@ -538,44 +574,35 @@ def main(cfg: DictConfig) -> None:
 
     if hasattr(cfg.data, "gpu_output"):
         overrides["gpu_output"] = cfg.data.gpu_output
-
-    test_datapipe = DoMINODataPipe(
-        None,
+        
+    test_dataloader = create_domino_dataset(
+        cfg,
         phase="test",
-        grid_resolution=cfg.model.interp_res,
-        volume_variables=volume_variable_names,
-        surface_variables=surface_variable_names,
-        normalize_coordinates=True,
-        sampling=False,
-        sample_in_bbox=True,
-        volume_points_sample=None,
-        surface_points_sample=None,
-        geom_points_sample=None,
-        positional_encoding=cfg.model.positional_encoding,
-        volume_factors=vol_factors,
-        surface_factors=surf_factors,
-        scaling_type=cfg.model.normalization,
-        model_type=model_type,
-        bounding_box_dims=cfg.data.bounding_box,
-        bounding_box_dims_surf=cfg.data.bounding_box_surface,
-        num_surface_neighbors=cfg.model.num_neighbors_surface,
-        resample_surfaces=cfg.model.resampling_surface_mesh.resample,
-        resampling_points=cfg.model.resampling_surface_mesh.points,
-        surface_sampling_algorithm=cfg.model.surface_sampling_algorithm,
-        **overrides,
+        keys_to_read=["stl_coordinates", "stl_faces"],
+        keys_to_read_if_available=keys_to_read_if_available,
+        vol_factors=vol_factors,
+        surf_factors=surf_factors,
+        normalize_coordinates = cfg.data.normalize_coordinates,
+        sample_in_bbox = cfg.data.sample_in_bbox,
+        sampling = cfg.data.sampling,
+        device_mesh=domain_mesh,
+        placements=placements,
     )
-
+    
     ######################################################
     # The sampler is used in multi-gpu inference to
     # coordinate the batches used for each rank.
     ######################################################
     test_sampler = DistributedSampler(
-        test_dataset,
-        num_replicas=dist.world_size,
-        rank=dist.rank,
+        test_dataloader,
+        num_replicas=data_mesh.size(),
+        rank=data_mesh.get_local_rank(),
         **cfg.train.sampler,
     )
 
+
+
+
     ######################################################
     # Configure the model
     # and move it to the device.
@@ -612,9 +639,8 @@ def main(cfg: DictConfig) -> None:
     epoch_start_time = time.perf_counter()
     with prof:
         inference_epoch(
-            dataset=test_dataset,
+            dataloader=test_dataloader,
             sampler=test_sampler,
-            datapipe=test_datapipe,
             model=model,
             logger=logger,
             gpu_handle=gpu_handle,
diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py
index 0176084082..5c0ced9a22 100644
--- a/examples/cfd/external_aerodynamics/domino/src/train.py
+++ b/examples/cfd/external_aerodynamics/domino/src/train.py
@@ -39,7 +39,19 @@
 from omegaconf import DictConfig, OmegaConf
 
 
-DISABLE_RMM = os.environ.get("DOMINO_DISABLE_RMM", False)
+def srt2bool(val: str):
+    if isinstance(val, bool):
+        return val
+    if val.lower() in ["true", "1", "yes", "y"]:
+        return True
+    elif val.lower() in ["false", "0", "no", "n"]:
+        return False
+    else:
+        raise ValueError(f"Invalid boolean value: {val}")
+
+
+DISABLE_RMM = srt2bool(os.environ.get("DOMINO_DISABLE_RMM", False))
+
 if not DISABLE_RMM:
     import rmm
     from rmm.allocators.torch import rmm_torch_allocator
@@ -170,9 +182,9 @@ def train_epoch(
     gpu_start_info = nvmlDeviceGetMemoryInfo(gpu_handle)
     start_time = time.perf_counter()
     with Profiler():
-        for i_batch, sample_batched in enumerate(dataloader):
-            sampled_batched = dict_to_device(sample_batched, device)
-
+        for i_batch, sampled_batched in enumerate(dataloader):
+            if i_batch == 7:
+                break
             if add_physics_loss:
                 autocast_enabled = False
             else:
@@ -373,6 +385,9 @@ def main(cfg: DictConfig) -> None:
         surf_factors=surf_factors,
         device_mesh=domain_mesh,
         placements=placements,
+        normalize_coordinates=cfg.data.normalize_coordinates,
+        sample_in_bbox=cfg.data.sample_in_bbox,
+        sampling=cfg.data.sampling,
     )
     train_sampler = DistributedSampler(
         train_dataloader,
@@ -390,6 +405,9 @@ def main(cfg: DictConfig) -> None:
         surf_factors=surf_factors,
         device_mesh=domain_mesh,
         placements=placements,
+        normalize_coordinates=cfg.data.normalize_coordinates,
+        sample_in_bbox=cfg.data.sample_in_bbox,
+        sampling=cfg.data.sampling,
     )
     val_sampler = DistributedSampler(
         val_dataloader,
@@ -398,37 +416,6 @@ def main(cfg: DictConfig) -> None:
         **cfg.val.sampler,
     )
 
-    # train_dataloader = create_domino_dataset(
-    #     cfg,
-    #     phase="train",
-    #     volume_variable_names=volume_variable_names,
-    #     surface_variable_names=surface_variable_names,
-    #     vol_factors=vol_factors,
-    #     surf_factors=surf_factors,
-    # )
-    # val_dataloader = create_domino_dataset(
-    #     cfg,
-    #     phase="val",
-    #     volume_variable_names=volume_variable_names,
-    #     surface_variable_names=surface_variable_names,
-    #     vol_factors=vol_factors,
-    #     surf_factors=surf_factors,
-    # )
-
-    # train_sampler = DistributedSampler(
-    #     train_dataloader,
-    #     num_replicas=dist.world_size,
-    #     rank=dist.rank,
-    #     **cfg.train.sampler,
-    # )
-
-    # val_sampler = DistributedSampler(
-    #     val_dataloader,
-    #     num_replicas=dist.world_size,
-    #     rank=dist.rank,
-    #     **cfg.val.sampler,
-    # )
-
     ######################################################
     # Configure the model
     ######################################################
@@ -439,7 +426,6 @@ def main(cfg: DictConfig) -> None:
         global_features=num_global_features,
         model_parameters=cfg.model,
     ).to(dist.device)
-    # model = torch.compile(model, fullgraph=True, dynamic=True)  # TODO make this configurable
 
     # Print model summary (structure and parmeter count).
     logger.info(f"Model summary:\n{torchinfo.summary(model, verbose=0, depth=2)}\n")
@@ -570,6 +556,8 @@ def main(cfg: DictConfig) -> None:
         )
         epoch_end_time = time.perf_counter()
 
+        return
+
         model.eval()
         avg_vloss = validation_step(
             dataloader=val_dataloader,

From cdbe0ce9bfbf66074349ba1db3e2ce21d5a46769 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 24 Sep 2025 15:23:57 +0000
Subject: [PATCH 47/98] Update scaling factor configuration and location
 setting

---
 .../domino/src/compute_statistics.py          |  2 +-
 .../domino/src/conf/config.yaml               |  3 +-
 .../domino/src/inference_on_stl.py            | 78 +++++++++++--------
 .../external_aerodynamics/domino/src/train.py |  2 +-
 .../datapipes/cae/drivaer_ml_dataset.py       |  2 +
 5 files changed, 52 insertions(+), 35 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py b/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py
index 5c9ef21f04..ac917d5353 100644
--- a/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py
+++ b/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py
@@ -67,7 +67,7 @@ def main(cfg: DictConfig) -> None:
     ################################
     # Create output directory
     ################################
-    output_dir = os.path.join(cfg.output, "scaling_factors")
+    output_dir = os.path.dirname(cfg.data.scaling_factors)
     os.makedirs(output_dir, exist_ok=True)
 
     if dist.world_size > 1:
diff --git a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
index 73a720f343..2686634cf9 100644
--- a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
+++ b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
@@ -68,7 +68,7 @@ variables:
       reference: 1.226
 
 # ┌───────────────────────────────────────────┐
-# │          Training Data Configs            │
+# │         Data Configs                      │
 # └───────────────────────────────────────────┘  
 data: # Input directory for training and validation data
   input_dir: /user/data/aws_data_all/
@@ -84,6 +84,7 @@ data: # Input directory for training and validation data
   normalize_coordinates: true
   sample_in_bbox: true
   sampling: true
+  scaling_factors: outputs/AWS_Dataset/1/scaling_factors/scaling_factors.pkl
 
 # ┌───────────────────────────────────────────┐
 # │          Domain Parallelism Settings      │
diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
index edeefba937..9228db8584 100644
--- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
+++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
@@ -92,7 +92,10 @@
 from loss import compute_loss_dict
 from utils import get_num_vars
 
-def reject_interior_volume_points(preprocessed_data: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
+
+def reject_interior_volume_points(
+    preprocessed_data: dict[str, torch.Tensor],
+) -> dict[str, torch.Tensor]:
     """
     Reject volume points that are inside the STL mesh.
     """
@@ -106,15 +109,27 @@ def reject_interior_volume_points(preprocessed_data: dict[str, torch.Tensor]) ->
     # So remove it if it's there:
     valid_volume_idx = valid_volume_idx.squeeze(-1)
     # Apply this selection to all the volume points:
-    for key in ["volume_mesh_centers", "sdf_nodes", "pos_volume_closest", "pos_volume_center_of_mass"]:
+    for key in [
+        "volume_mesh_centers",
+        "sdf_nodes",
+        "pos_volume_closest",
+        "pos_volume_center_of_mass",
+    ]:
         preprocessed_data[key] = preprocessed_data[key][valid_volume_idx]
-        
+
     return preprocessed_data
 
-def sample_volume_points(c_min: torch.Tensor, c_max: torch.Tensor, n_points: int, device: torch.device, eps: float = 1e-7) -> torch.Tensor:
+
+def sample_volume_points(
+    c_min: torch.Tensor,
+    c_max: torch.Tensor,
+    n_points: int,
+    device: torch.device,
+    eps: float = 1e-7,
+) -> torch.Tensor:
     """
     Generate a set of random points interior to the specified bounding box.
-    
+
     Args:
         c_min: The minimum coordinate of the bounding box.
         c_max: The maximum coordinate of the bounding box.
@@ -124,10 +139,14 @@ def sample_volume_points(c_min: torch.Tensor, c_max: torch.Tensor, n_points: int
     """
     # We use a small edge factor to shift away from the lower bound,
     # which can, in some cases, be exactly on the border.
-    uniform_points = torch.rand(n_points, 3, device=device, dtype=torch.float32)*(1-2*eps) + eps
+    uniform_points = (
+        torch.rand(n_points, 3, device=device, dtype=torch.float32) * (1 - 2 * eps)
+        + eps
+    )
     sampled_volume_points = (c_max - c_min) * uniform_points + c_min
     return sampled_volume_points
 
+
 def inference_on_single_stl(
     stl_coordinates: torch.Tensor,
     stl_faces: torch.Tensor,
@@ -241,10 +260,12 @@ def inference_on_single_stl(
             c_min = datapipe.config.bounding_box_dims[1]
             c_max = datapipe.config.bounding_box_dims[0]
             inference_dict["volume_mesh_centers"] = sample_volume_points(
-                c_min, c_max, batch_size, device,
+                c_min,
+                c_max,
+                batch_size,
+                device,
             )
 
-
         ######################################################
         # Pre-process the data with the datapipe:
         ######################################################
@@ -305,7 +326,6 @@ def inference_on_single_stl(
     # of the above logic.
     ######################################################
     if datapipe.model_type == "surface" or datapipe.model_type == "combined":
-
         inference_dict = {
             "stl_coordinates": stl_coordinates,
             "stl_faces": stl_faces,
@@ -323,7 +343,10 @@ def inference_on_single_stl(
             c_min = datapipe.config.bounding_box_dims[1]
             c_max = datapipe.config.bounding_box_dims[0]
             inference_dict["volume_mesh_centers"] = sample_volume_points(
-                c_min, c_max, stl_centers.shape[0], device,
+                c_min,
+                c_max,
+                stl_centers.shape[0],
+                device,
             )
 
         # Preprocess:
@@ -332,7 +355,7 @@ def inference_on_single_stl(
         # Pull out the invalid volume points again, if needed:
         if datapipe.model_type == "combined" or datapipe.model_type == "volume":
             preprocessed_data = reject_interior_volume_points(preprocessed_data)
-            
+
         # Run the model forward:
         with torch.no_grad():
             preprocessed_data = {
@@ -374,16 +397,13 @@ def inference_epoch(
     ######################################################
 
     batch_start_time = time.perf_counter()
-    
+
     # N.B. - iterating over the dataset directly here.
     # That's because we need to sample on the STL and volume and
     # that means we'll preprocess after that.
     for i_batch, sample_batched in enumerate(dataloader.dataset):
-        
-        
         dataloading_time = time.perf_counter() - batch_start_time
 
-
         logger.info(
             f"Batch {i_batch} data loading time: {dataloading_time:.3f} seconds"
         )
@@ -418,10 +438,7 @@ def inference_epoch(
         logger.info(
             f"Batch {i_batch} GPU processing time: {procesing_time_end - procesing_time_start:.3f} seconds"
         )
-        logger.info(
-            f"Batch {i_batch} stl points: {stl_center_results.shape[1]}"
-        )
-
+        logger.info(f"Batch {i_batch} stl points: {stl_center_results.shape[1]}")
 
         output_start_time = time.perf_counter()
         ######################################################
@@ -437,7 +454,7 @@ def inference_epoch(
         logger.info(
             f"Batch {i_batch} output time: {output_end_time - output_start_time:.3f} seconds"
         )
-        
+
         batch_start_time = time.perf_counter()
 
 
@@ -448,7 +465,7 @@ def main(cfg: DictConfig) -> None:
     ######################################################
     DistributedManager.initialize()
     dist = DistributedManager()
-    
+
     # DoMINO supports domain parallel training and inference.  This function helps coordinate
     # how to set that up, if needed.
     domain_mesh, data_mesh, placements = coordinate_distributed_environment(cfg)
@@ -463,7 +480,7 @@ def main(cfg: DictConfig) -> None:
     # Initialize logger
     ######################################################
 
-    logger = PythonLogger("Train")
+    logger = PythonLogger("Inference")
     logger = RankZeroLoggingWrapper(logger, dist)
 
     logger.info(f"Config summary:\n{OmegaConf.to_yaml(cfg, sort_keys=True)}")
@@ -472,7 +489,7 @@ def main(cfg: DictConfig) -> None:
     # Get scaling factors
     # Likely, you want to reuse the scaling factors from training.
     ######################################################
-    pickle_path = os.path.join(cfg.output) + "/scaling_factors/scaling_factors.pkl"
+    pickle_path = os.path.join(cfg.data.scaling_factors)
 
     try:
         scaling_factors = ScalingFactors.load(pickle_path)
@@ -536,7 +553,7 @@ def main(cfg: DictConfig) -> None:
     # We are applying preprocessing in a separate step
     # for this - so the dataset and datapipe are separate
     ######################################################
-    
+
     # This helper function is to determine which keys to read from the data
     # (and which to use default values for, if they aren't present - like
     # air_density, for example)
@@ -574,7 +591,7 @@ def main(cfg: DictConfig) -> None:
 
     if hasattr(cfg.data, "gpu_output"):
         overrides["gpu_output"] = cfg.data.gpu_output
-        
+
     test_dataloader = create_domino_dataset(
         cfg,
         phase="test",
@@ -582,13 +599,13 @@ def main(cfg: DictConfig) -> None:
         keys_to_read_if_available=keys_to_read_if_available,
         vol_factors=vol_factors,
         surf_factors=surf_factors,
-        normalize_coordinates = cfg.data.normalize_coordinates,
-        sample_in_bbox = cfg.data.sample_in_bbox,
-        sampling = cfg.data.sampling,
+        normalize_coordinates=cfg.data.normalize_coordinates,
+        sample_in_bbox=cfg.data.sample_in_bbox,
+        sampling=cfg.data.sampling,
         device_mesh=domain_mesh,
         placements=placements,
     )
-    
+
     ######################################################
     # The sampler is used in multi-gpu inference to
     # coordinate the batches used for each rank.
@@ -600,9 +617,6 @@ def main(cfg: DictConfig) -> None:
         **cfg.train.sampler,
     )
 
-
-
-
     ######################################################
     # Configure the model
     # and move it to the device.
diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py
index 5c0ced9a22..a1029ceeae 100644
--- a/examples/cfd/external_aerodynamics/domino/src/train.py
+++ b/examples/cfd/external_aerodynamics/domino/src/train.py
@@ -292,7 +292,7 @@ def main(cfg: DictConfig) -> None:
     ######################################################
     # Get scaling factors - precompute them if this fails!
     ######################################################
-    pickle_path = os.path.join(cfg.output) + "/scaling_factors/scaling_factors.pkl"
+    pickle_path = os.path.join(cfg.data.scaling_factors)
 
     try:
         scaling_factors = ScalingFactors.load(pickle_path)
diff --git a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
index 0acec3b7a5..13009fc968 100644
--- a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
+++ b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
@@ -880,6 +880,8 @@ def preload(self, idx: int) -> None:
 
         def _preload_worker():
             data = self._read_file(self._filenames[idx])
+            if "stl_faces" in data:
+                data["stl_faces"] = data["stl_faces"].to(torch.int32)
             # Convert to torch tensors
             return self._move_to_gpu(data, idx)
 

From fc5d32a27284aee05af06f9cd0b9e2a8a8671d79 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 24 Sep 2025 16:03:02 +0000
Subject: [PATCH 48/98] Make sure surface grid and sdf calculation always
 happens.

---
 .../domino/src/inference_on_stl.py            |  6 ++-
 physicsnemo/datapipes/cae/domino_datapipe2.py | 40 ++++++++++---------
 2 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
index 9228db8584..f2c3388ada 100644
--- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
+++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
@@ -339,7 +339,7 @@ def inference_on_single_stl(
         inference_dict["surface_areas"] = stl_areas
         inference_dict["surface_faces"] = stl_faces
 
-        if datapipe.model_type == "combined":
+        if datapipe.model_type == "combined" or datapipe.model_type == "volume":
             c_min = datapipe.config.bounding_box_dims[1]
             c_max = datapipe.config.bounding_box_dims[0]
             inference_dict["volume_mesh_centers"] = sample_volume_points(
@@ -438,7 +438,9 @@ def inference_epoch(
         logger.info(
             f"Batch {i_batch} GPU processing time: {procesing_time_end - procesing_time_start:.3f} seconds"
         )
-        logger.info(f"Batch {i_batch} stl points: {stl_center_results.shape[1]}")
+        logger.info(
+            f"Batch {i_batch} stl points: {stl_center_results.shape[1] if stl_center_results is not None else 0}"
+        )
 
         output_start_time = time.perf_counter()
         ######################################################
diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py
index 53abe95822..0cf516f438 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe2.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe2.py
@@ -526,10 +526,6 @@ def process_surface(
             surf_grid = normalize(surf_grid, s_max, s_min)
             surface_coordinates = normalize(surface_coordinates, s_max, s_min)
             surface_neighbors = normalize(surface_neighbors, s_max, s_min)
-            # This is for the SDF Later:
-            normed_vertices = normalize(stl_vertices, s_max, s_min)
-        else:
-            normed_vertices = stl_vertices
 
         ########################################################################
         # Apply scaling to the targets, if desired:
@@ -539,15 +535,6 @@ def process_surface(
                 surface_fields, self.config.surface_factors
             )
 
-        # Compute signed distance function for the surface grid:
-        sdf_surf_grid, _ = signed_distance_field(
-            mesh_vertices=normed_vertices,
-            mesh_indices=stl_indices,
-            input_points=surf_grid,
-            use_sign_winding_number=True,
-        )
-        return_dict["sdf_surf_grid"] = sdf_surf_grid
-
         return_dict.update(
             {
                 "pos_surface_center_of_mass": pos_normals_com_surface,
@@ -746,10 +733,28 @@ def process_data(self, data_dict):
         )
         return_dict["surf_grid"] = surf_grid
 
+        # We always need to calculate the SDF on the surface grid:
+        # This is for the SDF Later:
+        if self.config.normalize_coordinates:
+            normed_vertices = normalize(data_dict["stl_coordinates"], s_max, s_min)
+        else:
+            normed_vertices = data_dict["stl_coordinates"]
+
+        # For SDF calculations, make sure the mesh_indices_flattened is an integer array:
+        mesh_indices_flattened = data_dict["stl_faces"].to(torch.int32)
+
+        # Compute signed distance function for the surface grid:
+        sdf_surf_grid, _ = signed_distance_field(
+            mesh_vertices=normed_vertices,
+            mesh_indices=mesh_indices_flattened,
+            input_points=surf_grid,
+            use_sign_winding_number=True,
+        )
+        return_dict["sdf_surf_grid"] = sdf_surf_grid
+
         # Store this only if normalization is active:
-        if self.model_type == "surface" or self.model_type == "combined":
-            if self.config.normalize_coordinates:
-                return_dict["surface_min_max"] = torch.stack([s_min, s_max])
+        if self.config.normalize_coordinates:
+            return_dict["surface_min_max"] = torch.stack([s_min, s_max])
 
         # This is a center of mass computation for the stl surface,
         # using the size of each mesh point as weight.
@@ -774,9 +779,6 @@ def process_data(self, data_dict):
             if self.config.normalize_coordinates:
                 return_dict["volume_min_max"] = torch.stack([c_min, c_max])
 
-        # For SDF calculations, make sure the mesh_indices_flattened is an integer array:
-        mesh_indices_flattened = data_dict["stl_faces"].to(torch.int32)
-
         if self.model_type == "volume" or self.model_type == "combined":
             volume_fields_raw = (
                 data_dict["volume_fields"] if "volume_fields" in data_dict else None

From 3f4f1107b09b80ad84bbf1caa1b1d06b1fd3444c Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 24 Sep 2025 16:40:02 +0000
Subject: [PATCH 49/98] Update timing printouts for training.

---
 .../cfd/external_aerodynamics/domino/src/train.py     | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py
index a1029ceeae..ea71ee2a71 100644
--- a/examples/cfd/external_aerodynamics/domino/src/train.py
+++ b/examples/cfd/external_aerodynamics/domino/src/train.py
@@ -182,9 +182,9 @@ def train_epoch(
     gpu_start_info = nvmlDeviceGetMemoryInfo(gpu_handle)
     start_time = time.perf_counter()
     with Profiler():
+        io_start_time = time.perf_counter()
         for i_batch, sampled_batched in enumerate(dataloader):
-            if i_batch == 7:
-                break
+            io_end_time = time.perf_counter()
             if add_physics_loss:
                 autocast_enabled = False
             else:
@@ -224,6 +224,7 @@ def train_epoch(
             # Gather data and report
             running_loss += loss.item()
             elapsed_time = time.perf_counter() - start_time
+            io_time = io_end_time - io_start_time
             start_time = time.perf_counter()
             gpu_end_info = nvmlDeviceGetMemoryInfo(gpu_handle)
             gpu_memory_used = gpu_end_info.used / (1024**3)
@@ -245,11 +246,11 @@ def train_epoch(
             )
 
             logging_string += loss_string
-            logging_string += f"  GPU memory used: {gpu_memory_used:.3f} Gb\n"
-            logging_string += f"  GPU memory delta: {gpu_memory_delta:.3f} Gb\n"
-            logging_string += f"  Time taken: {elapsed_time:.2f} seconds\n"
+            logging_string += f"  GPU memory used: {gpu_memory_used:.3f} Gb (delta: {gpu_memory_delta:.3f})\n"
+            logging_string += f"  Timings: (IO: {io_time:.2f}, Model: {elapsed_time - io_time:.2f}, Total: {elapsed_time:.2f})s\n"
             logger.info(logging_string)
             gpu_start_info = nvmlDeviceGetMemoryInfo(gpu_handle)
+            io_start_time = time.perf_counter()
 
     last_loss = running_loss / (i_batch + 1)  # loss per batch
     if dist.rank == 0:

From 2e3c696e0533c7529777a43228e7b8dcdec1b92d Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Fri, 26 Sep 2025 11:52:51 -0700
Subject: [PATCH 50/98] Fix bug in output encoding when the number of upstream
 radii is different.

---
 physicsnemo/models/domino/encodings.py                 | 10 +++++-----
 physicsnemo/models/domino/model.py                     |  2 ++
 .../utils/neighbors/radius_search/_torch_impl.py       |  2 --
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/physicsnemo/models/domino/encodings.py b/physicsnemo/models/domino/encodings.py
index 068e4b3a1f..ce55ed46d3 100644
--- a/physicsnemo/models/domino/encodings.py
+++ b/physicsnemo/models/domino/encodings.py
@@ -138,7 +138,6 @@ def __init__(
             radius=radius,
             neighbors_in_radius=neighbors_in_radius,
         )
-
         self.local_point_conv = LocalPointConv(
             input_features=total_neighbors_in_radius,
             base_layer=base_layer,
@@ -201,6 +200,7 @@ def __init__(
         radii: list[float],
         neighbors_in_radius: list[int],
         geo_encoding_type: str,
+        n_upstream_radii: int,
         base_layer: int,
         activation: nn.Module,
         grid_resolution: tuple[int, int, int],
@@ -213,7 +213,7 @@ def __init__(
                     radius=r,
                     neighbors_in_radius=n,
                     total_neighbors_in_radius=self.calculate_total_neighbors_in_radius(
-                        geo_encoding_type, n, radii
+                        geo_encoding_type, n, n_upstream_radii
                     ),
                     base_layer=base_layer,
                     activation=activation,
@@ -224,12 +224,12 @@ def __init__(
         )
 
     def calculate_total_neighbors_in_radius(
-        self, geo_encoding_type: str, neighbors_in_radius: int, radii: list[float]
+        self, geo_encoding_type: str, neighbors_in_radius: int, n_upstream_radii: int
     ) -> list[int]:
         if geo_encoding_type == "both":
-            total_neighbors_in_radius = neighbors_in_radius * (len(radii) + 1)
+            total_neighbors_in_radius = neighbors_in_radius * (n_upstream_radii + 1)
         elif geo_encoding_type == "stl":
-            total_neighbors_in_radius = neighbors_in_radius * (len(radii))
+            total_neighbors_in_radius = neighbors_in_radius * (n_upstream_radii)
         elif geo_encoding_type == "sdf":
             total_neighbors_in_radius = neighbors_in_radius
 
diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py
index bc06289a6b..7a31c2668c 100644
--- a/physicsnemo/models/domino/model.py
+++ b/physicsnemo/models/domino/model.py
@@ -391,6 +391,7 @@ def __init__(
             radii=model_parameters.geometry_local.surface_radii,
             neighbors_in_radius=model_parameters.geometry_local.surface_neighbors_in_radius,
             geo_encoding_type=self.geo_encoding_type,
+            n_upstream_radii=len(model_parameters.geometry_rep.geo_conv.surface_radii),
             base_layer=512,
             activation=get_activation(model_parameters.local_point_conv.activation),
             grid_resolution=self.grid_resolution,
@@ -401,6 +402,7 @@ def __init__(
             radii=model_parameters.geometry_local.volume_radii,
             neighbors_in_radius=model_parameters.geometry_local.volume_neighbors_in_radius,
             geo_encoding_type=self.geo_encoding_type,
+            n_upstream_radii=len(model_parameters.geometry_rep.geo_conv.volume_radii),
             base_layer=512,
             activation=get_activation(model_parameters.local_point_conv.activation),
             grid_resolution=self.grid_resolution,
diff --git a/physicsnemo/utils/neighbors/radius_search/_torch_impl.py b/physicsnemo/utils/neighbors/radius_search/_torch_impl.py
index c6df0f9e81..2b4c3394c3 100644
--- a/physicsnemo/utils/neighbors/radius_search/_torch_impl.py
+++ b/physicsnemo/utils/neighbors/radius_search/_torch_impl.py
@@ -56,8 +56,6 @@ def radius_search_impl(
             dists = torch.empty((0,), device=dists.device, dtype=dists.dtype)
 
     else:
-        print(f"dists shape: {dists.shape}")
-
         # Take the max_points lowest distances for each query
         closest_points = torch.topk(
             dists, k=min(max_points, dists.shape[0]), dim=0, largest=False

From e062f49b6af431cf1259e7f9511fe75f6373b0bf Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 22 Sep 2025 14:27:40 +0000
Subject: [PATCH 51/98] Update CHANGELOG

---
 CHANGELOG.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 84d50c2128..a322d50071 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,11 +25,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - Migrated Stokes MGN example to PyTorch Geometric.
 - Migrated Lennard Jones example to PyTorch Geometric.
+<<<<<<< HEAD
 - Migrated physicsnemo.utils.sdf.signed_distance_field to a static return,
   torch-only interface.  It also now works on distributed meshes and input fields.
 - Refactored DiTBlock to be more modular
 - Added NATTEN 2D neighborhood attention backend for DiTBlock
 - Migrated blood flow example to PyTorch Geometric.
+=======
+- Refactored DoMINO model code for performance optimizations and improved readability.
+>>>>>>> ddfd884 (Update CHANGELOG)
 
 ### Deprecated
 

From 6a26c958597cf6230e3f03e82844107853bb8951 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 29 Sep 2025 13:19:37 +0000
Subject: [PATCH 52/98] Update changelog

---
 CHANGELOG.md | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a322d50071..bb14a90124 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,15 +25,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - Migrated Stokes MGN example to PyTorch Geometric.
 - Migrated Lennard Jones example to PyTorch Geometric.
-<<<<<<< HEAD
 - Migrated physicsnemo.utils.sdf.signed_distance_field to a static return,
   torch-only interface.  It also now works on distributed meshes and input fields.
 - Refactored DiTBlock to be more modular
 - Added NATTEN 2D neighborhood attention backend for DiTBlock
 - Migrated blood flow example to PyTorch Geometric.
-=======
 - Refactored DoMINO model code for performance optimizations and improved readability.
->>>>>>> ddfd884 (Update CHANGELOG)
 
 ### Deprecated
 

From 10bdc955477a50d72748fd1c0c1e3d3ec499194a Mon Sep 17 00:00:00 2001
From: Rishikesh Ranade <dr.rranade@gmail.com>
Date: Mon, 29 Sep 2025 08:25:33 -0700
Subject: [PATCH 53/98] resolving bug and optimizing GeoConvOut for memory

---
 .../domino/src/conf/config.yaml               | 20 ++--
 .../domino/src/inference_on_stl.py            | 36 +++++---
 .../external_aerodynamics/domino/src/test.py  | 56 +++++------
 physicsnemo/datapipes/cae/domino_datapipe.py  | 92 +++++--------------
 physicsnemo/models/domino/geometry_rep.py     | 35 +++----
 physicsnemo/models/domino/model.py            | 10 +-
 6 files changed, 92 insertions(+), 157 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
index 84256a0d97..c6308b0231 100644
--- a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
+++ b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
@@ -18,7 +18,7 @@
 # │            Project Details                │
 # └───────────────────────────────────────────┘  
 project: # Project name
-  name: AWS_Dataset
+  name: DrivAerML_Dataset
   
 exp_tag: 1 # Experiment tag
 # Main output directory.
@@ -65,7 +65,7 @@ variables:
       reference: [38.89] # vector [30, 0, 0] should be specified as [30], while [30, 30, 0] should be [30, 30].
     air_density:
       type: scalar
-      reference: 1.226
+      reference: 1.0
 
 # ┌───────────────────────────────────────────┐
 # │          Training Data Configs            │
@@ -77,8 +77,8 @@ data: # Input directory for training and validation data
     min: [-3.5, -2.25, -0.32]
     max: [8.5, 2.25, 3.00]
   bounding_box_surface: # Bounding box dimensions for car surface
-    min: [-1.1, -1.2, -0.32]
-    max: [4.5, 1.2, 1.3]
+    min: [-1.5, -1.4, -0.32]
+    max: [5.0, 1.4, 1.4]
   gpu_preprocessing: true
   gpu_output: true
 
@@ -95,13 +95,12 @@ domain_parallelism:
 # └───────────────────────────────────────────┘  
 model:
   model_type: combined # train which model? surface, volume, combined
-  activation: "relu" # "relu" or "gelu"
+  activation: "gelu" # "relu" or "gelu"
   loss_function: 
     loss_type: "mse" # mse or rmse
     area_weighing_factor: 10000 # Generally inverse of maximum area
   interp_res: [128, 64, 64] # resolution of latent space 128, 64, 48
   use_sdf_in_basis_func: true # SDF in basis function network
-  positional_encoding: false # calculate positional encoding?
   volume_points_sample: 8192 # Number of points to sample in volume per epoch
   surface_points_sample: 8192 # Number of points to sample on surface per epoch
   surface_sampling_algorithm: area_weighted #random or area_weighted
@@ -109,7 +108,7 @@ model:
   num_neighbors_surface: 7 # How many neighbors on surface?
   num_neighbors_volume: 10 # How many neighbors on volume?
   combine_volume_surface: false # combine volume and surface encodings
-  return_volume_neighbors: true # Whether to return volume neighbors or not
+  return_volume_neighbors: false # Whether to return volume neighbors or not
   use_surface_normals: true # Use surface normals and surface areas for surface computation?
   use_surface_area: true # Use only surface normals and not surface area
   integral_loss_scaling_factor: 100 # Scale integral loss by this factor
@@ -119,9 +118,6 @@ model:
   vol_loss_scaling: 1.0 # scale volume loss with this factor in combined mode
   geometry_encoding_type: both # geometry encoder type, sdf, stl, both
   solution_calculation_mode: two-loop # one-loop is better for sharded, two-loop is lower memory but more overhead. Physics losses are not supported via one-loop presently.
-  resampling_surface_mesh: # resampling of surface mesh before constructing kd tree
-    resample: false #false or true
-    points: 1_000_000 # number of points
   geometry_rep: # Hyperparameters for geometry representation network
     geo_conv:
       base_neurons: 32 # 256 or 64
@@ -131,8 +127,8 @@ model:
       surface_radii: [0.01, 0.05, 1.0] # radii for surface
       surface_hops: 1 # Number of surface iterations
       volume_hops: 1 # Number of volume iterations
-      volume_neighbors_in_radius: [10, 10, 10, 10] # Number of neighbors in radius for volume
-      surface_neighbors_in_radius: [10, 10, 10] # Number of neighbors in radius for surface
+      volume_neighbors_in_radius: [32, 64, 128, 256] # Number of neighbors in radius for volume
+      surface_neighbors_in_radius: [8, 16, 128] # Number of neighbors in radius for surface
       fourier_features: false
       num_modes: 5
       activation: ${model.activation}
diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
index a85cc7df86..e4ec80f2e0 100644
--- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
+++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
@@ -372,6 +372,8 @@ def process_surface_mesh(self, bounding_box=None, bounding_box_surface=None):
         surf_sdf_grid = torch.reshape(surf_sdf_grid, (nx, ny, nz))
 
         if self.normalize_coordinates:
+            sdf_grid = 2.0 * (sdf_grid - torch.amax(grid)) / (torch.amax(grid) - torch.amin(grid)) - 1.0
+            surf_sdf_grid = 2.0 * (surf_sdf_grid - torch.amax(s_grid)) / (torch.amax(s_grid) - torch.amin(s_grid)) - 1.0
             grid = 2.0 * (grid - c_min) / (c_max - c_min) - 1.0
             s_grid = 2.0 * (s_grid - surf_min) / (surf_max - surf_min) - 1.0
 
@@ -533,6 +535,16 @@ def sample_points_on_surface(
 
         surface_area = np.float32(boundary["area"])
 
+        if self.normalize_coordinates:
+            surface_coordinates = (
+                2.0 * (surface_coordinates - c_min) / (c_max - c_min) - 1.0
+            )
+            center_of_mass_normalized = (
+                2.0 * (center_of_mass - c_min) / (c_max - c_min) - 1.0
+            )
+        else:
+            center_of_mass_normalized = center_of_mass
+
         interp_func = KDTree(surface_coordinates)
         dd, ii = interp_func.query(surface_coordinates, k=stencil_size)
         surface_neighbors = surface_coordinates[ii]
@@ -553,12 +565,7 @@ def sample_points_on_surface(
             self.device
         )
 
-        pos_normals_com = surface_coordinates - center_of_mass
-
-        if self.normalize_coordinates:
-            surface_coordinates = (
-                2.0 * (surface_coordinates - c_min) / (c_max - c_min) - 1.0
-            )
+        pos_normals_com = surface_coordinates - center_of_mass_normalized
 
         surface_coordinates = torch.unsqueeze(surface_coordinates, 0)
         surface_normals = torch.unsqueeze(surface_normals, 0)
@@ -637,13 +644,20 @@ def sample_points_in_volume(
         )
         sdf_nodes = torch.unsqueeze(sdf_nodes, -1)
 
-        pos_normals_closest = volume_coordinates - sdf_node_closest_point
-        pos_normals_com = volume_coordinates - center_of_mass
-
         if self.normalize_coordinates:
-            volume_coordinates = (
-                2.0 * (volume_coordinates - c_min) / (c_max - c_min) - 1.0
+            volume_coordinates = 2.0 * (volume_coordinates - c_min) / (c_max - c_min) - 1.0
+            sdf_nodes = 2.0 * (sdf_nodes - torch.amax(c_max)) / (torch.amax(c_max) - torch.amin(c_min)) - 1.0
+            sdf_node_closest_point = (
+                2.0 * (sdf_node_closest_point - c_min) / (c_max - c_min) - 1.0
             )
+            center_of_mass_normalized = (
+                2.0 * (center_of_mass - c_min) / (c_max - c_min) - 1.0
+            )
+        else:
+            center_of_mass_normalized = center_of_mass
+
+        pos_normals_closest = volume_coordinates - sdf_node_closest_point
+        pos_normals_com = volume_coordinates - center_of_mass_normalized
 
         volume_coordinates = torch.unsqueeze(volume_coordinates, 0)
         pos_normals_com = torch.unsqueeze(pos_normals_com, 0)
diff --git a/examples/cfd/external_aerodynamics/domino/src/test.py b/examples/cfd/external_aerodynamics/domino/src/test.py
index 944910f9f8..c799e83f64 100644
--- a/examples/cfd/external_aerodynamics/domino/src/test.py
+++ b/examples/cfd/external_aerodynamics/domino/src/test.py
@@ -464,6 +464,8 @@ def main(cfg: DictConfig):
         surf_grid = np.float32(surf_grid)
         sdf_surf_grid = np.float32(sdf_surf_grid)
         surf_grid_max_min = np.float32(np.asarray([s_min, s_max]))
+        if cfg.model.normalize_coordinates:
+            sdf_surf_grid = normalize(sdf_surf_grid, xp.amax(surf_grid), xp.amin(surf_grid))
 
         # Get global parameters and global parameters scaling from config.yaml
         global_params_names = list(cfg.variables.global_parameters.keys())
@@ -536,6 +538,13 @@ def main(cfg: DictConfig):
                 surface_normals / np.linalg.norm(surface_normals, axis=1)[:, np.newaxis]
             )
 
+            if cfg.model.normalize_coordinates:
+                surface_coordinates = normalize(surface_coordinates, s_max, s_min)
+                surf_grid = normalize(surf_grid, s_max, s_min)
+                center_of_mass_normalized = normalize(center_of_mass, s_max, s_min)
+            else:
+                center_of_mass_normalized = center_of_mass
+
             if cfg.model.num_neighbors_surface > 1:
                 interp_func = KDTree(surface_coordinates)
                 dd, ii = interp_func.query(
@@ -554,22 +563,11 @@ def main(cfg: DictConfig):
                 surface_neighbors_normals = surface_normals
                 surface_neighbors_sizes = surface_sizes
 
-            dx, dy, dz = (
-                (s_max[0] - s_min[0]) / nx,
-                (s_max[1] - s_min[1]) / ny,
-                (s_max[2] - s_min[2]) / nz,
-            )
-
-            if cfg.model.positional_encoding:
-                pos_surface_center_of_mass = calculate_normal_positional_encoding(
-                    surface_coordinates, center_of_mass, cell_length=[dx, dy, dz]
-                )
-            else:
-                pos_surface_center_of_mass = surface_coordinates - center_of_mass
+            
+            pos_surface_center_of_mass = surface_coordinates - center_of_mass_normalized
 
-            surface_coordinates = normalize(surface_coordinates, s_max, s_min)
-            surface_neighbors = normalize(surface_neighbors, s_max, s_min)
-            surf_grid = normalize(surf_grid, s_max, s_min)
+            # surface_coordinates = normalize(surface_coordinates, s_max, s_min)
+            # surface_neighbors = normalize(surface_neighbors, s_max, s_min)
 
         else:
             surface_coordinates = None
@@ -606,11 +604,6 @@ def main(cfg: DictConfig):
                 c_max = np.float32(bounding_box_dims[0])
                 c_min = np.float32(bounding_box_dims[1])
 
-            dx, dy, dz = (
-                (c_max[0] - c_min[0]) / nx,
-                (c_max[1] - c_min[1]) / ny,
-                (c_max[2] - c_min[2]) / nz,
-            )
             # Generate a grid of specified resolution to map the bounding box
             # The grid is used for capturing structured geometry features and SDF representation of geometry
             grid = create_grid(c_max, c_min, [nx, ny, nz])
@@ -635,21 +628,20 @@ def main(cfg: DictConfig):
                 return_cupy=False,
             )
             sdf_nodes = sdf_nodes.reshape(-1, 1)
+            vol_grid_max_min = np.asarray([c_min, c_max])
 
-            if cfg.model.positional_encoding:
-                pos_volume_closest = calculate_normal_positional_encoding(
-                    volume_coordinates, sdf_node_closest_point, cell_length=[dx, dy, dz]
-                )
-                pos_volume_center_of_mass = calculate_normal_positional_encoding(
-                    volume_coordinates, center_of_mass, cell_length=[dx, dy, dz]
-                )
+            if cfg.model.normalize_coordinates:
+                volume_coordinates = normalize(volume_coordinates, c_max, c_min)
+                grid = normalize(grid, c_max, c_min)
+                center_of_mass_normalized = normalize(center_of_mass, c_max, c_min)
+                sdf_grid = normalize(sdf_grid, xp.amax(grid), xp.amin(grid))
+                sdf_nodes = normalize(sdf_nodes, xp.amax(grid), xp.amin(grid))
+                sdf_node_closest_point = normalize(sdf_node_closest_point, c_max, c_min)
             else:
-                pos_volume_closest = volume_coordinates - sdf_node_closest_point
-                pos_volume_center_of_mass = volume_coordinates - center_of_mass
+                center_of_mass_normalized = center_of_mass
 
-            volume_coordinates = normalize(volume_coordinates, c_max, c_min)
-            grid = normalize(grid, c_max, c_min)
-            vol_grid_max_min = np.asarray([c_min, c_max])
+            pos_volume_closest = volume_coordinates - sdf_node_closest_point
+            pos_volume_center_of_mass = volume_coordinates - center_of_mass_normalized
 
         else:
             volume_coordinates = None
diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py
index 0a3ec9e38b..666e4bfad5 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe.py
@@ -50,7 +50,6 @@
     ArrayType,
     area_weighted_shuffle_array,
     calculate_center_of_mass,
-    calculate_normal_positional_encoding,
     create_grid,
     get_filenames,
     mean_std_sampling,
@@ -134,8 +133,6 @@ class DoMINODataConfig:
         surface_variables: (Surface specific) Names of surface variables.
         surface_points_sample: (Surface specific) Number of surface points to sample per batch.
         num_surface_neighbors: (Surface specific) Number of surface neighbors to consider for nearest neighbors approach.
-        resample_surfaces: (Surface specific) Whether to resample the surface before kdtree/knn. Not available if caching.
-        resampling_points: (Surface specific) Number of points to resample the surface to.
         surface_sampling_algorithm: (Surface specific) Algorithm to use for surface sampling ("area_weighted" or "random").
         surface_factors: (Surface specific) Non-dimensionalization factors for surface variables.
             If set, and scaling_type is:
@@ -168,10 +165,6 @@ class DoMINODataConfig:
             - volume.points_sample
         geom_points_sample: Number of STL points sampled per batch.
             Independent of volume.points_sample and surface.points_sample.
-        positional_encoding: Whether to use positional encoding. Affects the calculation of:
-            - pos_volume_closest
-            - pos_volume_center_of_mass
-            - pos_surface_centter_of_mass
         scaling_type: Scaling type for volume variables.
             If used, will rescale the volume_fields and surface fields outputs.
             Requires volume.factor and surface.factor to be set.
@@ -193,8 +186,6 @@ class DoMINODataConfig:
     surface_variables: Optional[Sequence] = ("pMean", "wallShearStress")
     surface_points_sample: int = 1024
     num_surface_neighbors: int = 11
-    resample_surfaces: bool = False
-    resampling_points: int = 1_000_000
     surface_sampling_algorithm: str = Literal["area_weighted", "random"]
     surface_factors: Optional[Sequence] = None
     bounding_box_dims_surf: Optional[Union[BoundingBox, Sequence]] = None
@@ -210,7 +201,6 @@ class DoMINODataConfig:
     sample_in_bbox: bool = False
     sampling: bool = False
     geom_points_sample: int = 300000
-    positional_encoding: bool = False
     scaling_type: Optional[Literal["min_max_scaling", "mean_std_scaling"]] = None
     compute_scaling_factors: bool = False
     caching: bool = False
@@ -236,8 +226,6 @@ def __post_init__(self):
                 raise ValueError("Sampling should be False for caching")
             if self.compute_scaling_factors:
                 raise ValueError("Compute scaling factors should be False for caching")
-            if self.resample_surfaces:
-                raise ValueError("Resample surface should be False for caching")
 
         if self.phase not in [
             "train",
@@ -547,8 +535,6 @@ def preprocess_combined(self, data_dict):
         if mesh_indices_flattened.dtype != xp.int32:
             mesh_indices_flattened = mesh_indices_flattened.astype(xp.int32)
 
-        center_of_mass = calculate_center_of_mass(stl_centers, stl_sizes)
-
         if self.config.bounding_box_dims_surf is None:
             s_max = xp.amax(stl_vertices, 0)
             s_min = xp.amin(stl_vertices, 0)
@@ -556,6 +542,8 @@ def preprocess_combined(self, data_dict):
             s_max = xp.asarray(self.config.bounding_box_dims_surf[0])
             s_min = xp.asarray(self.config.bounding_box_dims_surf[1])
 
+        center_of_mass = calculate_center_of_mass(stl_centers, stl_sizes)
+
         # SDF calculation on the grid using WARP
         if not self.config.compute_scaling_factors:
             nx, ny, nz = self.config.grid_resolution
@@ -570,6 +558,8 @@ def preprocess_combined(self, data_dict):
             )
             sdf_surf_grid = sdf_surf_grid.reshape(nx, ny, nz)
             sdf_surf_grid = _convert_torch_to_array(sdf_surf_grid, self.array_provider)
+            if self.config.normalize_coordinates:
+                sdf_surf_grid = normalize(sdf_surf_grid, xp.amax(surf_grid), xp.amin(surf_grid))
 
         else:
             surf_grid = None
@@ -634,19 +624,6 @@ def preprocess_surface(self, data_dict, core_dict, center_of_mass, s_min, s_max)
 
         xp = self.array_provider
 
-        if self.config.resample_surfaces:
-            if self.config.resampling_points > surface_coordinates.shape[0]:
-                resampling_points = surface_coordinates.shape[0]
-            else:
-                resampling_points = self.config.resampling_points
-
-            surface_coordinates, idx_s = shuffle_array(
-                surface_coordinates, resampling_points
-            )
-            surface_normals = surface_normals[idx_s]
-            surface_sizes = surface_sizes[idx_s]
-            surface_fields = surface_fields[idx_s]
-
         if not self.config.compute_scaling_factors:
             c_max = self.config.bounding_box_dims[0]
             c_min = self.config.bounding_box_dims[1]
@@ -667,20 +644,16 @@ def preprocess_surface(self, data_dict, core_dict, center_of_mass, s_min, s_max)
                 surface_sizes = surface_sizes[ids_in_bbox]
                 surface_fields = surface_fields[ids_in_bbox]
 
-            # Compute the positional encoding before sampling
-            if self.config.positional_encoding:
-                dx, dy, dz = (
-                    (s_max[0] - s_min[0]) / nx,
-                    (s_max[1] - s_min[1]) / ny,
-                    (s_max[2] - s_min[2]) / nz,
-                )
-                pos_normals_com_surface = calculate_normal_positional_encoding(
-                    surface_coordinates, center_of_mass, cell_dimensions=[dx, dy, dz]
-                )
+            
+            # Have to normalize neighbors after the kNN and sampling
+            if self.config.normalize_coordinates:
+                core_dict["surf_grid"] = normalize(core_dict["surf_grid"], s_max, s_min)
+                surface_coordinates = normalize(surface_coordinates, s_max, s_min)
+                center_of_mass_normalized = normalize(xp.asarray(center_of_mass), s_max, s_min)
             else:
-                pos_normals_com_surface = surface_coordinates - xp.asarray(
-                    center_of_mass
-                )
+                center_of_mass_normalized = xp.asarray(center_of_mass)
+
+            pos_normals_com_surface = surface_coordinates - center_of_mass_normalized
 
             # Fit the kNN (or KDTree, if CPU) on ALL points:
             if self.config.num_surface_neighbors > 1:
@@ -781,12 +754,6 @@ def preprocess_surface(self, data_dict, core_dict, center_of_mass, s_min, s_max)
                 surface_neighbors_normals = surface_normals[ii][:, 1:]
                 surface_neighbors_sizes = surface_sizes[ii][:, 1:]
 
-            # Have to normalize neighbors after the kNN and sampling
-            if self.config.normalize_coordinates:
-                core_dict["surf_grid"] = normalize(core_dict["surf_grid"], s_max, s_min)
-                surface_coordinates = normalize(surface_coordinates, s_max, s_min)
-                surface_neighbors = normalize(surface_neighbors, s_max, s_min)
-
             if self.config.scaling_type is not None:
                 if self.config.surface_factors is not None:
                     if self.config.scaling_type == "mean_std_scaling":
@@ -870,12 +837,6 @@ def preprocess_volume(
                 volume_coordinates = volume_coordinates[ids_in_bbox]
                 volume_fields = volume_fields[ids_in_bbox]
 
-            dx, dy, dz = (
-                (c_max[0] - c_min[0]) / nx,
-                (c_max[1] - c_min[1]) / ny,
-                (c_max[2] - c_min[2]) / nz,
-            )
-
             # Generate a grid of specified resolution to map the bounding box
             # The grid is used for capturing structured geometry features and SDF representation of geometry
             grid = create_grid(c_max, c_min, [nx, ny, nz])
@@ -924,22 +885,18 @@ def preprocess_volume(
 
             sdf_nodes = sdf_nodes.reshape((-1, 1))
 
-            if self.config.positional_encoding:
-                pos_normals_closest_vol = calculate_normal_positional_encoding(
-                    volume_coordinates,
-                    sdf_node_closest_point,
-                    cell_dimensions=[dx, dy, dz],
-                )
-                pos_normals_com_vol = calculate_normal_positional_encoding(
-                    volume_coordinates, center_of_mass, cell_dimensions=[dx, dy, dz]
-                )
-            else:
-                pos_normals_closest_vol = volume_coordinates - sdf_node_closest_point
-                pos_normals_com_vol = volume_coordinates - center_of_mass
-
             if self.config.normalize_coordinates:
                 volume_coordinates = normalize(volume_coordinates, c_max, c_min)
                 grid = normalize(grid, c_max, c_min)
+                sdf_grid = normalize(sdf_grid, xp.amax(grid), xp.amin(grid))
+                sdf_nodes = normalize(sdf_nodes, xp.amax(grid), xp.amin(grid))
+                sdf_node_closest_point = normalize(sdf_node_closest_point, c_max, c_min)
+                center_of_mass_normalized = normalize(xp.asarray(center_of_mass), c_max, c_min)
+            else:
+                center_of_mass_normalized = xp.asarray(center_of_mass)
+        
+            pos_normals_closest_vol = volume_coordinates - sdf_node_closest_point
+            pos_normals_com_vol = volume_coordinates - center_of_mass_normalized
 
             if self.config.scaling_type is not None:
                 if self.config.volume_factors is not None:
@@ -1086,7 +1043,6 @@ def compute_scaling_factors(cfg: DictConfig, input_path: str, use_cache: bool) -
                 sample_in_bbox=True,
                 volume_points_sample=cfg.model.volume_points_sample,
                 geom_points_sample=cfg.model.geom_points_sample,
-                positional_encoding=cfg.model.positional_encoding,
                 model_type=cfg.model.model_type,
                 bounding_box_dims=cfg.data.bounding_box,
                 bounding_box_dims_surf=cfg.data.bounding_box_surface,
@@ -1200,7 +1156,6 @@ def compute_scaling_factors(cfg: DictConfig, input_path: str, use_cache: bool) -
                 sample_in_bbox=True,
                 volume_points_sample=cfg.model.volume_points_sample,
                 geom_points_sample=cfg.model.geom_points_sample,
-                positional_encoding=cfg.model.positional_encoding,
                 model_type=cfg.model.model_type,
                 bounding_box_dims=cfg.data.bounding_box,
                 bounding_box_dims_surf=cfg.data.bounding_box_surface,
@@ -1484,7 +1439,6 @@ def create_domino_dataset(
             volume_points_sample=cfg.model.volume_points_sample,
             surface_points_sample=cfg.model.surface_points_sample,
             geom_points_sample=cfg.model.geom_points_sample,
-            positional_encoding=cfg.model.positional_encoding,
             volume_factors=vol_factors,
             surface_factors=surf_factors,
             scaling_type=cfg.model.normalization,
@@ -1492,8 +1446,6 @@ def create_domino_dataset(
             bounding_box_dims=cfg.data.bounding_box,
             bounding_box_dims_surf=cfg.data.bounding_box_surface,
             num_surface_neighbors=cfg.model.num_neighbors_surface,
-            resample_surfaces=cfg.model.resampling_surface_mesh.resample,
-            resampling_points=cfg.model.resampling_surface_mesh.points,
             surface_sampling_algorithm=cfg.model.surface_sampling_algorithm,
             **overrides,
         )
diff --git a/physicsnemo/models/domino/geometry_rep.py b/physicsnemo/models/domino/geometry_rep.py
index ea77ef5f15..077df840e2 100644
--- a/physicsnemo/models/domino/geometry_rep.py
+++ b/physicsnemo/models/domino/geometry_rep.py
@@ -65,6 +65,7 @@ class GeoConvOut(nn.Module):
     def __init__(
         self,
         input_features: int,
+        neighbors_in_radius: int,
         model_parameters,
         grid_resolution=None,
     ):
@@ -73,6 +74,7 @@ def __init__(
 
         Args:
             input_features: Number of input feature dimensions
+            neighbors_in_radius: Number of neighbors in radius
             model_parameters: Configuration parameters for the model
             grid_resolution: Resolution of the output grid [nx, ny, nz]
         """
@@ -84,9 +86,9 @@ def __init__(
         self.num_modes = model_parameters.num_modes
 
         if self.fourier_features:
-            input_features_calculated = input_features * (1 + 2 * self.num_modes)
+            input_features_calculated = input_features * (1 + 2 * self.num_modes) * neighbors_in_radius
         else:
-            input_features_calculated = input_features
+            input_features_calculated = input_features * neighbors_in_radius
 
         self.fc1 = nn.Linear(input_features_calculated, base_neurons)
         self.fc2 = nn.Linear(base_neurons, base_neurons // 2)
@@ -96,6 +98,8 @@ def __init__(
 
         self.activation = get_activation(model_parameters.activation)
 
+        self.neighbors_in_radius = neighbors_in_radius
+
         if self.fourier_features:
             self.register_buffer(
                 "freqs", torch.exp(torch.linspace(0, math.pi, self.num_modes))
@@ -127,13 +131,8 @@ def forward(
             self.grid_resolution[2],
         )
         grid = grid.reshape(1, nx * ny * nz, 3, 1)
-        x_transposed = torch.transpose(x, 2, 3)
-        dist_weights = 1.0 / (1e-6 + (x_transposed - grid) ** 2.0)
-        dist_weights = torch.transpose(dist_weights, 2, 3)
 
-        # x = torch.sum(x * dist_weights, 2) / torch.sum(dist_weights, 2)
-        # x = torch.sum(x, 2)
-        mask = abs(x - 0) > 1e-6
+        x = rearrange(x, "b x y z -> b x (y z)", x=nx*ny*nz, y=self.neighbors_in_radius, z=3)
         if self.fourier_features:
             facets = torch.cat((x, fourier_encode_vectorized(x, self.freqs)), axis=-1)
         else:
@@ -142,12 +141,8 @@ def forward(
         x = self.activation(self.fc2(x))
         x = F.tanh(self.fc3(x))
 
-        mask = mask[:, :, :, 0:1].expand(
-            mask.shape[0], mask.shape[1], mask.shape[2], x.shape[-1]
-        )
-
-        x = torch.sum(x * mask, 2)
         x = rearrange(x, "b (x y z) c -> b c x y z", x=nx, y=ny, z=nz)
+
         return x
 
 
@@ -337,11 +332,6 @@ def __init__(
                             output_filters=geometry_rep.geo_conv.base_neurons_out,
                             model_parameters=geometry_rep.geo_processor,
                         ),
-                        GeoProcessor(
-                            input_filters=geometry_rep.geo_conv.base_neurons_in,
-                            output_filters=geometry_rep.geo_conv.base_neurons_out,
-                            model_parameters=geometry_rep.geo_processor,
-                        ),
                     )
                 )
             else:
@@ -349,10 +339,11 @@ def __init__(
 
         self.geo_conv_out = nn.ModuleList()
         self.geo_processor_out = nn.ModuleList()
-        for _ in range(len(radii)):
+        for u in range(len(radii)):
             self.geo_conv_out.append(
                 GeoConvOut(
                     input_features=input_features,
+                    neighbors_in_radius=neighbors_in_radius[u],
                     model_parameters=geometry_rep.geo_conv,
                     grid_resolution=model_parameters.interp_res,
                 )
@@ -403,15 +394,11 @@ def __init__(
                     output_filters=geometry_rep.geo_conv.base_neurons_out,
                     model_parameters=geometry_rep.geo_processor,
                 ),
-                GeoProcessor(
-                    input_filters=geometry_rep.geo_conv.base_neurons_out,
-                    output_filters=geometry_rep.geo_conv.base_neurons_out,
-                    model_parameters=geometry_rep.geo_processor,
-                ),
             )
         else:
             raise ValueError("Invalid prompt. Specify unet or conv ...")
         self.radii = radii
+        self.neighbors_in_radius = neighbors_in_radius
         self.hops = hops
 
         self.geo_processor_sdf_out = nn.Conv3d(
diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py
index 7a31c2668c..8ac54f8e9a 100644
--- a/physicsnemo/models/domino/model.py
+++ b/physicsnemo/models/domino/model.py
@@ -359,10 +359,7 @@ def __init__(
         self.activation = get_activation(model_parameters.activation)
         self.use_sdf_in_basis_func = model_parameters.use_sdf_in_basis_func
         if self.output_features_vol is not None:
-            if model_parameters.positional_encoding:
-                inp_pos_vol = 25 if model_parameters.use_sdf_in_basis_func else 12
-            else:
-                inp_pos_vol = 7 if model_parameters.use_sdf_in_basis_func else 3
+            inp_pos_vol = 7 if model_parameters.use_sdf_in_basis_func else 3
 
             self.fc_p_vol = EncodingMLP(
                 input_features=inp_pos_vol,
@@ -373,10 +370,7 @@ def __init__(
             )
 
         if self.output_features_surf is not None:
-            if model_parameters.positional_encoding:
-                inp_pos_surf = 12
-            else:
-                inp_pos_surf = 3
+            inp_pos_surf = 3
 
             self.fc_p_surf = EncodingMLP(
                 input_features=inp_pos_surf,

From ffedfaa7defe560bdf668a273cc777bf38df58c7 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 29 Sep 2025 16:34:04 +0000
Subject: [PATCH 54/98] Resolve most of the feedback from PR review.

---
 physicsnemo/models/domino/ball_query.py       | 106 ----
 physicsnemo/models/domino/encodings.py        |  82 +--
 physicsnemo/models/domino/geometry_rep.py     |  43 +-
 physicsnemo/models/domino/mlps.py             |  84 +--
 physicsnemo/models/domino/model.py            |  17 +-
 physicsnemo/models/domino/solutions.py        |  73 +--
 physicsnemo/models/layers/__init__.py         |   9 +-
 physicsnemo/models/layers/ball_query.py       | 550 +++---------------
 physicsnemo/models/layers/fourier_layers.py   |  80 +++
 physicsnemo/models/layers/mlp_layers.py       |  87 ++-
 test/models/data/mlp_output.pth               | Bin 0 -> 1598 bytes
 test/models/domino/test_domino_encodings.py   |  22 +-
 .../models/domino/test_domino_geometry_rep.py |   5 +-
 test/models/domino/test_domino_mlps.py        |  47 +-
 test/models/domino/test_domino_solutions.py   |  15 +-
 test/models/test_mlp_layers.py                |  75 +++
 16 files changed, 430 insertions(+), 865 deletions(-)
 delete mode 100644 physicsnemo/models/domino/ball_query.py
 create mode 100644 test/models/data/mlp_output.pth
 create mode 100644 test/models/test_mlp_layers.py

diff --git a/physicsnemo/models/domino/ball_query.py b/physicsnemo/models/domino/ball_query.py
deleted file mode 100644
index 681fe80733..0000000000
--- a/physicsnemo/models/domino/ball_query.py
+++ /dev/null
@@ -1,106 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
-# SPDX-FileCopyrightText: All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-This code contains the DoMINO model architecture.
-The DoMINO class contains an architecture to model both surface and
-volume quantities together as well as separately (controlled using
-the config.yaml file)
-"""
-
-import torch
-import torch.nn as nn
-from einops import rearrange
-
-from physicsnemo.utils.neighbors import radius_search
-
-
-class BQWarp(nn.Module):
-    """
-    Warp-based ball-query layer for finding neighboring points within a specified radius.
-
-    This layer uses an accelerated ball query implementation to efficiently find points
-    within a specified radius of query points.
-    """
-
-    def __init__(
-        self,
-        radius: float = 0.25,
-        neighbors_in_radius: int = 10,
-    ):
-        """
-        Initialize the BQWarp layer.
-
-        Args:
-            radius: Radius for ball query operation
-            neighbors_in_radius: Maximum number of neighbors to return within radius
-        """
-        super().__init__()
-
-        self.radius = radius
-        self.neighbors_in_radius = neighbors_in_radius
-
-    def forward(
-        self, x: torch.Tensor, p_grid: torch.Tensor, reverse_mapping: bool = True
-    ) -> tuple[torch.Tensor, torch.Tensor]:
-        """
-        Performs ball query operation to find neighboring points and their features.
-
-        This method uses the Warp-accelerated ball query implementation to find points
-        within a specified radius. It can operate in two modes:
-        - Forward mapping: Find points from x that are near p_grid points (reverse_mapping=False)
-        - Reverse mapping: Find points from p_grid that are near x points (reverse_mapping=True)
-
-        Args:
-            x: Tensor of shape (batch_size, num_points, 3+features) containing point coordinates
-               and their features
-            p_grid: Tensor of shape (batch_size, grid_x, grid_y, grid_z, 3) containing grid point
-                   coordinates
-            reverse_mapping: Boolean flag to control the direction of the mapping:
-                            - True: Find p_grid points near x points
-                            - False: Find x points near p_grid points
-
-        Returns:
-            tuple containing:
-                - mapping: Tensor containing indices of neighboring points
-                - outputs: Tensor containing coordinates of the neighboring points
-        """
-
-        if p_grid.ndim != 3:
-            p_grid = rearrange(p_grid, "b nx ny nz c -> b (nx ny nz) c")
-
-        if reverse_mapping:
-            mapping, outputs = radius_search(
-                x[0],
-                p_grid[0],
-                self.radius,
-                self.neighbors_in_radius,
-                return_points=True,
-            )
-            mapping = mapping.unsqueeze(0)
-            outputs = outputs.unsqueeze(0)
-        else:
-            mapping, outputs = radius_search(
-                p_grid[0],
-                x[0],
-                self.radius,
-                self.neighbors_in_radius,
-                return_points=True,
-            )
-            mapping = mapping.unsqueeze(0)
-            outputs = outputs.unsqueeze(0)
-
-        return mapping, outputs
diff --git a/physicsnemo/models/domino/encodings.py b/physicsnemo/models/domino/encodings.py
index ce55ed46d3..55ce655090 100644
--- a/physicsnemo/models/domino/encodings.py
+++ b/physicsnemo/models/domino/encodings.py
@@ -21,91 +21,13 @@
 the config.yaml file)
 """
 
-import math
-
 import torch
 import torch.nn as nn
 from einops import rearrange
 
-from .ball_query import BQWarp
-from .mlps import MLP, LocalPointConv
-
-
-def fourier_encode_vectorized(
-    coords: torch.Tensor, freqs: torch.Tensor
-) -> torch.Tensor:
-    """Vectorized Fourier feature encoding
-
-    Args:
-        coords: Tensor containing coordinates, of shape (batch_size, D)
-        freqs: Tensor containing frequencies, of shape (F,) (num frequencies)
-
-    Returns:
-        Tensor containing Fourier features, of shape (batch_size, D * 2 * F)
-    """
-
-    D = coords.shape[-1]
-    F = freqs.shape[0]
-
-    freqs = freqs[None, None, :, None]  # reshape to [*, F, 1] for broadcasting
-
-    coords = coords.unsqueeze(-2)  # [*, 1, D]
-    scaled = (coords * freqs).reshape(*coords.shape[:-2], D * F)  # [*, D, F]
-    features = torch.cat([torch.sin(scaled), torch.cos(scaled)], dim=-1)  # [*, D, 2F]
-
-    return features.reshape(*coords.shape[:-2], D * 2 * F)  # [*, D * 2F]
-
-
-class EncodingMLP(nn.Module):
-    """
-    This is an MLP that will, optionally, fourier encode the input features.
-
-    The encoded features are concatenated to the original inputs, and then
-    processed with an MLP.
-
-    Args:
-        input_features: The number of input features to the MLP.
-        base_layer: The number of neurons in the hidden layer of the MLP.
-        fourier_features: Whether to fourier encode the input features.
-        num_modes: The number of modes to use for the fourier encoding.
-        activation: The activation function to use in the MLP.
-
-    """
-
-    def __init__(
-        self,
-        input_features: int,
-        base_layer: int,
-        fourier_features: bool,
-        num_modes: int,
-        activation: nn.Module,
-    ):
-        super().__init__()
-        self.fourier_features = fourier_features
-
-        # self.num_modes = model_parameters.num_modes
-
-        if self.fourier_features:
-            input_features_calculated = input_features + input_features * num_modes * 2
-            self.register_buffer(
-                "freqs", torch.exp(torch.linspace(0, math.pi, num_modes))
-            )
-        else:
-            input_features_calculated = input_features
-
-        self.mlp = MLP(
-            input_features=input_features_calculated,
-            base_layer=base_layer,
-            output_features=base_layer,
-            activation=activation,
-            n_layers=3,
-        )
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        if self.fourier_features:
-            x = torch.cat((x, fourier_encode_vectorized(x, self.freqs)), dim=-1)
+from physicsnemo.models.layers import BQWarp
 
-        return self.mlp(x)
+from .mlps import LocalPointConv
 
 
 class LocalGeometryEncoding(nn.Module):
diff --git a/physicsnemo/models/domino/geometry_rep.py b/physicsnemo/models/domino/geometry_rep.py
index ea77ef5f15..698e5ff19f 100644
--- a/physicsnemo/models/domino/geometry_rep.py
+++ b/physicsnemo/models/domino/geometry_rep.py
@@ -15,29 +15,17 @@
 # limitations under the License.
 
 import math
-from typing import Callable, Literal, Sequence
+from typing import Sequence
 
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from einops import rearrange
 
+from physicsnemo.models.layers import BQWarp, Mlp, fourier_encode, get_activation
 from physicsnemo.models.unet import UNet
 
-from .ball_query import BQWarp
-from .encodings import fourier_encode_vectorized
-
-
-def get_activation(activation: Literal["relu", "gelu"]) -> Callable:
-    """
-    Return a PyTorch activation function corresponding to the given name.
-    """
-    if activation == "relu":
-        return nn.ReLU()
-    elif activation == "gelu":
-        return nn.GELU()
-    else:
-        raise ValueError(f"Activation function {activation} not found")
+# from .encodings import fourier_encode
 
 
 def scale_sdf(sdf: torch.Tensor) -> torch.Tensor:
@@ -88,9 +76,17 @@ def __init__(
         else:
             input_features_calculated = input_features
 
-        self.fc1 = nn.Linear(input_features_calculated, base_neurons)
-        self.fc2 = nn.Linear(base_neurons, base_neurons // 2)
-        self.fc3 = nn.Linear(base_neurons // 2, model_parameters.base_neurons_in)
+        self.mlp = Mlp(
+            in_features=input_features_calculated,
+            hidden_features=[base_neurons, base_neurons // 2],
+            out_features=model_parameters.base_neurons_in,
+            act_layer=get_activation(model_parameters.activation),
+            drop=0.0,
+        )
+
+        # self.fc1 = nn.Linear(input_features_calculated, base_neurons)
+        # self.fc2 = nn.Linear(base_neurons, base_neurons // 2)
+        # self.fc3 = nn.Linear(base_neurons // 2, model_parameters.base_neurons_in)
 
         self.grid_resolution = grid_resolution
 
@@ -135,12 +131,13 @@ def forward(
         # x = torch.sum(x, 2)
         mask = abs(x - 0) > 1e-6
         if self.fourier_features:
-            facets = torch.cat((x, fourier_encode_vectorized(x, self.freqs)), axis=-1)
+            facets = torch.cat((x, fourier_encode(x, self.freqs)), axis=-1)
         else:
             facets = x
-        x = self.activation(self.fc1(facets))
-        x = self.activation(self.fc2(x))
-        x = F.tanh(self.fc3(x))
+        # x = self.activation(self.fc1(facets))
+        # x = self.activation(self.fc2(x))
+        # x = F.tanh(self.fc3(x))
+        x = F.tanh(self.mlp(facets))
 
         mask = mask[:, :, :, 0:1].expand(
             mask.shape[0], mask.shape[1], mask.shape[2], x.shape[-1]
@@ -272,6 +269,8 @@ def __init__(
         neighbors_in_radius,
         hops=1,
         model_parameters=None,
+        # activation_conv: nn.Module,
+        # activation_processor: nn.Module,
     ):
         """
         Initialize the GeometryRep module.
diff --git a/physicsnemo/models/domino/mlps.py b/physicsnemo/models/domino/mlps.py
index e74583dea0..f074fa7735 100644
--- a/physicsnemo/models/domino/mlps.py
+++ b/physicsnemo/models/domino/mlps.py
@@ -15,60 +15,17 @@
 # limitations under the License.
 
 """
-This code contains the DoMINO model architecture.
-The DoMINO class contains an architecture to model both surface and
-volume quantities together as well as separately (controlled using
-the config.yaml file)
+This file contains specific MLPs for the DoMINO model.
+
+The main feature here is we've locked in the number of layers.
 """
 
-import torch
 import torch.nn as nn
 
+from physicsnemo.models.layers import Mlp
 
-class MLP(nn.Module):
-    """
-    FlexibleMulti-layer perceptron (MLP) module.
 
-    This is reused in various domino layers to simplify and unify
-    the MLP implementations.
-    """
-
-    def __init__(
-        self,
-        input_features: int,
-        output_features: int,
-        base_layer: int,
-        activation: nn.Module,
-        n_layers: int,
-    ):
-        super(MLP, self).__init__()
-        self.input_features = input_features
-
-        modules = []
-
-        if n_layers == 1:
-            # Single layer: input_features -> output_features
-            modules.append(nn.Linear(input_features, output_features))
-        else:
-            # First layer: input_features -> base_layer
-            modules.append(nn.Linear(input_features, base_layer))
-            modules.append(activation)
-
-            # Hidden layers: base_layer -> base_layer
-            for _ in range(n_layers - 2):
-                modules.append(nn.Linear(base_layer, base_layer))
-                modules.append(activation)
-
-            # Final layer: base_layer -> output_features (no activation)
-            modules.append(nn.Linear(base_layer, output_features))
-
-        self.mlp_modules = torch.nn.Sequential(*modules)
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        return self.mlp_modules(x)
-
-
-class AggregationModel(MLP):
+class AggregationModel(Mlp):
     """
     Neural network module to aggregate local geometry encoding with basis functions.
 
@@ -76,6 +33,8 @@ class AggregationModel(MLP):
     to predict the final output quantities. It serves as the final prediction layer
     that integrates all available information sources.
 
+    It is implemented as a straightforward MLP with 5 total layers.
+
     """
 
     def __init__(
@@ -85,17 +44,22 @@ def __init__(
         base_layer: int,
         activation: nn.Module,
     ):
+        hidden_features = [base_layer, base_layer, base_layer, base_layer]
+
         super().__init__(
-            input_features=input_features,
-            output_features=output_features,
-            base_layer=base_layer,
-            activation=activation,
-            n_layers=5,
+            in_features=input_features,
+            hidden_features=hidden_features,
+            out_features=output_features,
+            act_layer=activation,
+            drop=0.0,
         )
 
 
-class LocalPointConv(MLP):
-    """Layer for local geometry point kernel"""
+class LocalPointConv(Mlp):
+    """Layer for local geometry point kernel
+
+    This is a straight forward MLP, with exactly two layers.
+    """
 
     def __init__(
         self,
@@ -105,9 +69,9 @@ def __init__(
         activation: nn.Module,
     ):
         super().__init__(
-            input_features=input_features,
-            base_layer=base_layer,
-            output_features=output_features,
-            activation=activation,
-            n_layers=2,
+            in_features=input_features,
+            hidden_features=base_layer,
+            out_features=output_features,
+            act_layer=activation,
+            drop=0.0,
         )
diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py
index 7a31c2668c..23a68ca56c 100644
--- a/physicsnemo/models/domino/model.py
+++ b/physicsnemo/models/domino/model.py
@@ -24,13 +24,13 @@
 import torch
 import torch.nn as nn
 
+from physicsnemo.models.layers import FourierMLP, get_activation
 from physicsnemo.models.unet import UNet
 
 from .encodings import (
-    EncodingMLP,
     MultiGeometryEncoding,
 )
-from .geometry_rep import GeometryRep, get_activation
+from .geometry_rep import GeometryRep
 from .mlps import AggregationModel
 from .solutions import SolutionCalculatorSurface, SolutionCalculatorVolume
 
@@ -283,7 +283,7 @@ def __init__(
         if self.encode_parameters:
             # Defining the parameter model
             base_layer_p = model_parameters.parameter_model.base_layer
-            self.parameter_model = EncodingMLP(
+            self.parameter_model = FourierMLP(
                 input_features=self.global_features,
                 fourier_features=model_parameters.parameter_model.fourier_features,
                 num_modes=model_parameters.parameter_model.num_modes,
@@ -324,7 +324,7 @@ def __init__(
                 self.num_variables_surf
             ):  # Have the same basis function for each variable
                 self.nn_basis_surf.append(
-                    EncodingMLP(
+                    FourierMLP(
                         input_features=input_features_surface,
                         base_layer=model_parameters.nn_basis_functions.base_layer,
                         fourier_features=model_parameters.nn_basis_functions.fourier_features,
@@ -342,7 +342,7 @@ def __init__(
                 self.num_variables_vol
             ):  # Have the same basis function for each variable
                 self.nn_basis_vol.append(
-                    EncodingMLP(
+                    FourierMLP(
                         input_features=input_features,
                         base_layer=model_parameters.nn_basis_functions.base_layer,
                         fourier_features=model_parameters.nn_basis_functions.fourier_features,
@@ -364,7 +364,7 @@ def __init__(
             else:
                 inp_pos_vol = 7 if model_parameters.use_sdf_in_basis_func else 3
 
-            self.fc_p_vol = EncodingMLP(
+            self.fc_p_vol = FourierMLP(
                 input_features=inp_pos_vol,
                 fourier_features=model_parameters.position_encoder.fourier_features,
                 num_modes=model_parameters.position_encoder.num_modes,
@@ -378,7 +378,7 @@ def __init__(
             else:
                 inp_pos_surf = 3
 
-            self.fc_p_surf = EncodingMLP(
+            self.fc_p_surf = FourierMLP(
                 input_features=inp_pos_surf,
                 fourier_features=model_parameters.position_encoder.fourier_features,
                 num_modes=model_parameters.position_encoder.num_modes,
@@ -450,7 +450,6 @@ def __init__(
                 num_sample_points=self.num_sample_points_surface,
                 use_surface_normals=self.use_surface_normals,
                 use_surface_area=self.use_surface_area,
-                noise_intensity=50,
                 encode_parameters=self.encode_parameters,
                 parameter_model=self.parameter_model
                 if self.encode_parameters
@@ -498,7 +497,7 @@ def __init__(
                 nn_basis=self.nn_basis_vol,
             )
 
-    def forward(self, data_dict, return_volume_neighbors=False):
+    def forward(self, data_dict):
         # Loading STL inputs, bounding box grids, precomputed SDF and scaling factors
 
         # STL nodes
diff --git a/physicsnemo/models/domino/solutions.py b/physicsnemo/models/domino/solutions.py
index c3968e8dcf..23a7e36f39 100644
--- a/physicsnemo/models/domino/solutions.py
+++ b/physicsnemo/models/domino/solutions.py
@@ -27,6 +27,27 @@
 import torch.nn as nn
 
 
+def apply_parameter_encoding(
+    mesh_centers: torch.Tensor,
+    global_params_values: torch.Tensor,
+    global_params_reference: torch.Tensor,
+) -> torch.Tensor:
+    processed_parameters = []
+    for k in range(global_params_values.shape[1]):
+        param = torch.unsqueeze(global_params_values[:, k, :], 1)
+        ref = torch.unsqueeze(global_params_reference[:, k, :], 1)
+        param = param.expand(
+            param.shape[0],
+            mesh_centers.shape[1],
+            param.shape[2],
+        )
+        param = param / ref
+        processed_parameters.append(param)
+    processed_parameters = torch.cat(processed_parameters, axis=-1)
+
+    return processed_parameters
+
+
 def sample_sphere(center, r, num_points):
     """Uniformly sample points in a 3D sphere around the center.
 
@@ -122,28 +143,6 @@ def __init__(
                     "Parameter model is required when encode_parameters is True"
                 )
 
-    def apply_parameter_encoding(
-        self,
-        mesh_centers: torch.Tensor,
-        global_params_values: torch.Tensor,
-        global_params_reference: torch.Tensor,
-    ) -> torch.Tensor:
-        processed_parameters = []
-        for k in range(global_params_values.shape[1]):
-            param = torch.unsqueeze(global_params_values[:, k, :], 1)
-            ref = torch.unsqueeze(global_params_reference[:, k, :], 1)
-            param = param.expand(
-                param.shape[0],
-                mesh_centers.shape[1],
-                param.shape[2],
-            )
-            param = param / ref
-            processed_parameters.append(param)
-        processed_parameters = torch.cat(processed_parameters, axis=-1)
-        param_encoding = self.parameter_model(processed_parameters)
-
-        return param_encoding
-
     def forward(
         self,
         volume_mesh_centers: torch.Tensor,
@@ -156,9 +155,10 @@ def forward(
         Forward pass of the SolutionCalculator module.
         """
         if self.encode_parameters:
-            param_encoding = self.apply_parameter_encoding(
+            param_encoding = apply_parameter_encoding(
                 volume_mesh_centers, global_params_values, global_params_reference
             )
+            param_encoding = self.parameter_model(param_encoding)
 
         volume_m_c_perturbed = [volume_mesh_centers.unsqueeze(2)]
 
@@ -266,7 +266,6 @@ def __init__(
         self,
         num_variables: int,
         num_sample_points: int,
-        noise_intensity: float,
         encode_parameters: bool,
         use_surface_normals: bool,
         use_surface_area: bool,
@@ -277,7 +276,6 @@ def __init__(
         super().__init__()
         self.num_variables = num_variables
         self.num_sample_points = num_sample_points
-        self.noise_intensity = noise_intensity
         self.encode_parameters = encode_parameters
         self.use_surface_normals = use_surface_normals
         self.use_surface_area = use_surface_area
@@ -291,28 +289,6 @@ def __init__(
                     "Parameter model is required when encode_parameters is True"
                 )
 
-    def apply_parameter_encoding(
-        self,
-        mesh_centers: torch.Tensor,
-        global_params_values: torch.Tensor,
-        global_params_reference: torch.Tensor,
-    ) -> torch.Tensor:
-        processed_parameters = []
-        for k in range(global_params_values.shape[1]):
-            param = torch.unsqueeze(global_params_values[:, k, :], 1)
-            ref = torch.unsqueeze(global_params_reference[:, k, :], 1)
-            param = param.expand(
-                param.shape[0],
-                mesh_centers.shape[1],
-                param.shape[2],
-            )
-            param = param / ref
-            processed_parameters.append(param)
-        processed_parameters = torch.cat(processed_parameters, axis=-1)
-        param_encoding = self.parameter_model(processed_parameters)
-
-        return param_encoding
-
     def forward(
         self,
         surface_mesh_centers: torch.Tensor,
@@ -329,9 +305,10 @@ def forward(
         """Function to approximate solution given the neighborhood information"""
 
         if self.encode_parameters:
-            param_encoding = self.apply_parameter_encoding(
+            param_encoding = apply_parameter_encoding(
                 surface_mesh_centers, global_params_values, global_params_reference
             )
+            param_encoding = self.parameter_model(param_encoding)
 
         centers_inputs = [
             surface_mesh_centers,
diff --git a/physicsnemo/models/layers/__init__.py b/physicsnemo/models/layers/__init__.py
index 627fa4f07f..cfebf5e38d 100644
--- a/physicsnemo/models/layers/__init__.py
+++ b/physicsnemo/models/layers/__init__.py
@@ -22,9 +22,16 @@
     Stan,
     get_activation,
 )
+from .ball_query import BQWarp
 from .conv_layers import ConvBlock, CubeEmbedding
 from .dgm_layers import DGMLayer
-from .fourier_layers import FourierFilter, FourierLayer, GaborFilter
+from .fourier_layers import (
+    FourierFilter,
+    FourierLayer,
+    FourierMLP,
+    GaborFilter,
+    fourier_encode,
+)
 from .fully_connected_layers import (
     Conv1dFCLayer,
     Conv2dFCLayer,
diff --git a/physicsnemo/models/layers/ball_query.py b/physicsnemo/models/layers/ball_query.py
index ee3e1538a9..795958800a 100644
--- a/physicsnemo/models/layers/ball_query.py
+++ b/physicsnemo/models/layers/ball_query.py
@@ -14,504 +14,104 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Tuple
+"""
+This layer is a compilable, ball-query operation.
 
-import torch
-import warp as wp
-from torch.overrides import handle_torch_function, has_torch_function
+By default, it will project a grid of points to a 1D set of points.
 
+It does not support batch size > 1.
+"""
 
-@wp.kernel
-def ball_query(
-    points1: wp.array(dtype=wp.vec3),
-    points2: wp.array(dtype=wp.vec3),
-    grid: wp.uint64,
-    k: wp.int32,
-    radius: wp.float32,
-    mapping: wp.array3d(dtype=wp.int32),
-    num_neighbors: wp.array2d(dtype=wp.int32),
-):
-    """
-    Performs ball query operation to find neighboring points within a specified radius.
+import torch
+import torch.nn as nn
+from einops import rearrange
 
-    For each point in points1, finds up to k neighboring points from points2 that are
-    within the specified radius. Uses a hash grid for efficient spatial queries.
+from physicsnemo.utils.neighbors import radius_search
 
-    Note that the neighbors found are not strictly guaranteed to be the closest k neighbors,
-    in the event that more than k neighbors are found within the radius.
 
-    Args:
-        points1: Array of query points
-        points2: Array of points to search
-        grid: Pre-computed hash grid for accelerated spatial queries
-        k: Maximum number of neighbors to find for each query point
-        radius: Maximum search radius for finding neighbors
-        mapping: Output array to store indices of neighboring points. Should be instantiated as zeros(1, len(points1), k)
-        num_neighbors: Output array to store the number of neighbors found for each query point. Should be instantiated as zeros(1, len(points1))
+class BQWarp(nn.Module):
     """
-    tid = wp.tid()
-
-    # Get position from points1
-    pos = points1[tid]
-
-    # particle contact
-    neighbors = wp.hash_grid_query(id=grid, point=pos, max_dist=radius)
-
-    # Keep track of the number of neighbors found
-    neighbors_found = wp.int32(0)
-
-    # loop through neighbors to compute density
-    for index in neighbors:
-        # Check if outside the radius
-        pos2 = points2[index]
-        if wp.length(pos - pos2) > radius:
-            continue
-
-        # Add neighbor to the list
-        mapping[0, tid, neighbors_found] = index
-
-        # Increment the number of neighbors found
-        neighbors_found += 1
-
-        # Break if we have found enough neighbors
-        if neighbors_found == k:
-            num_neighbors[0, tid] = k
-            break
-
-    # Set the number of neighbors
-    num_neighbors[0, tid] = neighbors_found
-
-
-@wp.kernel
-def sparse_ball_query(
-    points2: wp.array(dtype=wp.vec3),
-    mapping: wp.array3d(dtype=wp.int32),
-    num_neighbors: wp.array2d(dtype=wp.int32),
-    outputs: wp.array4d(dtype=wp.float32),
-):
-    tid = wp.tid()
-
-    # Get number of neighbors
-    k = num_neighbors[0, tid]
-
-    # Loop through neighbors
-    for _k in range(k):
-        # Get point2 index
-        index = mapping[0, tid, _k]
-
-        # Get position from points2
-        pos = points2[index]
-
-        # Set the output
-        outputs[0, tid, _k, 0] = pos[0]
-        outputs[0, tid, _k, 1] = pos[1]
-        outputs[0, tid, _k, 2] = pos[2]
-
+    Warp-based ball-query layer for finding neighboring points within a specified radius.
 
-def _ball_query_forward_primitive_(
-    points1: torch.Tensor,
-    points2: torch.Tensor,
-    k: int,
-    radius: float,
-    hash_grid: wp.HashGrid,
-) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
-    # Create output tensors:
-    mapping = torch.zeros(
-        (1, points1.shape[0], k),
-        dtype=torch.int32,
-        device=points1.device,
-        requires_grad=False,
-    )
-    num_neighbors = torch.zeros(
-        (1, points1.shape[0]),
-        dtype=torch.int32,
-        device=points1.device,
-        requires_grad=False,
-    )
-    outputs = torch.zeros(
-        (1, points1.shape[0], k, 3),
-        dtype=torch.float32,
-        device=points1.device,
-        requires_grad=(points1.requires_grad or points2.requires_grad),
-    )
+    This layer uses an accelerated ball query implementation to efficiently find points
+    within a specified radius of query points.
 
-    # Convert from torch to warp
-    points1 = wp.from_torch(points1, dtype=wp.vec3, requires_grad=points1.requires_grad)
-    points2 = wp.from_torch(points2, dtype=wp.vec3, requires_grad=points2.requires_grad)
-
-    wp_mapping = wp.from_torch(mapping, dtype=wp.int32, requires_grad=False)
-    wp_num_neighbors = wp.from_torch(num_neighbors, dtype=wp.int32, requires_grad=False)
-    wp_outputs = wp.from_torch(
-        outputs,
-        dtype=wp.float32,
-        requires_grad=(points1.requires_grad or points2.requires_grad),
-    )
-
-    # Build the grid
-    hash_grid.build(points2, radius)
-
-    # Run the kernel to get mapping
-    wp.launch(
-        ball_query,
-        inputs=[
-            points1,
-            points2,
-            hash_grid.id,
-            k,
-            radius,
-        ],
-        outputs=[
-            wp_mapping,
-            wp_num_neighbors,
-        ],
-        dim=[points1.shape[0]],
-    )
-
-    # Run the kernel to get outputs
-    wp.launch(
-        sparse_ball_query,
-        inputs=[
-            points2,
-            wp_mapping,
-            wp_num_neighbors,
-        ],
-        outputs=[
-            wp_outputs,
-        ],
-        dim=[points1.shape[0]],
-    )
-
-    return mapping, num_neighbors, outputs
-
-
-def _ball_query_backward_primitive_(
-    points1,
-    points2,
-    mapping,
-    num_neighbors,
-    outputs,
-    grad_mapping,
-    grad_num_neighbors,
-    grad_outputs,
-) -> Tuple[torch.Tensor, torch.Tensor]:
-    p2_grad = torch.zeros_like(points2)
-
-    # Run the kernel in adjoint mode
-    wp.launch(
-        sparse_ball_query,
-        inputs=[
-            wp.from_torch(points2, dtype=wp.vec3, requires_grad=points2.requires_grad),
-            wp.from_torch(mapping, dtype=wp.int32, requires_grad=False),
-            wp.from_torch(num_neighbors, dtype=wp.int32, requires_grad=False),
-        ],
-        outputs=[
-            wp.from_torch(outputs, dtype=wp.float32, requires_grad=False),
-        ],
-        adj_inputs=[
-            wp.from_torch(p2_grad, dtype=wp.vec3, requires_grad=points2.requires_grad),
-            wp.from_torch(
-                grad_mapping, dtype=wp.int32, requires_grad=mapping.requires_grad
-            ),
-            wp.from_torch(
-                grad_num_neighbors,
-                dtype=wp.int32,
-                requires_grad=num_neighbors.requires_grad,
-            ),
-        ],
-        adj_outputs=[
-            wp.from_torch(grad_outputs, dtype=wp.float32),
-        ],
-        dim=[points1.shape[0]],
-        adjoint=True,
-    )
-
-    return p2_grad
-
-
-class BallQuery(torch.autograd.Function):
+    Only supports batch size 1.
     """
-    Warp based Ball Query.
-
-    Note: only differentiable with respect to points1 and points2.
-    """
-
-    @staticmethod
-    def forward(
-        ctx,
-        points1: torch.Tensor,
-        points2: torch.Tensor,
-        k: int,
-        radius: float,
-        hash_grid: wp.HashGrid,
-    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
-        # Only works for batch size 1
-        if points1.shape[0] != 1:
-            raise AssertionError("Ball Query only works for batch size 1")
-
-        # CJA - 5/15/25 - This was added recently, but it looks like I also
-        # addressed it.  The primitive functions below handle device selection
-        # via compute-follows-data: they will allocate new tensors on the device
-        # where points1 currently resides (forward) and points2 resides (backward).
-        # there isn't checking that the devices match, but it will crash if they do not.
-        # try:
-        #     device = str(wp.get_device())
-        # except Exception:
-        #     device = "cuda"
-
-        ctx.k = k
-        ctx.radius = radius
-
-        # Make grid
-        ctx.hash_grid = hash_grid
 
-        # Apply the primitive.  Note the batch index is removed.
-        mapping, num_neighbors, outputs = _ball_query_forward_primitive_(
-            points1[0],
-            points2[0],
-            k,
-            radius,
-            hash_grid,
-        )
-        ctx.save_for_backward(points1, points2, mapping, num_neighbors, outputs)
-
-        return mapping, num_neighbors, outputs
-
-    @staticmethod
-    def backward(ctx, grad_mapping, grad_num_neighbors, grad_outputs):
-        points1, points2, mapping, num_neighbors, outputs = ctx.saved_tensors
-        # Apply the primitive
-        p2_grad = _ball_query_backward_primitive_(
-            points1[0],
-            points2[0],
-            mapping,
-            num_neighbors,
-            outputs,
-            grad_mapping,
-            grad_num_neighbors,
-            grad_outputs,
-        )
-        p2_grad = p2_grad.unsqueeze(0)
-
-        # Return the gradients
-        return (
-            torch.zeros_like(points1),
-            p2_grad,
-            None,
-            None,
-            None,
-        )
-
-
-def ball_query_layer(
-    points1: torch.Tensor,
-    points2: torch.Tensor,
-    k: int,
-    radius: float,
-    hash_grid: wp.HashGrid,
-):
-    """
-    Wrapper for BallQuery.apply to support a functional interface.
-    """
-    if has_torch_function((points1, points2)):
-        return handle_torch_function(
-            ball_query_layer, (points1, points2), points1, points2, k, radius, hash_grid
-        )
-    return BallQuery.apply(points1, points2, k, radius, hash_grid)
-
-
-class BallQueryLayer(torch.nn.Module):
-    """
-    Torch layer for differentiable and accelerated Ball Query
-    operation using Warp.
-    Args:
-        k (int): Number of neighbors.
-        radius (float): Radius of influence.
-        grid_size (int): Resolution of the hash grid. (Assumed to be uniform in all dimensions.)
-    """
+    def __init__(
+        self,
+        radius: float = 0.25,
+        neighbors_in_radius: int | None = 10,
+    ):
+        """
+        Initialize the BQWarp layer.
 
-    def __init__(self, k: int, radius: float, grid_size: int = 32):
+        Args:
+            radius: Radius for ball query operation
+            neighbors_in_radius: Maximum number of neighbors to return within radius. If None, all neighbors will be returned.
+        """
         super().__init__()
-        wp.init()
-        self.k = k
+
         self.radius = radius
-        self.hash_grid = wp.HashGrid(grid_size, grid_size, grid_size)
+        self.neighbors_in_radius = neighbors_in_radius
 
     def forward(
-        self, points1: torch.Tensor, points2: torch.Tensor
-    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        self, x: torch.Tensor, p_grid: torch.Tensor, reverse_mapping: bool = True
+    ) -> tuple[torch.Tensor, torch.Tensor]:
         """
-        Performs ball query operation to find neighboring points within a specified radius.
+        Performs ball query operation to find neighboring points and their features.
 
-        For each point in points1, finds up to k neighboring points from points2 that are
-        within the specified radius. Uses a hash grid for efficient spatial queries.
+        This method uses the Warp-accelerated ball query implementation to find points
+        within a specified radius. It can operate in two modes:
+        - Forward mapping: Find points from x that are near p_grid points (reverse_mapping=False)
+        - Reverse mapping: Find points from p_grid that are near x points (reverse_mapping=True)
 
         Args:
-            points1: Tensor of shape (batch_size, num_points1, 3) containing query points
-            points2: Tensor of shape (batch_size, num_points2, 3) containing points to search
+            x: Tensor of shape (batch_size, num_points, 3+features) containing point coordinates
+               and their features
+            p_grid: Tensor of shape (batch_size, grid_x, grid_y, grid_z, 3) containing grid point
+                   coordinates
+            reverse_mapping: Boolean flag to control the direction of the mapping:
+                            - True: Find p_grid points near x points
+                            - False: Find x points near p_grid points
 
         Returns:
             tuple containing:
                 - mapping: Tensor containing indices of neighboring points
-                - num_neighbors: Tensor containing the number of neighbors found for each query point
-                - outputs: Tensor containing features or coordinates of the neighboring points
+                - outputs: Tensor containing coordinates of the neighboring points
         """
-        return ball_query_layer(
-            points1,
-            points2,
-            self.k,
-            self.radius,
-            self.hash_grid,
-        )
-
-
-if __name__ == "__main__":
-    # Make function for saving point clouds
-    import pyvista as pv
-
-    from physicsnemo.utils.neighbors import radius_search
-
-    radius_search = torch.compile(radius_search)
-
-    torch.random.manual_seed(0)
-    torch.cuda.manual_seed(0)
-
-    def save_point_cloud(points, name):
-        cloud = pv.PolyData(points.detach().cpu().numpy())
-        cloud.save(name)
-
-    # Check forward pass
-    # Initialize tensors
-    n = 1  # number of point clouds
-    p1 = 1600_000  # 100000  # number of points in point cloud 1
-    d = 3  # dimension of the points
-    p2 = 1600_000  # 100000  # number of points in point cloud 2
-    points1 = torch.rand(n, p1, d, device="cuda", requires_grad=False)
-
-    points2 = torch.rand(n, p2, d, device="cuda", requires_grad=True)
-    k = 256  # maximum number of neighbors
-    radius = 0.1
-
-    # Make ball query layer
-    layer = BallQueryLayer(k, radius)
-
-    # Make ball query
-
-    for i in range(5):
-        mapping, num_neighbors, outputs = layer(
-            points1,
-            points2,
-        )
-        indices, points = radius_search(
-            points=points2[0],
-            queries=points1[0],
-            radius=radius,
-            max_points=k,
-            return_dists=False,
-            return_points=True,
-        )
-
-    # sorted_bq_indices = torch.sort(mapping[0][0]).values
-    # sorted_rs_indices = torch.sort(indices[0]).values
-
-    # print(sorted_bq_indices - sorted_rs_indices)
-    # print(sorted_bq_indices)
-    # print(sorted_rs_indices)
-
-    start_event = torch.cuda.Event(enable_timing=True)
-    end_event = torch.cuda.Event(enable_timing=True)
-    torch.cuda.synchronize()
-    for i in range(25):
-        if i == 5:
-            start_event.record()
-        mapping, num_neighbors, outputs = layer(
-            points1,
-            points2,
-        )
-    end_event.record()
-    torch.cuda.synchronize()
-    print(
-        f"Ball Query Time taken: {start_event.elapsed_time(end_event) / 20} ms per iteration"
-    )
-
-    start_event = torch.cuda.Event(enable_timing=True)
-    end_event = torch.cuda.Event(enable_timing=True)
-    for i in range(25):
-        if i == 5:
-            torch.cuda.synchronize()
-            start_event.record()
-        indices, points = radius_search(
-            points=points2[0],
-            queries=points1[0],
-            radius=radius,
-            max_points=k,
-            return_dists=False,
-            return_points=True,
-        )
-    end_event.record()
-    torch.cuda.synchronize()
-    print(
-        f"Radius Search Time taken: {start_event.elapsed_time(end_event) / 20} ms per iteration"
-    )
-
-    # Optimize the background points to move to the query points
-    optimizer = torch.optim.SGD([points2], 0.00)
-
-    # Test optimization
-    start_event = torch.cuda.Event(enable_timing=True)
-    end_event = torch.cuda.Event(enable_timing=True)
-    torch.cuda.synchronize()
-    target = points1.unsqueeze(2).clone().detach()
-    for i in range(25):
-        if i == 5:
-            start_event.record()
-        optimizer.zero_grad()
-        # mapping, num_neighbors, outputs = layer(points1, points2, lengths1, lengths2)
-        mapping, num_neighbors, outputs = layer(points1, points2)
-        # print(mapping[0][3])
-        # print(torch.where(mapping == 1))
-        loss = (points1.unsqueeze(2) - outputs).pow(2).sum()
-        loss.backward()
-        # print(f"ball query Points1 grad: {points1.grad}")
-        optimizer.step()
-        optimizer.zero_grad()
-
-    end_event.record()
-    torch.cuda.synchronize()
-    print(
-        f"Ball Query + backwards Time taken: {start_event.elapsed_time(end_event) / 20} ms per iteration"
-    )
-
-    # Optimize the background points to move to the query points
-    optimizer = torch.optim.SGD(
-        [points2], 0.00
-    )  # Setting the LR to 0.0 ensures the same gradients each time
-
-    # Test optimization
-    start_event = torch.cuda.Event(enable_timing=True)
-    end_event = torch.cuda.Event(enable_timing=True)
-    torch.cuda.synchronize()
-    start_event.record()
-    for i in range(25):
-        if i == 5:
-            start_event.record()
-        optimizer.zero_grad()
-        # mapping, num_neighbors, outputs = layer(points1, points2, lengths1, lengths2)
-        indexes, points = radius_search(
-            points=points2[0],
-            queries=points1[0],
-            radius=radius,
-            max_points=k,
-            return_dists=False,
-            return_points=True,
-        )
-        loss = (target - points).pow(2).sum()
-        loss.backward()
-        optimizer.step()
-        # print(f"radius search Points1 grad: {points1.grad}")
-        optimizer.zero_grad()
 
-    end_event.record()
-    torch.cuda.synchronize()
-    print(
-        f"radius search + backwards Time taken: {start_event.elapsed_time(end_event) / 20} ms per iteration"
-    )
+        if p_grid.shape[-1] != x.shape[-1] or x.shape[-1] != 3:
+            raise ValueError("The last dimension of p_grid and x must be 3")
+
+        if p_grid.ndim != 3:
+            if p_grid.ndim == 4:
+                p_grid = rearrange(p_grid, "b nx ny c -> b (nx ny) c")
+            elif p_grid.ndim == 5:
+                p_grid = rearrange(p_grid, "b nx ny nz c -> b (nx ny nz) c")
+            else:
+                raise ValueError("p_grid must be 3D, 4D, 5D only")
+
+        if reverse_mapping:
+            mapping, outputs = radius_search(
+                x[0],
+                p_grid[0],
+                self.radius,
+                self.neighbors_in_radius,
+                return_points=True,
+            )
+            mapping = mapping.unsqueeze(0)
+            outputs = outputs.unsqueeze(0)
+        else:
+            mapping, outputs = radius_search(
+                p_grid[0],
+                x[0],
+                self.radius,
+                self.neighbors_in_radius,
+                return_points=True,
+            )
+            mapping = mapping.unsqueeze(0)
+            outputs = outputs.unsqueeze(0)
+
+        return mapping, outputs
diff --git a/physicsnemo/models/layers/fourier_layers.py b/physicsnemo/models/layers/fourier_layers.py
index 35cb4d81a1..ba7db24a68 100644
--- a/physicsnemo/models/layers/fourier_layers.py
+++ b/physicsnemo/models/layers/fourier_layers.py
@@ -21,6 +21,86 @@
 import torch.nn as nn
 from torch import Tensor
 
+from .mlp_layers import Mlp
+
+
+def fourier_encode(coords: torch.Tensor, freqs: torch.Tensor) -> torch.Tensor:
+    """Vectorized Fourier feature encoding
+
+    Args:
+        coords: Tensor containing coordinates, of shape (batch_size, D)
+        freqs: Tensor containing frequencies, of shape (F,) (num frequencies)
+
+    Returns:
+        Tensor containing Fourier features, of shape (batch_size, D * 2 * F)
+    """
+
+    D = coords.shape[-1]
+    F = freqs.shape[0]
+
+    freqs = freqs[None, None, :, None]  # reshape to [*, F, 1] for broadcasting
+
+    coords = coords.unsqueeze(-2)  # [*, 1, D]
+    scaled = (coords * freqs).reshape(*coords.shape[:-2], D * F)  # [*, D, F]
+    features = torch.cat([torch.sin(scaled), torch.cos(scaled)], dim=-1)  # [*, D, 2F]
+
+    return features.reshape(*coords.shape[:-2], D * 2 * F)  # [*, D * 2F]
+
+
+class FourierMLP(nn.Module):
+    """
+    This is an MLP that will, optionally, fourier encode the input features.
+
+    The encoded features are concatenated to the original inputs, and then
+    processed with an MLP.
+
+    Args:
+        input_features: The number of input features to the MLP.
+        base_layer: The number of neurons in the hidden layer of the MLP.
+        fourier_features: Whether to fourier encode the input features.
+        num_modes: The number of modes to use for the fourier encoding.
+        activation: The activation function to use in the MLP.
+
+    """
+
+    def __init__(
+        self,
+        input_features: int,
+        base_layer: int,
+        fourier_features: bool,
+        num_modes: int,
+        activation: nn.Module | str,
+    ):
+        super().__init__()
+        self.fourier_features = fourier_features
+
+        # self.num_modes = model_parameters.num_modes
+
+        if self.fourier_features:
+            input_features_calculated = input_features + input_features * num_modes * 2
+            self.register_buffer(
+                "freqs", torch.exp(torch.linspace(0, math.pi, num_modes))
+            )
+        else:
+            input_features_calculated = input_features
+
+        self.mlp = Mlp(
+            in_features=input_features_calculated,
+            hidden_features=[
+                base_layer,
+                base_layer,
+            ],
+            out_features=base_layer,
+            act_layer=activation,
+            drop=0.0,
+        )
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        if self.fourier_features:
+            x = torch.cat((x, fourier_encode(x, self.freqs)), dim=-1)
+
+        return self.mlp(x)
+
 
 class FourierLayer(nn.Module):
     """Fourier layer used in the Fourier feature network"""
diff --git a/physicsnemo/models/layers/mlp_layers.py b/physicsnemo/models/layers/mlp_layers.py
index 8e9a18858b..ec832ad6b1 100644
--- a/physicsnemo/models/layers/mlp_layers.py
+++ b/physicsnemo/models/layers/mlp_layers.py
@@ -17,28 +17,83 @@
 import torch
 from torch import nn
 
+from .activations import get_activation
+
+# class Mlp(nn.Module):
+#     def __init__(
+#         self,
+#         in_features,
+#         hidden_features=None,
+#         out_features=None,
+#         act_layer=nn.GELU,
+#         drop=0.0,
+#     ):
+#         super().__init__()
+#         out_features = out_features or in_features
+#         hidden_features = hidden_features or in_features
+#         self.fc1 = nn.Linear(in_features, hidden_features)
+#         self.act = act_layer()
+#         self.fc2 = nn.Linear(hidden_features, out_features)
+#         self.drop = nn.Dropout(drop)
+
+#     def forward(self, x: torch.Tensor):
+#         x = self.fc1(x)
+#         x = self.act(x)
+#         x = self.drop(x)
+#         x = self.fc2(x)
+#         x = self.drop(x)
+#         return x
+
 
 class Mlp(nn.Module):
     def __init__(
         self,
-        in_features,
-        hidden_features=None,
-        out_features=None,
-        act_layer=nn.GELU,
-        drop=0.0,
+        in_features: int,
+        hidden_features: int | list[int] | None = None,
+        out_features: int | None = None,
+        act_layer: nn.Module | str = nn.GELU,
+        drop: float = 0.0,
     ):
         super().__init__()
         out_features = out_features or in_features
-        hidden_features = hidden_features or in_features
-        self.fc1 = nn.Linear(in_features, hidden_features)
-        self.act = act_layer()
-        self.fc2 = nn.Linear(hidden_features, out_features)
-        self.drop = nn.Dropout(drop)
+        if isinstance(hidden_features, int):
+            hidden_features = [
+                hidden_features,
+            ]
+        elif hidden_features is None:
+            hidden_features = [
+                in_features,
+            ]
+
+        # If the activation is a string, get it.
+        # It's it's a type, instantiate it.
+        # If it's a module, leave it be.
+        if isinstance(act_layer, str):
+            act_layer = get_activation(act_layer)
+        elif isinstance(act_layer, nn.Module):
+            pass
+        else:
+            act_layer = act_layer()
+            if not isinstance(act_layer, nn.Module):
+                raise ValueError(
+                    f"Activation layer must be a string or a module, got {type(act_layer)}"
+                )
+
+        layers = []
+        input_dim = in_features
+        for hidden_dim in hidden_features:
+            layers.append(nn.Linear(input_dim, hidden_dim))
+            layers.append(act_layer)
+            if drop != 0:
+                layers.append(nn.Dropout(drop))
+            input_dim = hidden_dim
+
+        # Add the last layers:
+        layers.append(nn.Linear(input_dim, out_features))
+        if drop != 0:
+            layers.append(nn.Dropout(drop))
+
+        self.layers = nn.Sequential(*layers)
 
     def forward(self, x: torch.Tensor):
-        x = self.fc1(x)
-        x = self.act(x)
-        x = self.drop(x)
-        x = self.fc2(x)
-        x = self.drop(x)
-        return x
+        return self.layers(x)
diff --git a/test/models/data/mlp_output.pth b/test/models/data/mlp_output.pth
new file mode 100644
index 0000000000000000000000000000000000000000..cc2f0ea9de0d96a3e72c4317c9f36b2d1c06cd09
GIT binary patch
literal 1598
zcmbVM-)qxQ6uxbjwq}ZK>w^v^?ybYwCS97fXeR{8OCHQBO7SIRX;y=2)0^B*6huKb
z@WsDCMSSs35LEUDC<s#!ANnA;7oXgtkKVIx=}p$+^uQrM?mg%G?z!JNx|W(iD4Rt`
zYzCc1ZMSQ9J<{!wV$&pMp?k+wl}U7KXpfQ)Fw&4kJm0!qFnYvs1K#lMwVvZP4Pti!
z&o?$oJWk~EjSK||=uWd`<K!TOS6$B}*9Sj&oN6TK9sy4@QuJwcdzjNWg43%S(h?fC
zg)<tbCAM((iDh}NYg@$eIst#pZ`!`yyy{ql$0ru*m?wC$j!y;!UCU$_zU+N^Nz;Mr
zW6wK_&ame#tb2ajB*upA2NXk9a_q6g0*C<c=2!nQ@CF|bFo&cFz(FMp({);%w%sAB
zB8<cc!A88u5)im@7ynFBaPaX2a_BNkVC^2UJt&;2RQ?(Fkc#UN;@ZGrv-aa`n&N|x
z=P-pXu^d2EML|`T|Chkd$FD2rUq4)N@*ir?-aXy^_Ui36uzAk)Pvs~R@bPS>(IU$x
zyzBh2=`8Vpw)3KO<L~vKM@1H`uz&c#_8qE#`z9#aaGI)e;n?8x62$UysUlXZWl^k@
zC0SY$O0pnVswF|HN@7J4s*)^}<ub5*|Gl=Er}Y5WwFD3jJD=^58CZzeh&d2X!1lmw
zBzTX0F$rRg70<&;c`lM?m9`g~=Scs=Q?ZH!9SrSrG*#FM@eHhrriS74?Ix`rLuR1k
zcsMJI=}7o)H2gq#@p$&a<|FZSIvyGL(Nfa2bb9e1>|E+%m;lBw*x_>*<`%WzFmtKV
Xxc#L*H~5BSUE`p!X-o#e{=wcqwx@0G

literal 0
HcmV?d00001

diff --git a/test/models/domino/test_domino_encodings.py b/test/models/domino/test_domino_encodings.py
index a27e2dd0a9..6570e0a686 100644
--- a/test/models/domino/test_domino_encodings.py
+++ b/test/models/domino/test_domino_encodings.py
@@ -25,19 +25,18 @@
 @pytest.mark.parametrize("device", ["cuda:0"])
 @pytest.mark.parametrize("fourier_features", [True, False])
 @pytest.mark.parametrize("num_modes", [3, 5, 10])
-def test_encoding_mlp(device, fourier_features, num_modes):
-    """Test EncodingMLP with various configurations"""
-    from physicsnemo.models.domino.encodings import EncodingMLP
-    from physicsnemo.models.domino.model import get_activation
+def test_fourier_mlp(device, fourier_features, num_modes):
+    """Test FourierMLP with various configurations"""
+    from physicsnemo.models.layers import FourierMLP
 
     torch.manual_seed(0)
 
-    model = EncodingMLP(
+    model = FourierMLP(
         input_features=3,
         base_layer=64,
         fourier_features=fourier_features,
         num_modes=num_modes,
-        activation=get_activation("relu"),
+        activation="relu",
     ).to(device)
 
     x = torch.randn(2, 100, 3).to(device)
@@ -49,14 +48,14 @@ def test_encoding_mlp(device, fourier_features, num_modes):
 @pytest.mark.parametrize("device", ["cuda:0"])
 def test_fourier_encode_vectorized(device):
     """Test fourier encoding function"""
-    from physicsnemo.models.domino.encodings import fourier_encode_vectorized
+    from physicsnemo.models.layers import fourier_encode
 
     torch.manual_seed(0)
 
     coords = torch.randn(4, 20, 3).to(device)
     freqs = torch.exp(torch.linspace(0, math.pi, 5)).to(device)
 
-    output = fourier_encode_vectorized(coords, freqs)
+    output = fourier_encode(coords, freqs)
 
     # Output should be [batch, points, D * 2 * F] = [4, 20, 3 * 2 * 5] = [4, 20, 30]
     validate_output_shape_and_values(output, (4, 20, 30))
@@ -118,6 +117,7 @@ def test_multi_geometry_encoding(device, geo_encoding_type):
         neighbors_in_radius=neighbors_in_radius,
         geo_encoding_type=geo_encoding_type,
         base_layer=64,
+        n_upstream_radii=2,
         activation=get_activation("relu"),
         grid_resolution=GRID_RESOLUTION,
     ).to(device)
@@ -133,14 +133,8 @@ def test_multi_geometry_encoding(device, geo_encoding_type):
     volume_mesh_centers = torch.randn(BATCH_SIZE, N_MESH_POINTS, 3).to(device)
     p_grid = torch.randn(BATCH_SIZE, *GRID_RESOLUTION, 3).to(device)
 
-    print(f"encoding_g.shape: {encoding_g.shape}")
-    print(f"volume_mesh_centers.shape: {volume_mesh_centers.shape}")
-    print(f"p_grid.shape: {p_grid.shape}")
-
     output = model(encoding_g, volume_mesh_centers, p_grid)
 
-    print(f"output.shape: {output.shape}")
-
     expected_output_dim = sum(neighbors_in_radius)
 
     validate_output_shape_and_values(
diff --git a/test/models/domino/test_domino_geometry_rep.py b/test/models/domino/test_domino_geometry_rep.py
index 813b246bae..628e760aa5 100644
--- a/test/models/domino/test_domino_geometry_rep.py
+++ b/test/models/domino/test_domino_geometry_rep.py
@@ -24,7 +24,8 @@
 
 @pytest.mark.parametrize("device", ["cuda:0"])
 @pytest.mark.parametrize("act", ["relu", "gelu"])
-def test_geo_conv_out(device, act):
+@pytest.mark.parametrize("fourier_features", [True, False])
+def test_geo_conv_out(device, act, fourier_features):
     """Test GeoConvOut layer"""
     from physicsnemo.models.domino.geometry_rep import GeoConvOut
 
@@ -39,6 +40,8 @@ class TestParams:
         activation: str = act
 
     params = TestParams()
+    params.fourier_features = fourier_features
+
     grid_resolution = [32, 32, 32]
 
     layer = GeoConvOut(
diff --git a/test/models/domino/test_domino_mlps.py b/test/models/domino/test_domino_mlps.py
index 86d2d9a208..d181f24401 100644
--- a/test/models/domino/test_domino_mlps.py
+++ b/test/models/domino/test_domino_mlps.py
@@ -19,33 +19,33 @@
 
 from .utils import validate_output_shape_and_values
 
+# @pytest.mark.parametrize("device", ["cuda:0"])
+# @pytest.mark.parametrize("activation", ["relu", "gelu"])
+# @pytest.mark.parametrize("n_layers", [1, 2, 3, 5])
+# def test_mlp(device, activation, n_layers):
+#     """Test basic MLP functionality"""
+#     from physicsnemo.models.domino.mlps import MLP
+#     from physicsnemo.models.domino.model import get_activation
 
-@pytest.mark.parametrize("device", ["cuda:0"])
-@pytest.mark.parametrize("activation", ["relu", "gelu"])
-@pytest.mark.parametrize("n_layers", [1, 2, 3, 5])
-def test_mlp(device, activation, n_layers):
-    """Test basic MLP functionality"""
-    from physicsnemo.models.domino.mlps import MLP
-    from physicsnemo.models.domino.model import get_activation
+#     torch.manual_seed(0)
 
-    torch.manual_seed(0)
+#     mlp = MLP(
+#         input_features=10,
+#         output_features=5,
+#         base_layer=32,
+#         activation=get_activation(activation),
+#         n_layers=n_layers,
+#     ).to(device)
 
-    mlp = MLP(
-        input_features=10,
-        output_features=5,
-        base_layer=32,
-        activation=get_activation(activation),
-        n_layers=n_layers,
-    ).to(device)
+#     x = torch.randn(4, 50, 10).to(device)
+#     output = mlp(x)
 
-    x = torch.randn(4, 50, 10).to(device)
-    output = mlp(x)
-
-    validate_output_shape_and_values(output, (4, 50, 5))
+#     validate_output_shape_and_values(output, (4, 50, 5))
 
 
 @pytest.mark.parametrize("device", ["cuda:0"])
-def test_aggregation_model(device):
+@pytest.mark.parametrize("activation", ["relu", "gelu"])
+def test_aggregation_model(device, activation):
     """Test AggregationModel"""
     from physicsnemo.models.domino.mlps import AggregationModel
     from physicsnemo.models.domino.model import get_activation
@@ -56,7 +56,7 @@ def test_aggregation_model(device):
         input_features=100,
         output_features=1,
         base_layer=64,
-        activation=get_activation("relu"),
+        activation=get_activation(activation),
     ).to(device)
 
     x = torch.randn(2, 30, 100).to(device)
@@ -66,7 +66,8 @@ def test_aggregation_model(device):
 
 
 @pytest.mark.parametrize("device", ["cuda:0"])
-def test_local_point_conv(device):
+@pytest.mark.parametrize("activation", ["relu", "gelu"])
+def test_local_point_conv(device, activation):
     """Test LocalPointConv"""
     from physicsnemo.models.domino.mlps import LocalPointConv
     from physicsnemo.models.domino.model import get_activation
@@ -77,7 +78,7 @@ def test_local_point_conv(device):
         input_features=50,
         base_layer=128,
         output_features=32,
-        activation=get_activation("relu"),
+        activation=get_activation(activation),
     ).to(device)
 
     x = torch.randn(2, 100, 50).to(device)
diff --git a/test/models/domino/test_domino_solutions.py b/test/models/domino/test_domino_solutions.py
index be4797eafc..36ddd0d3db 100644
--- a/test/models/domino/test_domino_solutions.py
+++ b/test/models/domino/test_domino_solutions.py
@@ -29,10 +29,9 @@ def test_solution_calculator_volume(
     device, num_variables, num_sample_points, encode_parameters
 ):
     """Test SolutionCalculatorVolume with various configurations"""
-    from physicsnemo.models.domino.encodings import EncodingMLP
     from physicsnemo.models.domino.mlps import AggregationModel
-    from physicsnemo.models.domino.model import get_activation
     from physicsnemo.models.domino.solutions import SolutionCalculatorVolume
+    from physicsnemo.models.layers import FourierMLP, get_activation
 
     torch.manual_seed(0)
 
@@ -40,7 +39,7 @@ def test_solution_calculator_volume(
 
     # Create parameter model if needed
     parameter_model = (
-        EncodingMLP(
+        FourierMLP(
             input_features=2,
             base_layer=32,
             fourier_features=True,
@@ -67,7 +66,7 @@ def test_solution_calculator_volume(
     # Create basis functions
     nn_basis = nn.ModuleList(
         [
-            EncodingMLP(
+            FourierMLP(
                 input_features=3,
                 base_layer=32,
                 fourier_features=False,
@@ -115,10 +114,9 @@ def test_solution_calculator_surface(
     device, num_variables, use_surface_normals, use_surface_area
 ):
     """Test SolutionCalculatorSurface with various configurations"""
-    from physicsnemo.models.domino.encodings import EncodingMLP
     from physicsnemo.models.domino.mlps import AggregationModel
-    from physicsnemo.models.domino.model import get_activation
     from physicsnemo.models.domino.solutions import SolutionCalculatorSurface
+    from physicsnemo.models.layers import FourierMLP, get_activation
 
     torch.manual_seed(0)
 
@@ -131,8 +129,6 @@ def test_solution_calculator_surface(
     if use_surface_area:
         input_features += 1
 
-    print(f"Input features: {input_features}")
-
     # Create aggregation models
     aggregation_model = nn.ModuleList(
         [
@@ -149,7 +145,7 @@ def test_solution_calculator_surface(
     # Create basis functions
     nn_basis = nn.ModuleList(
         [
-            EncodingMLP(
+            FourierMLP(
                 input_features=input_features,
                 base_layer=32,
                 fourier_features=False,
@@ -163,7 +159,6 @@ def test_solution_calculator_surface(
     model = SolutionCalculatorSurface(
         num_variables=num_variables,
         num_sample_points=3,
-        noise_intensity=50.0,
         encode_parameters=False,
         use_surface_normals=use_surface_normals,
         use_surface_area=use_surface_area,
diff --git a/test/models/test_mlp_layers.py b/test/models/test_mlp_layers.py
new file mode 100644
index 0000000000..19db339ba8
--- /dev/null
+++ b/test/models/test_mlp_layers.py
@@ -0,0 +1,75 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+import torch
+
+from physicsnemo.models.layers import Mlp
+
+from .common import (
+    validate_forward_accuracy,
+)
+
+
+@pytest.mark.parametrize("device", ["cuda:0", "cpu"])
+def test_mlp_forward_accuracy(device):
+    torch.manual_seed(7)
+    target_device = torch.device(device)
+
+    model = Mlp(in_features=10, hidden_features=20, out_features=5).to(target_device)
+    input_tensor = torch.randn(1, 10).to(
+        target_device
+    )  # Assuming a batch size of 1 for simplicity
+    model(input_tensor)
+
+    file_name = "mlp_output.pth"
+
+    # Tack this on for the test, since model is not a physicsnemo Module:
+    model.device = target_device
+
+    assert validate_forward_accuracy(
+        model,
+        (input_tensor,),
+        file_name=file_name,
+        atol=1e-3,
+    )
+
+
+def test_mlp_activation_and_dropout():
+    model = Mlp(in_features=10, hidden_features=20, out_features=5, drop=0.5)
+    input_tensor = torch.randn(2, 10)  # Assuming a batch size of 1 for simplicity
+
+    output_tensor = model(input_tensor)
+
+    assert output_tensor.shape == torch.Size([2, 5])
+
+
+def test_mlp_different_activation():
+    model = Mlp(
+        in_features=10, hidden_features=20, out_features=7, act_layer=torch.nn.ReLU
+    )
+    input_tensor = torch.randn(3, 10)  # Assuming a batch size of 1 for simplicity
+
+    output_tensor = model(input_tensor)
+    assert output_tensor.shape == torch.Size([3, 7])
+
+
+def test_multiple_hidden_layers():
+    model = Mlp(in_features=10, hidden_features=[20, 30], out_features=5)
+    input_tensor = torch.randn(4, 10)  # Assuming a batch size of 1 for simplicity
+
+    output_tensor = model(input_tensor)
+    assert output_tensor.shape == torch.Size([4, 5])

From a87f666e06d71143ed4060473e8fe772f4e8b7df Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 29 Sep 2025 20:38:06 +0000
Subject: [PATCH 55/98] Align new datapipe with Rishi's

---
 physicsnemo/datapipes/cae/domino_datapipe2.py | 127 ++++++++----------
 1 file changed, 59 insertions(+), 68 deletions(-)

diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py
index 0cf516f438..d953e1c9df 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe2.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe2.py
@@ -17,7 +17,7 @@
 """
 This code provides the datapipe for reading the processed npy files,
 generating multi-res grids, calculating signed distance fields,
-positional encodings, sampling random points in the volume and on surface,
+sampling random points in the volume and on surface,
 normalizing fields and returning the output tensors as a dictionary.
 
 This datapipe also non-dimensionalizes the fields, so the order in which the variables should
@@ -34,6 +34,7 @@
 import torch
 import torch.cuda.nvtx as nvtx
 from omegaconf import DictConfig
+from torch.distributed.tensor.placement_types import Replicate
 from torch.utils.data import Dataset
 
 from physicsnemo.datapipes.cae.drivaer_ml_dataset import (
@@ -41,9 +42,9 @@
     compute_mean_std_min_max,
 )
 from physicsnemo.distributed import DistributedManager
+from physicsnemo.distributed.shard_tensor import ShardTensor, scatter_tensor
 from physicsnemo.utils.domino.utils import (
     calculate_center_of_mass,
-    calculate_normal_positional_encoding,
     create_grid,
     get_filenames,
     normalize,
@@ -77,8 +78,6 @@ class DoMINODataConfig:
         surface_variables: (Surface specific) Names of surface variables.
         surface_points_sample: (Surface specific) Number of surface points to sample per batch.
         num_surface_neighbors: (Surface specific) Number of surface neighbors to consider for nearest neighbors approach.
-        resample_surfaces: (Surface specific) Whether to resample the surface before kdtree/knn. Not available if caching.
-        resampling_points: (Surface specific) Number of points to resample the surface to.
         surface_sampling_algorithm: (Surface specific) Algorithm to use for surface sampling ("area_weighted" or "random").
         surface_factors: (Surface specific) Non-dimensionalization factors for surface variables.
             If set, and scaling_type is:
@@ -111,10 +110,6 @@ class DoMINODataConfig:
             - volume.points_sample
         geom_points_sample: Number of STL points sampled per batch.
             Independent of volume.points_sample and surface.points_sample.
-        positional_encoding: Whether to use positional encoding. Affects the calculation of:
-            - pos_volume_closest
-            - pos_volume_center_of_mass
-            - pos_surface_centter_of_mass
         scaling_type: Scaling type for volume variables.
             If used, will rescale the volume_fields and surface fields outputs.
             Requires volume.factor and surface.factor to be set.
@@ -136,8 +131,6 @@ class DoMINODataConfig:
     surface_variables: Optional[Sequence] = ("pMean", "wallShearStress")
     surface_points_sample: int = 1024
     num_surface_neighbors: int = 11
-    resample_surfaces: bool = False
-    resampling_points: int = 1_000_000
     surface_sampling_algorithm: str = Literal["area_weighted", "random"]
     surface_factors: Optional[Sequence] = None
     bounding_box_dims_surf: Optional[Union[BoundingBox, Sequence]] = None
@@ -153,7 +146,6 @@ class DoMINODataConfig:
     sample_in_bbox: bool = False
     sampling: bool = False
     geom_points_sample: int = 300000
-    positional_encoding: bool = False
     scaling_type: Optional[Literal["min_max_scaling", "mean_std_scaling"]] = None
     compute_scaling_factors: bool = False
     caching: bool = False
@@ -180,8 +172,6 @@ def __post_init__(self):
                 raise ValueError("Sampling should be False for caching")
             if self.compute_scaling_factors:
                 raise ValueError("Compute scaling factors should be False for caching")
-            if self.resample_surfaces:
-                raise ValueError("Resample surface should be False for caching")
 
         if self.phase not in [
             "train",
@@ -202,7 +192,8 @@ def __post_init__(self):
 
 
 ##### TODO
-# - check the bounding box protocol works
+# - The SDF normalization here is based on using a normalized mesh and
+#   a normalized coordinate.  The alternate method is to normalize to the min/max of the grid.
 
 
 class DoMINODataPipe(Dataset):
@@ -373,6 +364,7 @@ def downsample_geometry(
 
         if self.config.sampling:
             geometry_points = self.config.geom_points_sample
+
             geometry_coordinates_sampled, idx_geometry = shuffle_array(
                 stl_vertices, geometry_points
             )
@@ -416,23 +408,6 @@ def process_surface(
         if surface_fields is not None:
             surface_fields = surface_fields[idx]
 
-        ########################################################################
-        # Surface resampling ...
-        ########################################################################
-        if self.config.resample_surfaces:
-            if self.config.resampling_points > surface_coordinates.shape[0]:
-                resampling_points = surface_coordinates.shape[0]
-            else:
-                resampling_points = self.config.resampling_points
-
-            surface_coordinates, idx_s = shuffle_array(
-                surface_coordinates, resampling_points
-            )
-            surface_normals = surface_normals[idx_s]
-            surface_sizes = surface_sizes[idx_s]
-            if surface_fields is not None:
-                surface_fields = surface_fields[idx_s]
-
         ########################################################################
         # Reject surface points outside of the Bounding Box
         # NOTE - this is using the VOLUME bounding box!
@@ -450,19 +425,6 @@ def process_surface(
             if surface_fields is not None:
                 surface_fields = surface_fields[ids_in_bbox]
 
-        # Compute the positional encoding before sampling
-        if self.config.positional_encoding:
-            dx, dy, dz = (
-                (s_max[0] - s_min[0]) / nx,
-                (s_max[1] - s_min[1]) / ny,
-                (s_max[2] - s_min[2]) / nz,
-            )
-            pos_normals_com_surface = calculate_normal_positional_encoding(
-                surface_coordinates, center_of_mass, cell_dimensions=[dx, dy, dz]
-            )
-        else:
-            pos_normals_com_surface = surface_coordinates - center_of_mass
-
         ########################################################################
         # Perform Down sampling of the surface fields.
         # Note that we snapshot the full surface coordinates for
@@ -496,7 +458,7 @@ def process_surface(
             # Select out the sampled points for non-neighbor arrays:
             if surface_fields is not None:
                 surface_fields = surface_fields[idx_surface]
-            pos_normals_com_surface = pos_normals_com_surface[idx_surface]
+
             # Subsample the normals and sizes:
             surface_normals = surface_normals[idx_surface]
             surface_sizes = surface_sizes[idx_surface]
@@ -526,6 +488,10 @@ def process_surface(
             surf_grid = normalize(surf_grid, s_max, s_min)
             surface_coordinates = normalize(surface_coordinates, s_max, s_min)
             surface_neighbors = normalize(surface_neighbors, s_max, s_min)
+            # Make sure to normalize the center of mass for the normals_com_surface calc
+            center_of_mass = normalize(center_of_mass, s_max, s_min)
+
+        pos_normals_com_surface = surface_coordinates - center_of_mass
 
         ########################################################################
         # Apply scaling to the targets, if desired:
@@ -691,26 +657,13 @@ def calculate_volume_encoding(
         sdf_node_closest_point: torch.Tensor,
         center_of_mass: torch.Tensor,
     ):
-        nx, ny, nz = self.config.grid_resolution
-
-        dx, dy, dz = (
-            (c_max[0] - c_min[0]) / nx,
-            (c_max[1] - c_min[1]) / ny,
-            (c_max[2] - c_min[2]) / nz,
-        )
+        if self.config.normalize_coordinates:
+            volume_coordinates = normalize(volume_coordinates, c_max, c_min)
+            sdf_node_closest_point = normalize(sdf_node_closest_point, c_max, c_min)
+            center_of_mass = normalize(center_of_mass, c_max, c_min)
 
-        if self.config.positional_encoding:
-            pos_normals_closest_vol = calculate_normal_positional_encoding(
-                volume_coordinates,
-                sdf_node_closest_point,
-                cell_dimensions=[dx, dy, dz],
-            )
-            pos_normals_com_vol = calculate_normal_positional_encoding(
-                volume_coordinates, center_of_mass, cell_dimensions=[dx, dy, dz]
-            )
-        else:
-            pos_normals_closest_vol = volume_coordinates - sdf_node_closest_point
-            pos_normals_com_vol = volume_coordinates - center_of_mass
+        pos_normals_closest_vol = volume_coordinates - sdf_node_closest_point
+        pos_normals_com_vol = volume_coordinates - center_of_mass
 
         return pos_normals_closest_vol, pos_normals_com_vol
 
@@ -728,9 +681,50 @@ def process_data(self, data_dict):
 
         # This function gets information about the surface scale,
         # and decides what the surface grid will be:
+
+        stl_coordinates = data_dict["stl_coordinates"]
+
         s_min, s_max, surf_grid = self.compute_stl_scaling_and_surface_grids(
-            data_dict["stl_coordinates"]
+            stl_coordinates
         )
+
+        if isinstance(stl_coordinates, ShardTensor):
+            mesh = stl_coordinates._spec.mesh
+            # Then, replicate the bounding box along the mesh if present.
+            s_max = scatter_tensor(
+                s_max,
+                0,
+                mesh=mesh,
+                placements=[
+                    Replicate(),
+                ],
+                global_shape=s_max.shape,
+                dtype=s_max.dtype,
+                requires_grad=False,
+            )
+            s_min = scatter_tensor(
+                s_min,
+                0,
+                mesh=mesh,
+                placements=[
+                    Replicate(),
+                ],
+                global_shape=s_min.shape,
+                dtype=s_min.dtype,
+                requires_grad=False,
+            )
+            surf_grid = scatter_tensor(
+                surf_grid,
+                0,
+                mesh=mesh,
+                placements=[
+                    Replicate(),
+                ],
+                global_shape=surf_grid.shape,
+                dtype=surf_grid.dtype,
+                requires_grad=False,
+            )
+
         return_dict["surf_grid"] = surf_grid
 
         # We always need to calculate the SDF on the surface grid:
@@ -1203,7 +1197,6 @@ def create_domino_dataset(
             volume_points_sample=cfg.model.volume_points_sample,
             surface_points_sample=cfg.model.surface_points_sample,
             geom_points_sample=cfg.model.geom_points_sample,
-            positional_encoding=cfg.model.positional_encoding,
             volume_factors=vol_factors,
             surface_factors=surf_factors,
             scaling_type=cfg.model.normalization,
@@ -1211,8 +1204,6 @@ def create_domino_dataset(
             bounding_box_dims=cfg.data.bounding_box,
             bounding_box_dims_surf=cfg.data.bounding_box_surface,
             num_surface_neighbors=cfg.model.num_neighbors_surface,
-            resample_surfaces=cfg.model.resampling_surface_mesh.resample,
-            resampling_points=cfg.model.resampling_surface_mesh.points,
             surface_sampling_algorithm=cfg.model.surface_sampling_algorithm,
             **overrides,
         )

From 1c191b5e4923429b070acc6f32396301bb520466 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 29 Sep 2025 20:38:40 +0000
Subject: [PATCH 56/98] Use ones_like to create a tensor

---
 physicsnemo/utils/domino/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/physicsnemo/utils/domino/utils.py b/physicsnemo/utils/domino/utils.py
index 95a7011976..394b86a420 100644
--- a/physicsnemo/utils/domino/utils.py
+++ b/physicsnemo/utils/domino/utils.py
@@ -442,7 +442,7 @@ def shuffle_array(
 
     # If there are no weights, use uniform weights:
     if weights is None:
-        weights = torch.ones(N_input_points, device=points.device)
+        weights = torch.ones_like(points, device=points.device)
 
     # Using torch multinomial for this.
     # Multinomial can't work with more than 2^24 input points.

From 4310e524781e1e554c4884cbee6e924eda29664a Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 29 Sep 2025 20:43:15 +0000
Subject: [PATCH 57/98] Move old script to new location

---
 .../src/{ => deprecated}/inference_on_stl.py  | 25 ++++++++++++++++---
 1 file changed, 21 insertions(+), 4 deletions(-)
 rename examples/cfd/external_aerodynamics/domino/src/{ => deprecated}/inference_on_stl.py (98%)

diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py b/examples/cfd/external_aerodynamics/domino/src/deprecated/inference_on_stl.py
similarity index 98%
rename from examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
rename to examples/cfd/external_aerodynamics/domino/src/deprecated/inference_on_stl.py
index e4ec80f2e0..b48e2b50f2 100644
--- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
+++ b/examples/cfd/external_aerodynamics/domino/src/deprecated/inference_on_stl.py
@@ -372,8 +372,18 @@ def process_surface_mesh(self, bounding_box=None, bounding_box_surface=None):
         surf_sdf_grid = torch.reshape(surf_sdf_grid, (nx, ny, nz))
 
         if self.normalize_coordinates:
-            sdf_grid = 2.0 * (sdf_grid - torch.amax(grid)) / (torch.amax(grid) - torch.amin(grid)) - 1.0
-            surf_sdf_grid = 2.0 * (surf_sdf_grid - torch.amax(s_grid)) / (torch.amax(s_grid) - torch.amin(s_grid)) - 1.0
+            sdf_grid = (
+                2.0
+                * (sdf_grid - torch.amax(grid))
+                / (torch.amax(grid) - torch.amin(grid))
+                - 1.0
+            )
+            surf_sdf_grid = (
+                2.0
+                * (surf_sdf_grid - torch.amax(s_grid))
+                / (torch.amax(s_grid) - torch.amin(s_grid))
+                - 1.0
+            )
             grid = 2.0 * (grid - c_min) / (c_max - c_min) - 1.0
             s_grid = 2.0 * (s_grid - surf_min) / (surf_max - surf_min) - 1.0
 
@@ -645,8 +655,15 @@ def sample_points_in_volume(
         sdf_nodes = torch.unsqueeze(sdf_nodes, -1)
 
         if self.normalize_coordinates:
-            volume_coordinates = 2.0 * (volume_coordinates - c_min) / (c_max - c_min) - 1.0
-            sdf_nodes = 2.0 * (sdf_nodes - torch.amax(c_max)) / (torch.amax(c_max) - torch.amin(c_min)) - 1.0
+            volume_coordinates = (
+                2.0 * (volume_coordinates - c_min) / (c_max - c_min) - 1.0
+            )
+            sdf_nodes = (
+                2.0
+                * (sdf_nodes - torch.amax(c_max))
+                / (torch.amax(c_max) - torch.amin(c_min))
+                - 1.0
+            )
             sdf_node_closest_point = (
                 2.0 * (sdf_node_closest_point - c_min) / (c_max - c_min) - 1.0
             )

From cd64439fce5c22dcedcbaba913d0f2d9f3413e3b Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Tue, 30 Sep 2025 03:32:39 +0000
Subject: [PATCH 58/98] Update some tests to match the new datapipe structure

---
 .../datapipes/cae/drivaer_ml_dataset.py       |   6 +-
 physicsnemo/utils/domino/utils.py             |   2 +-
 test/datapipes/test_domino_datapipe.py        | 203 +++++++++++-------
 .../shard_tensor/ops/test_radius_search.py    |   3 +-
 4 files changed, 132 insertions(+), 82 deletions(-)

diff --git a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
index 13009fc968..1e6ae62f81 100644
--- a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
+++ b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
@@ -669,7 +669,11 @@ def __init__(
             raise NotADirectoryError(f"Data directory {data_dir} is not a directory")
 
         self._keys_to_read = keys_to_read
-        self._keys_to_read_if_available = keys_to_read_if_available
+
+        # Make sure the optional keys are on the right device:
+        self._keys_to_read_if_available = {
+            k: v.to(output_device) for k, v in keys_to_read_if_available.items()
+        }
 
         self.file_reader, self._filenames = self._infer_file_type_and_filenames(
             data_dir
diff --git a/physicsnemo/utils/domino/utils.py b/physicsnemo/utils/domino/utils.py
index 394b86a420..fc3af36334 100644
--- a/physicsnemo/utils/domino/utils.py
+++ b/physicsnemo/utils/domino/utils.py
@@ -442,7 +442,7 @@ def shuffle_array(
 
     # If there are no weights, use uniform weights:
     if weights is None:
-        weights = torch.ones_like(points, device=points.device)
+        weights = torch.ones(points.shape[0], device=points.device)
 
     # Using torch multinomial for this.
     # Multinomial can't work with more than 2^24 input points.
diff --git a/test/datapipes/test_domino_datapipe.py b/test/datapipes/test_domino_datapipe.py
index 8df540d9ef..bb463792ff 100644
--- a/test/datapipes/test_domino_datapipe.py
+++ b/test/datapipes/test_domino_datapipe.py
@@ -18,7 +18,7 @@
 import tempfile
 from dataclasses import dataclass
 from pathlib import Path
-from typing import List, Literal
+from typing import List, Literal, Optional, Sequence
 
 import numpy as np
 import pytest
@@ -32,6 +32,7 @@
     DoMINODataConfig,
     DoMINODataPipe,
 )
+from physicsnemo.datapipes.cae.drivaer_ml_dataset import DrivaerMLDataset
 
 Tensor = torch.Tensor
 
@@ -242,7 +243,22 @@ def bounding_boxes():
     }
 
 
-def create_basic_dataset(data_dir, model_type, **kwargs):
+def create_basic_dataset(
+    data_dir,
+    model_type,
+    gpu_preprocessing: bool = False,
+    gpu_output: bool = False,
+    normalize_coordinates: bool = False,
+    sample_in_bbox: bool = False,
+    sampling: bool = False,
+    volume_points_sample: int = 1234,
+    surface_points_sample: int = 1234,
+    surface_sampling_algorithm: str = "random",
+    caching: bool = False,
+    scaling_type: Optional[Literal["min_max_scaling", "mean_std_scaling"]] = None,
+    volume_factors: Optional[Sequence] = None,
+    surface_factors: Optional[Sequence] = None,
+):
     """Helper function to create a basic DoMINODataPipe with default settings."""
 
     # assert model_type in ["volume", "surface", "combined"]
@@ -251,43 +267,80 @@ def create_basic_dataset(data_dir, model_type, **kwargs):
 
     bounding_box = bounding_boxes()
 
+    keys_to_read = [
+        "stl_coordinates",
+        "stl_faces",
+        "stl_centers",
+        "stl_areas",
+    ]
+
+    if model_type == "volume" or model_type == "combined":
+        keys_to_read += [
+            "volume_mesh_centers",
+            "volume_fields",
+        ]
+
+    if model_type == "surface" or model_type == "combined":
+        keys_to_read += [
+            "surface_mesh_centers",
+            "surface_areas",
+            "surface_normals",
+            "surface_fields",
+        ]
+
+    keys_to_read_if_available = {
+        "global_params_values": torch.tensor([1.225, 10.0]),
+        "global_params_reference": torch.tensor([1.225, 10.0]),
+    }
+
+    dataset = DrivaerMLDataset(
+        data_dir=input_path,
+        keys_to_read=keys_to_read,
+        keys_to_read_if_available=keys_to_read_if_available,
+        output_device=torch.device("cuda")
+        if gpu_preprocessing
+        else torch.device("cpu"),
+        preload_depth=0,
+        pin_memory=False,
+        device_mesh=None,
+        placements=None,
+    )
+
     default_kwargs = {
         "phase": "test",
         "grid_resolution": [64, 64, 64],
-        "volume_points_sample": 1234,
-        "surface_points_sample": 1234,
-        "geom_points_sample": 2345,
+        "volume_points_sample": volume_points_sample,
+        "surface_points_sample": surface_points_sample,
+        "geom_points_sample": 500,
         "num_surface_neighbors": 5,
         "bounding_box_dims": bounding_box["volume"],
         "bounding_box_dims_surf": bounding_box["surface"],
-        "normalize_coordinates": True,
-        "sampling": False,
-        "sample_in_bbox": False,
-        "positional_encoding": False,
-        "scaling_type": None,
-        "volume_factors": None,
-        "surface_factors": None,
-        "caching": False,
-        "compute_scaling_factors": False,
-        "gpu_preprocessing": True,
-        "gpu_output": True,
+        "normalize_coordinates": normalize_coordinates,
+        "sampling": sampling,
+        "sample_in_bbox": sample_in_bbox,
+        "scaling_type": scaling_type,
+        "volume_factors": volume_factors,
+        "surface_factors": surface_factors,
+        "caching": caching,
+        "gpu_preprocessing": gpu_preprocessing,
+        "gpu_output": gpu_output,
+        "surface_sampling_algorithm": surface_sampling_algorithm,
     }
 
-    default_kwargs.update(kwargs)
-
-    print(f"kwargs: {default_kwargs}")
-
-    return DoMINODataPipe(
+    pipe = DoMINODataPipe(
         input_path=input_path, model_type=model_type, **default_kwargs
     )
 
+    pipe.set_dataset(dataset)
+    return pipe
+
 
 def validate_sample_structure(sample, model_type, gpu_output):
     """Helper function to validate the structure of a dataset sample."""
     assert isinstance(sample, dict)
 
     # Common keys that should always be present
-    expected_keys = ["geometry_coordinates", "length_scale", "surface_min_max"]
+    expected_keys = ["geometry_coordinates"]
 
     # Model-specific keys
     volume_keys = [
@@ -310,6 +363,9 @@ def validate_sample_structure(sample, model_type, gpu_output):
         expected_keys.extend(surface_keys)
 
     # Check that required keys are present and are torch tensors on correct device
+    for key in expected_keys:
+        print(f"Got key: {key} on device: {sample[key].device.type}")
+
     for key in expected_keys:
         if key in sample:  # Some keys may be None if compute_scaling_factors=True
             if sample[key] is not None:
@@ -335,7 +391,13 @@ def test_domino_datapipe_core(
 
     data_dir = request.getfixturevalue(data_dir)
     dataset = create_basic_dataset(
-        data_dir, model_type, gpu_preprocessing=gpu_preprocessing, gpu_output=gpu_output
+        data_dir,
+        model_type,
+        gpu_preprocessing=gpu_preprocessing,
+        gpu_output=gpu_output,
+        normalize_coordinates=False,
+        sample_in_bbox=False,
+        sampling=False,
     )
 
     assert len(dataset) > 0
@@ -356,8 +418,10 @@ def test_domino_datapipe_coordinate_normalization(
         zarr_dataset,
         model_type,
         gpu_preprocessing=True,
+        gpu_output=True,
         normalize_coordinates=normalize_coordinates,
         sample_in_bbox=sample_in_bbox,
+        sampling=False,
     )
 
     sample = dataset[0]
@@ -457,17 +521,26 @@ def test_domino_datapipe_coordinate_normalization(
 def test_domino_datapipe_sampling(zarr_dataset, model_type, sampling, pytestconfig):
     """Test point sampling functionality."""
     sample_points = 4321
+
+    use_cuda = torch.cuda.is_available()
+
     dataset = create_basic_dataset(
         zarr_dataset,
         model_type,
-        gpu_preprocessing=False,
+        gpu_preprocessing=use_cuda,
+        gpu_output=use_cuda,
+        normalize_coordinates=False,
+        sample_in_bbox=False,
         sampling=sampling,
         volume_points_sample=sample_points,
         surface_points_sample=sample_points,
     )
 
     sample = dataset[0]
-    validate_sample_structure(sample, model_type, gpu_output=True)
+    validate_sample_structure(sample, model_type, gpu_output=use_cuda)
+
+    for key in sample:
+        print(f"sample[{key}].shape: {sample[key].shape}")
 
     if model_type in ["volume", "combined"]:
         for key in ["volume_mesh_centers", "volume_fields"]:
@@ -501,41 +574,13 @@ def test_domino_datapipe_sampling(zarr_dataset, model_type, sampling, pytestconf
                 assert sample[key].shape[2] == dataset.config.num_surface_neighbors - 1
 
 
-@import_or_fail(["warp", "cupy", "cuml"])
-@pytest.mark.parametrize("model_type", ["combined"])
-@pytest.mark.parametrize(
-    "positional_encoding",
-    [
-        True,
-    ],
-)
-def test_domino_datapipe_positional_encoding(
-    zarr_dataset, model_type, positional_encoding, pytestconfig
-):
-    """Test positional encoding functionality."""
-    dataset = create_basic_dataset(
-        zarr_dataset,
-        model_type,
-        gpu_preprocessing=False,
-        positional_encoding=positional_encoding,
-    )
-
-    sample = dataset[0]
-    validate_sample_structure(sample, model_type, gpu_output=True)
-
-    # Check for positional encoding keys
-    if positional_encoding:
-        pos_keys = ["pos_volume_closest", "pos_volume_center_of_mass"]
-        for key in pos_keys:
-            if key in sample:
-                assert sample[key] is not None
-
-
 @import_or_fail(["warp", "cupy", "cuml"])
 @pytest.mark.parametrize("model_type", ["volume"])
 @pytest.mark.parametrize("scaling_type", [None, "min_max_scaling", "mean_std_scaling"])
 def test_domino_datapipe_scaling(zarr_dataset, model_type, scaling_type, pytestconfig):
     """Test field scaling functionality."""
+    use_cuda = torch.cuda.is_available()
+
     if scaling_type == "min_max_scaling":
         volume_factors = [10.0, -10.0]  # [max, min]
     elif scaling_type == "mean_std_scaling":
@@ -546,13 +591,14 @@ def test_domino_datapipe_scaling(zarr_dataset, model_type, scaling_type, pytestc
     dataset = create_basic_dataset(
         zarr_dataset,
         model_type,
-        gpu_preprocessing=False,
+        gpu_preprocessing=use_cuda,
+        gpu_output=use_cuda,
         scaling_type=scaling_type,
         volume_factors=volume_factors,
     )
 
     sample = dataset[0]
-    validate_sample_structure(sample, model_type, gpu_output=True)
+    validate_sample_structure(sample, model_type, gpu_output=use_cuda)
 
 
 # Caching tests
@@ -560,18 +606,18 @@ def test_domino_datapipe_scaling(zarr_dataset, model_type, scaling_type, pytestc
 @pytest.mark.parametrize("model_type", ["volume"])
 def test_domino_datapipe_caching_config(zarr_dataset, model_type, pytestconfig):
     """Test DoMINODataPipe with caching=True configuration."""
+    use_cuda = torch.cuda.is_available()
     dataset = create_basic_dataset(
         zarr_dataset,
         model_type,
-        gpu_preprocessing=False,
+        gpu_preprocessing=use_cuda,
+        gpu_output=use_cuda,
         caching=True,
         sampling=False,  # Required for caching
-        compute_scaling_factors=False,  # Required for caching
-        resample_surfaces=False,  # Required for caching
     )
 
     sample = dataset[0]
-    validate_sample_structure(sample, model_type, gpu_output=True)
+    validate_sample_structure(sample, model_type, gpu_output=use_cuda)
 
 
 @import_or_fail(["warp", "cupy", "cuml"])
@@ -617,24 +663,16 @@ def test_cached_domino_dataset(zarr_dataset, tmp_path, pytestconfig):
 def test_domino_datapipe_invalid_caching_config(zarr_dataset, pytestconfig):
     """Test that invalid caching configurations raise appropriate errors."""
 
+    use_cuda = torch.cuda.is_available()
     # Test: caching=True with sampling=True should fail
     with pytest.raises(ValueError, match="Sampling should be False for caching"):
-        create_basic_dataset(zarr_dataset, "volume", caching=True, sampling=True)
-
-    # Test: caching=True with compute_scaling_factors=True should fail
-    with pytest.raises(
-        ValueError, match="Compute scaling factors should be False for caching"
-    ):
-        create_basic_dataset(
-            zarr_dataset, "volume", caching=True, compute_scaling_factors=True
-        )
-
-    # Test: caching=True with resample_surfaces=True should fail
-    with pytest.raises(
-        ValueError, match="Resample surface should be False for caching"
-    ):
         create_basic_dataset(
-            zarr_dataset, "volume", caching=True, resample_surfaces=True
+            zarr_dataset,
+            "volume",
+            caching=True,
+            sampling=True,
+            gpu_preprocessing=use_cuda,
+            gpu_output=use_cuda,
         )
 
 
@@ -661,12 +699,15 @@ def test_domino_datapipe_file_format_support(zarr_dataset, pytestconfig):
     """Test support for different file formats (.zarr, .npz, .npy)."""
     # This test assumes the data directory has files in these formats
     # If not available, we can mock the file reading
-    dataset = create_basic_dataset(zarr_dataset, "volume", gpu_preprocessing=False)
+    use_cuda = torch.cuda.is_available()
+    dataset = create_basic_dataset(
+        zarr_dataset, "volume", gpu_preprocessing=use_cuda, gpu_output=use_cuda
+    )
 
     # Just verify we can load at least one sample
     assert len(dataset) > 0
     sample = dataset[0]
-    validate_sample_structure(sample, "volume", gpu_output=True)
+    validate_sample_structure(sample, "volume", gpu_output=use_cuda)
 
 
 # Surface-specific tests (when GPU preprocessing issues are resolved)
@@ -676,10 +717,14 @@ def test_domino_datapipe_surface_sampling(
     zarr_dataset, surface_sampling_algorithm, pytestconfig
 ):
     """Test surface sampling algorithms."""
+
+    gpu = torch.cuda.is_available()
+
     dataset = create_basic_dataset(
         zarr_dataset,
         "surface",
-        gpu_preprocessing=False,  # Avoid known GPU issues
+        gpu_preprocessing=gpu,
+        gpu_output=gpu,
         sampling=True,
         surface_sampling_algorithm=surface_sampling_algorithm,
     )
diff --git a/test/distributed/shard_tensor/ops/test_radius_search.py b/test/distributed/shard_tensor/ops/test_radius_search.py
index 0ebaf05536..7c18cd0190 100644
--- a/test/distributed/shard_tensor/ops/test_radius_search.py
+++ b/test/distributed/shard_tensor/ops/test_radius_search.py
@@ -31,7 +31,6 @@
 import torch
 
 from physicsnemo.distributed import DistributedManager
-from physicsnemo.models.domino.model import BQWarp
 from physicsnemo.utils.version_check import check_module_requirements
 
 try:
@@ -138,6 +137,8 @@ def run_radius_search_module(model, data_dict, reverse_mapping):
 def test_sharded_radius_search_layer_forward(
     distributed_mesh, shard_points, shard_grid, reverse_mapping
 ):
+    from physicsnemo.models.layers.ball_query import BQWarp
+
     dm = DistributedManager()
 
     device = dm.device

From 4b1a3fdd17c57914366512a2ba6197cfd733a994 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Tue, 30 Sep 2025 15:35:43 +0000
Subject: [PATCH 59/98] Fix dataloading error, and remove old datapipe

---
 .../domino/requirements.txt                   |    1 +
 .../domino/src/benchmark_dataloader.py        |    2 +-
 .../domino/src/compute_statistics.py          |    2 +-
 .../domino/src/inference_on_stl.py            |    2 +-
 .../external_aerodynamics/domino/src/loss.py  |    6 +-
 .../external_aerodynamics/domino/src/train.py |   29 +-
 physicsnemo/datapipes/cae/__init__.py         |    2 +-
 physicsnemo/datapipes/cae/domino_datapipe.py  | 1649 +++++++----------
 physicsnemo/datapipes/cae/domino_datapipe2.py | 1222 ------------
 .../datapipes/cae/domino_sharded_datapipe.py  |  176 --
 .../datapipes/cae/drivaer_ml_dataset.py       |    3 +
 11 files changed, 732 insertions(+), 2362 deletions(-)
 delete mode 100644 physicsnemo/datapipes/cae/domino_datapipe2.py
 delete mode 100644 physicsnemo/datapipes/cae/domino_sharded_datapipe.py

diff --git a/examples/cfd/external_aerodynamics/domino/requirements.txt b/examples/cfd/external_aerodynamics/domino/requirements.txt
index 4c689c85e2..1d2cfe7dd9 100644
--- a/examples/cfd/external_aerodynamics/domino/requirements.txt
+++ b/examples/cfd/external_aerodynamics/domino/requirements.txt
@@ -3,3 +3,4 @@ warp-lang
 tensorboard
 cuml
 einops
+tensorstore
\ No newline at end of file
diff --git a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
index 62a41d3383..090fbf361c 100644
--- a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
+++ b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
@@ -64,7 +64,7 @@
 from physicsnemo.launch.utils import load_checkpoint, save_checkpoint
 from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper
 
-from physicsnemo.datapipes.cae.domino_datapipe2 import (
+from physicsnemo.datapipes.cae.domino_datapipe import (
     DoMINODataPipe,
     compute_scaling_factors,
     create_domino_dataset,
diff --git a/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py b/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py
index ac917d5353..d3516dff0f 100644
--- a/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py
+++ b/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py
@@ -37,7 +37,7 @@
 from physicsnemo.distributed import DistributedManager
 from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper
 
-from physicsnemo.datapipes.cae.domino_datapipe2 import compute_scaling_factors
+from physicsnemo.datapipes.cae.domino_datapipe import compute_scaling_factors
 from utils import ScalingFactors
 
 
diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
index f2c3388ada..a55f703d66 100644
--- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
+++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
@@ -64,7 +64,7 @@
 from physicsnemo.launch.utils import load_checkpoint, save_checkpoint
 from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper
 
-from physicsnemo.datapipes.cae.domino_datapipe2 import (
+from physicsnemo.datapipes.cae.domino_datapipe import (
     DoMINODataPipe,
     create_domino_dataset,
 )
diff --git a/examples/cfd/external_aerodynamics/domino/src/loss.py b/examples/cfd/external_aerodynamics/domino/src/loss.py
index 0d90ab3674..e8a143b9c4 100644
--- a/examples/cfd/external_aerodynamics/domino/src/loss.py
+++ b/examples/cfd/external_aerodynamics/domino/src/loss.py
@@ -46,11 +46,7 @@
 from physicsnemo.launch.utils import load_checkpoint, save_checkpoint
 from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper
 
-from physicsnemo.datapipes.cae.domino_datapipe2 import (
-    DoMINODataPipe,
-    compute_scaling_factors,
-    create_domino_dataset,
-)
+
 from physicsnemo.models.domino.model import DoMINO
 from physicsnemo.utils.domino.utils import *
 
diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py
index ea71ee2a71..3763ce68ec 100644
--- a/examples/cfd/external_aerodynamics/domino/src/train.py
+++ b/examples/cfd/external_aerodynamics/domino/src/train.py
@@ -75,7 +75,7 @@ def srt2bool(val: str):
 from physicsnemo.launch.utils import load_checkpoint, save_checkpoint
 from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper
 
-from physicsnemo.datapipes.cae.domino_datapipe2 import (
+from physicsnemo.datapipes.cae.domino_datapipe import (
     DoMINODataPipe,
     create_domino_dataset,
 )
@@ -122,7 +122,7 @@ def validation_step(
         for i_batch, sample_batched in enumerate(dataloader):
             sampled_batched = dict_to_device(sample_batched, device)
 
-            with autocast("cuda", enabled=True):
+            with autocast("cuda", enabled=True, cache_enabled=False):
                 if add_physics_loss:
                     prediction_vol, prediction_surf = model(
                         sampled_batched, return_volume_neighbors=True
@@ -184,12 +184,16 @@ def train_epoch(
     with Profiler():
         io_start_time = time.perf_counter()
         for i_batch, sampled_batched in enumerate(dataloader):
+            for key in sampled_batched.keys():
+                print(
+                    f"{key} has shape {sampled_batched[key].shape} and autograd fn {sampled_batched[key].autograd_fn if hasattr(sampled_batched[key], 'autograd_fn') else None}"
+                )
             io_end_time = time.perf_counter()
             if add_physics_loss:
                 autocast_enabled = False
             else:
                 autocast_enabled = True
-            with autocast("cuda", enabled=autocast_enabled):
+            with autocast("cuda", enabled=autocast_enabled, cache_enabled=False):
                 with nvtx.range("Model Forward Pass"):
                     if add_physics_loss:
                         prediction_vol, prediction_surf = model(
@@ -478,14 +482,15 @@ def main(cfg: DictConfig) -> None:
     # Load checkpoint if available
     ######################################################
 
-    init_epoch = load_checkpoint(
-        to_absolute_path(cfg.resume_dir),
-        models=model,
-        optimizer=optimizer,
-        scheduler=scheduler,
-        scaler=scaler,
-        device=dist.device,
-    )
+    # init_epoch = load_checkpoint(
+    #     to_absolute_path(cfg.resume_dir),
+    #     models=model,
+    #     optimizer=optimizer,
+    #     scheduler=scheduler,
+    #     scaler=scaler,
+    #     device=dist.device,
+    # )
+    init_epoch = 0
 
     if init_epoch != 0:
         init_epoch += 1  # Start with the next epoch
@@ -529,7 +534,7 @@ def main(cfg: DictConfig) -> None:
         else:
             surface_scaling_loss = cfg.model.surf_loss_scaling
 
-        model.train(True)
+        # model.train(True)
         epoch_start_time = time.perf_counter()
         avg_loss = train_epoch(
             dataloader=train_dataloader,
diff --git a/physicsnemo/datapipes/cae/__init__.py b/physicsnemo/datapipes/cae/__init__.py
index 9af8d88db2..c0d17ff723 100644
--- a/physicsnemo/datapipes/cae/__init__.py
+++ b/physicsnemo/datapipes/cae/__init__.py
@@ -14,5 +14,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from .domino_datapipe2 import DoMINODataPipe
+from .domino_datapipe import DoMINODataPipe
 from .mesh_datapipe import MeshDatapipe
diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py
index 666e4bfad5..5feae7e118 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe.py
@@ -17,7 +17,7 @@
 """
 This code provides the datapipe for reading the processed npy files,
 generating multi-res grids, calculating signed distance fields,
-positional encodings, sampling random points in the volume and on surface,
+sampling random points in the volume and on surface,
 normalizing fields and returning the output tensors as a dictionary.
 
 This datapipe also non-dimensionalizes the fields, so the order in which the variables should
@@ -26,101 +26,46 @@
 variable names, domain resolution, sampling size etc. are configurable in config.yaml.
 """
 
-import os
-import time
-from concurrent.futures import ThreadPoolExecutor
-from contextlib import nullcontext
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Literal, Optional, Protocol, Sequence, Union
+from typing import Iterable, Literal, Optional, Protocol, Sequence, Union
 
-import cuml
-import cupy as cp
 import numpy as np
 import torch
 import torch.cuda.nvtx as nvtx
-import zarr
 from omegaconf import DictConfig
-from scipy.spatial import KDTree
-from torch import Tensor
-from torch.utils.data import Dataset, default_collate
+from torch.distributed.tensor.placement_types import Replicate
+from torch.utils.data import Dataset
 
+from physicsnemo.datapipes.cae.drivaer_ml_dataset import (
+    DrivaerMLDataset,
+    compute_mean_std_min_max,
+)
 from physicsnemo.distributed import DistributedManager
+from physicsnemo.distributed.shard_tensor import ShardTensor, scatter_tensor
 from physicsnemo.utils.domino.utils import (
-    ArrayType,
-    area_weighted_shuffle_array,
     calculate_center_of_mass,
     create_grid,
     get_filenames,
-    mean_std_sampling,
     normalize,
     pad,
-    # sample_array,
     shuffle_array,
-    solution_weighted_shuffle_array,
     standardize,
+    unnormalize,
+    unstandardize,
 )
+from physicsnemo.utils.neighbors import knn
 from physicsnemo.utils.profiling import profile
 from physicsnemo.utils.sdf import signed_distance_field
 
-"""
-These functions, below, are to handle the SDF calculation which only 
-accepts torch tensors.  The entire pipeline is moving to torch, so
-these aren't necessary after that.
-"""
-
-
-def _convert_array_to_torch(array: cp.ndarray | np.ndarray) -> torch.Tensor:
-    """
-    TEMPORARY function to convert cupy and numpy arrays to torch tensors.
-    """
-    if isinstance(array, cp.ndarray):
-        return torch.utils.dlpack.from_dlpack(array)
-    elif isinstance(array, np.ndarray):
-        return torch.from_numpy(array)
-    else:
-        raise ValueError(f"Unsupported array type: {type(array)}")
-
-
-def _convert_torch_to_array(array: torch.Tensor, provider) -> cp.ndarray | np.ndarray:
-    """
-    TEMPORARY function to convert torch tensors to cupy arrays.
-    """
-    return provider.from_dlpack(array)
-
-
-def domino_collate_fn(batch):
-    """
-    This function is a custom collation function to move cupy data to torch tensors on the device.
-
-    For things that aren't cupy arrays, fall back to torch.data.default_convert.  Data, here,
-    is a dictionary of numpy arrays or cupy arrays.
-
-    """
-
-    def convert(obj):
-        if isinstance(obj, cp.ndarray):
-            return torch.utils.dlpack.from_dlpack(obj.toDlpack())
-        elif isinstance(obj, list):
-            return [convert(x) for x in obj]
-        elif isinstance(obj, tuple):
-            return tuple(convert(x) for x in obj)
-        elif isinstance(obj, dict):
-            return {k: convert(v) for k, v in obj.items()}
-        else:
-            return obj
-
-    batch = [convert(sample) for sample in batch]
-    return default_collate(batch)
-
 
 class BoundingBox(Protocol):
     """
     Type definition for the required format of bounding box dimensions.
     """
 
-    min: ArrayType
-    max: ArrayType
+    min: Sequence
+    max: Sequence
 
 
 @dataclass
@@ -179,7 +124,7 @@ class DoMINODataConfig:
             You might choose gpu_preprocessing=True and gpu_output=False if caching.
     """
 
-    data_path: Path
+    data_path: Path | None
     phase: Literal["train", "val", "test"]
 
     # Surface-specific variables:
@@ -196,7 +141,7 @@ class DoMINODataConfig:
     volume_factors: Optional[Sequence] = None
     bounding_box_dims: Optional[Union[BoundingBox, Sequence]] = None
 
-    grid_resolution: Union[Sequence, ArrayType] = (256, 96, 64)
+    grid_resolution: Sequence = (256, 96, 64)
     normalize_coordinates: bool = False
     sample_in_bbox: bool = False
     sampling: bool = False
@@ -209,16 +154,17 @@ class DoMINODataConfig:
     gpu_output: bool = True
 
     def __post_init__(self):
-        # Ensure data_path is a Path object:
-        if isinstance(self.data_path, str):
-            self.data_path = Path(self.data_path)
-        self.data_path = self.data_path.expanduser()
+        if self.data_path is not None:
+            # Ensure data_path is a Path object:
+            if isinstance(self.data_path, str):
+                self.data_path = Path(self.data_path)
+            self.data_path = self.data_path.expanduser()
 
-        if not self.data_path.exists():
-            raise ValueError(f"Path {self.data_path} does not exist")
+            if not self.data_path.exists():
+                raise ValueError(f"Path {self.data_path} does not exist")
 
-        if not self.data_path.is_dir():
-            raise ValueError(f"Path {self.data_path} is not a directory")
+            if not self.data_path.is_dir():
+                raise ValueError(f"Path {self.data_path} is not a directory")
 
         # Object if caching settings are impossible:
         if self.caching:
@@ -246,328 +192,179 @@ def __post_init__(self):
 
 
 ##### TODO
-# - put model type in config or leave in __init__
-# - check the bounding box protocol works
+# - The SDF normalization here is based on using a normalized mesh and
+#   a normalized coordinate.  The alternate method is to normalize to the min/max of the grid.
 
 
 class DoMINODataPipe(Dataset):
     """
     Datapipe for DoMINO
 
+    Leverages a dataset for the actual reading of the data, and this
+    object is responsible for preprocessing the data.
+
     """
 
     def __init__(
         self,
         input_path,
         model_type: Literal["surface", "volume", "combined"],
+        pin_memory: bool = False,
         **data_config_overrides,
     ):
         # Perform config packaging and validation
         self.config = DoMINODataConfig(data_path=input_path, **data_config_overrides)
 
+        # Set up the distributed manager:
         if not DistributedManager.is_initialized():
             DistributedManager.initialize()
 
         dist = DistributedManager()
-        if self.config.gpu_preprocessing or self.config.gpu_output:
-            # Make sure we move data to the right device:
-            target_device = dist.device.index
-            self.device_context = cp.cuda.Device(target_device)
-            self.device_context.use()
-        else:
-            self.device_context = nullcontext()
 
-        self.device = dist.device
-
-        if self.config.deterministic:
-            np.random.seed(42)
-            cp.random.seed(42)
-        else:
-            np.random.seed(seed=int(time.time()))
-            cp.random.seed(seed=int(time.time()))
+        # Set devices for the preprocessing and IO target
+        self.preproc_device = (
+            dist.device if self.config.gpu_preprocessing else torch.device("cpu")
+        )
+        # The drivaer_ml_dataset will automatically target this device
+        # In an async transfer.
+        self.output_device = (
+            dist.device if self.config.gpu_output else torch.device("cpu")
+        )
 
+        # Model type determines whether we process surface, volume, or both.
         self.model_type = model_type
 
-        self.filenames = get_filenames(self.config.data_path, exclude_dirs=True)
-        total_files = len(self.filenames)
-
-        self.indices = np.array(range(total_files))
-
-        # Why shuffle the indices here if only using random access below?
-
-        np.random.shuffle(self.indices)
-
-        # Determine the array provider based on what device
-        # will do preprocessing:
-        self.array_provider = cp if self.config.gpu_preprocessing else np
         # Update the arrays for bounding boxes:
-
         if hasattr(self.config.bounding_box_dims, "max") and hasattr(
             self.config.bounding_box_dims, "min"
         ):
             self.config.bounding_box_dims = [
-                self.array_provider.asarray(self.config.bounding_box_dims.max).astype(
-                    "float32"
+                torch.tensor(
+                    self.config.bounding_box_dims.max,
+                    device=self.preproc_device,
+                    dtype=torch.float32,
                 ),
-                self.array_provider.asarray(self.config.bounding_box_dims.min).astype(
-                    "float32"
+                torch.tensor(
+                    self.config.bounding_box_dims.min,
+                    device=self.preproc_device,
+                    dtype=torch.float32,
                 ),
             ]
+            self.default_volume_grid = create_grid(
+                self.config.bounding_box_dims[0],
+                self.config.bounding_box_dims[1],
+                self.config.grid_resolution,
+            )
+
+        # And, do the surface bounding box if supplied:
         if hasattr(self.config.bounding_box_dims_surf, "max") and hasattr(
             self.config.bounding_box_dims_surf, "min"
         ):
             self.config.bounding_box_dims_surf = [
-                self.array_provider.asarray(
-                    self.config.bounding_box_dims_surf.max
-                ).astype("float32"),
-                self.array_provider.asarray(
-                    self.config.bounding_box_dims_surf.min
-                ).astype("float32"),
+                torch.tensor(
+                    self.config.bounding_box_dims_surf.max,
+                    device=self.preproc_device,
+                    dtype=torch.float32,
+                ),
+                torch.tensor(
+                    self.config.bounding_box_dims_surf.min,
+                    device=self.preproc_device,
+                    dtype=torch.float32,
+                ),
             ]
 
-        # Used if threaded data is enabled:
-        self.max_workers = 24
-        # Create a single thread pool for the class
-        self.executor = ThreadPoolExecutor(max_workers=self.max_workers)
-
-        # Define here the keys to read for each __getitem__ call
-
-        # Always read these keys
-        self.keys_to_read = ["stl_coordinates", "stl_centers", "stl_faces", "stl_areas"]
-        with self.device_context:
-            xp = self.array_provider
-            self.keys_to_read_if_available = {
-                "global_params_values": xp.asarray([30.0, 1.226]),
-                "global_params_reference": xp.asarray([30.0, 1.226]),
-            }
-        self.volume_keys = ["volume_mesh_centers", "volume_fields"]
-        self.surface_keys = [
-            "surface_mesh_centers",
-            "surface_normals",
-            "surface_areas",
-            "surface_fields",
-        ]
-
-        if self.model_type == "volume" or self.model_type == "combined":
-            self.keys_to_read.extend(self.volume_keys)
-        if self.model_type == "surface" or self.model_type == "combined":
-            self.keys_to_read.extend(self.surface_keys)
-
-    def __del__(self):
-        # Clean up the executor when the instance is being destroyed
-        if hasattr(self, "executor"):
-            self.executor.shutdown()
+            self.default_surface_grid = create_grid(
+                self.config.bounding_box_dims_surf[0],
+                self.config.bounding_box_dims_surf[1],
+                self.config.grid_resolution,
+            )
 
-    @profile
-    def read_data_zarr(self, filepath):
-        # def create_pinned_streaming_space(shape, dtype):
-        #     # TODO - this function could boost performance a little, but
-        #     # the pinned memory pool seems too small.
-        #     if self.array_provider == cp:
-        #         nbytes = np.prod(shape) * dtype.itemsize
-        #         ptr = cp.cuda.alloc_pinned_memory(nbytes)
-        #         arr = np.frombuffer(ptr, dtype)
-        #         return arr.reshape(shape)
-        #     else:
-        #         return np.empty(shape, dtype=dtype)
-
-        def read_chunk_into_array(ram_array, fs_zarr_array, slice):
-            ram_array[slice] = fs_zarr_array[slice]
-
-        @profile
-        def chunked_aligned_read(zarr_group, key, futures):
-            zarr_array = zarr_group[key]
-
-            shape = zarr_array.shape
-            chunk_size = zarr_array.chunks[0]
-
-            # Pre-allocate the full result array
-            result_shape = zarr_array.shape
-            result_dtype = zarr_array.dtype
-
-            result = np.empty(result_shape, dtype=result_dtype)
-
-            for start in range(0, shape[0], chunk_size):
-                end = min(start + chunk_size, shape[0])
-                read_slice = np.s_[start:end]
-                futures.append(
-                    self.executor.submit(
-                        read_chunk_into_array, result, zarr_array, read_slice
-                    )
-                )
+        # Ensure the volume and surface scaling factors are torch tensors
+        # and on the right device:
+        if self.config.volume_factors is not None:
+            self.config.volume_factors = torch.tensor(
+                self.config.volume_factors,
+                device=self.preproc_device,
+                dtype=torch.float32,
+            )
+        if self.config.surface_factors is not None:
+            self.config.surface_factors = torch.tensor(
+                self.config.surface_factors,
+                device=self.preproc_device,
+                dtype=torch.float32,
+            )
 
-            return result
+        self.dataset = None
 
-        with zarr.open_group(filepath, mode="r") as z:
-            data = {}
-            futures = []
-            if "volume_fields" in z.keys():
-                data["volume_fields"] = chunked_aligned_read(
-                    z, "volume_fields", futures
-                )
-            if "volume_mesh_centers" in z.keys():
-                data["volume_mesh_centers"] = chunked_aligned_read(
-                    z, "volume_mesh_centers", futures
-                )
-
-            for key in self.keys_to_read:
-                if z[key].shape == ():
-                    data[key] = z[key]
-                elif key in ["volume_fields", "volume_mesh_centers"]:
-                    continue
-                else:
-                    data[key] = np.empty(z[key].shape, dtype=z[key].dtype)
-                    slice = np.s_[:]
-                    futures.append(
-                        self.executor.submit(
-                            read_chunk_into_array, data[key], z[key], slice
-                        )
-                    )
-
-            # Now wait for all the futures to complete
-            for future in futures:
-                result = future.result()
-                if isinstance(result, tuple) and len(result) == 2:
-                    key, value = result
-                    data[key] = value
-
-            # Move big data to GPU
-            for key in data.keys():
-                data[key] = self.array_provider.asarray(data[key])
-
-            # Optional, maybe-present keys
-            for key in self.keys_to_read_if_available:
-                if key not in data.keys():
-                    data[key] = self.keys_to_read_if_available[key]
-
-        return data
+    def compute_stl_scaling_and_surface_grids(
+        self,
+        stl_vertices: torch.Tensor,
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        """
+        Compute the min and max for the defining mesh.
 
-    @profile
-    def read_data_npy(self, filepath):
-        with open(filepath, "rb") as f:
-            data = np.load(f, allow_pickle=True).item()
+        If the user supplies a bounding box, we use that.  Otherwise,
+        it's created dynamically from the min/max of the stl vertices.
 
-        for key in self.keys_to_read_if_available:
-            if key not in data.keys():
-                data[key] = self.keys_to_read_if_available[key]
+        The returned min/max and grid are used for surface data.
+        """
 
-        if "filename" in data.keys():
-            data.pop("filename", None)
+        # Check the bounding box is not unit length
 
-        if not (isinstance(data["stl_coordinates"], np.ndarray)):
-            data["stl_coordinates"] = np.asarray(data["stl_coordinates"])
+        if self.config.bounding_box_dims_surf is not None:
+            s_max = self.config.bounding_box_dims_surf[0]
+            s_min = self.config.bounding_box_dims_surf[1]
+            surf_grid = self.default_surface_grid
+        else:
+            # Create the grid dynamically
+            s_min = torch.amin(stl_vertices, 0)
+            s_max = torch.amax(stl_vertices, 0)
+            surf_grid = create_grid(s_max, s_min, self.config.grid_resolution)
 
-        # Maybe move to GPU:
-        with self.device_context:
-            for key in data.keys():
-                if data[key] is not None:
-                    data[key] = self.array_provider.asarray(data[key])
-        return data
+        return s_min, s_max, surf_grid
 
-    @profile
-    def read_data_npz(
-        self,
-        filepath,
-        max_workers=None,
+    def compute_volume_scaling_and_grids(
+        self, s_min: torch.Tensor, s_max: torch.Tensor
     ):
-        if max_workers is not None:
-            self.max_workers = max_workers
-
-        def load_one(key):
-            with np.load(filepath) as data:
-                return key, data[key]
-
-        def check_optional_keys():
-            with np.load(filepath) as data:
-                optional_results = {}
-                for key in self.keys_to_read_if_available:
-                    if key in data.keys():
-                        optional_results[key] = data[key]
-                    else:
-                        optional_results[key] = self.keys_to_read_if_available[key]
-            with self.device_context:
-                optional_results = {
-                    key: self.array_provider.asarray(value)
-                    for key, value in optional_results.items()
-                }
-            return optional_results
-
-        # Use the class-level executor instead of creating a new one
-        results = dict(self.executor.map(load_one, self.keys_to_read))
-
-        # Move the results to the GPU:
-        with self.device_context:
-            for key in results.keys():
-                results[key] = self.array_provider.asarray(results[key])
-
-        # Check the optional ones:
-        optional_results = check_optional_keys()
-        results.update(optional_results)
-
-        return results
+        """
+        Compute the min and max and grid for volume data.
 
-    def __len__(self):
-        return len(self.indices)
+        If the user supplies a bounding box, we use that.  Otherwise,
+        it's created dynamically from the surface min/max.
 
-    @profile
-    def preprocess_combined(self, data_dict):
-        # Pull these out and force to fp32:
-        with self.device_context:
-            global_params_values = data_dict["global_params_values"].astype(
-                self.array_provider.float32
-            )
-            global_params_reference = data_dict["global_params_reference"].astype(
-                self.array_provider.float32
-            )
+        This will be 2x longer in x and y and the same in z as the surface bounding box.
+        """
 
-        # Pull these pieces out of the data_dict for manipulation
-        stl_vertices = data_dict["stl_coordinates"]
-        stl_centers = data_dict["stl_centers"]
-        mesh_indices_flattened = data_dict["stl_faces"]
-        stl_sizes = data_dict["stl_areas"]
-        idx = np.where(stl_sizes > 0.0)
-        stl_sizes = stl_sizes[idx]
-        stl_centers = stl_centers[idx]
+        # Determine the volume min / max locations
+        if self.config.bounding_box_dims is not None:
+            c_max = self.config.bounding_box_dims[0]
+            c_min = self.config.bounding_box_dims[1]
+            volume_grid = self.default_volume_grid
 
-        xp = self.array_provider
+        else:
+            # Create the grid based on the surface grid
+            c_max = s_max + (s_max - s_min) / 2
+            c_min = s_min - (s_max - s_min) / 2
+            c_min[2] = s_min[2]
+            volume_grid = create_grid(c_max, c_min, self.config.grid_resolution)
 
-        # Make sure the mesh_indices_flattened is an integer array:
-        if mesh_indices_flattened.dtype != xp.int32:
-            mesh_indices_flattened = mesh_indices_flattened.astype(xp.int32)
+        return c_min, c_max, volume_grid
 
-        if self.config.bounding_box_dims_surf is None:
-            s_max = xp.amax(stl_vertices, 0)
-            s_min = xp.amin(stl_vertices, 0)
-        else:
-            s_max = xp.asarray(self.config.bounding_box_dims_surf[0])
-            s_min = xp.asarray(self.config.bounding_box_dims_surf[1])
-
-        center_of_mass = calculate_center_of_mass(stl_centers, stl_sizes)
-
-        # SDF calculation on the grid using WARP
-        if not self.config.compute_scaling_factors:
-            nx, ny, nz = self.config.grid_resolution
-            surf_grid = create_grid(s_max, s_min, [nx, ny, nz])
-            surf_grid_reshaped = surf_grid.reshape(nx * ny * nz, 3)
-
-            sdf_surf_grid, _ = signed_distance_field(
-                _convert_array_to_torch(stl_vertices),
-                _convert_array_to_torch(mesh_indices_flattened),
-                _convert_array_to_torch(surf_grid_reshaped),
-                use_sign_winding_number=True,
-            )
-            sdf_surf_grid = sdf_surf_grid.reshape(nx, ny, nz)
-            sdf_surf_grid = _convert_torch_to_array(sdf_surf_grid, self.array_provider)
-            if self.config.normalize_coordinates:
-                sdf_surf_grid = normalize(sdf_surf_grid, xp.amax(surf_grid), xp.amin(surf_grid))
+    @profile
+    def downsample_geometry(
+        self,
+        stl_vertices,
+    ) -> torch.Tensor:
+        """
+        Downsample the geometry to the desired number of points.
 
-        else:
-            surf_grid = None
-            sdf_surf_grid = None
+        Args:
+            stl_vertices: The vertices of the surface.
+        """
 
         if self.config.sampling:
-            # nvtx.range_push("Geometry Sampling")
             geometry_points = self.config.geom_points_sample
+
             geometry_coordinates_sampled, idx_geometry = shuffle_array(
                 stl_vertices, geometry_points
             )
@@ -576,208 +373,133 @@ def preprocess_combined(self, data_dict):
                     geometry_coordinates_sampled, geometry_points, pad_value=-100.0
                 )
             geom_centers = geometry_coordinates_sampled
-            # nvtx.range_pop()
         else:
             geom_centers = stl_vertices
 
-        # geom_centers = self.array_provider.float32(geom_centers)
-
-        surf_grid_max_min = xp.stack([s_min, s_max])
+        return geom_centers
 
-        return_dict = {
-            "surf_grid": surf_grid,
-            "sdf_surf_grid": sdf_surf_grid,
-            "surface_min_max": surf_grid_max_min,
-            "global_params_values": xp.expand_dims(
-                xp.array(global_params_values, dtype=xp.float32), -1
-            ),
-            "global_params_reference": xp.expand_dims(
-                xp.array(global_params_reference, dtype=xp.float32), -1
-            ),
-            "geometry_coordinates": geom_centers,
-        }
-
-        return (
-            return_dict,
-            s_min,
-            s_max,
-            mesh_indices_flattened,
-            stl_vertices,
-            center_of_mass,
-        )
-
-    @profile
-    def preprocess_surface(self, data_dict, core_dict, center_of_mass, s_min, s_max):
+    def process_surface(
+        self,
+        s_min: torch.Tensor,
+        s_max: torch.Tensor,
+        c_min: torch.Tensor,
+        c_max: torch.Tensor,
+        *,  # Forcing the rest by keyword only since it's a long list ...
+        center_of_mass: torch.Tensor,
+        surf_grid: torch.Tensor,
+        surface_coordinates: torch.Tensor,
+        surface_normals: torch.Tensor,
+        surface_sizes: torch.Tensor,
+        stl_vertices: torch.Tensor,
+        stl_indices: torch.Tensor,
+        surface_fields: torch.Tensor | None,
+    ) -> dict[str, torch.Tensor]:
         nx, ny, nz = self.config.grid_resolution
 
         return_dict = {}
-        surface_coordinates = data_dict["surface_mesh_centers"]
-        surface_normals = data_dict["surface_normals"]
-        surface_sizes = data_dict["surface_areas"]
-        surface_fields = data_dict["surface_fields"]
 
-        idx = np.where(surface_sizes > 0)
+        ########################################################################
+        # Remove any sizes <= 0:
+        ########################################################################
+        idx = surface_sizes > 0
         surface_sizes = surface_sizes[idx]
-        surface_fields = surface_fields[idx]
         surface_normals = surface_normals[idx]
         surface_coordinates = surface_coordinates[idx]
+        if surface_fields is not None:
+            surface_fields = surface_fields[idx]
+
+        ########################################################################
+        # Reject surface points outside of the Bounding Box
+        # NOTE - this is using the VOLUME bounding box!
+        ########################################################################
+        if self.config.sample_in_bbox:
+            ids_min = surface_coordinates[:] > c_min
+            ids_max = surface_coordinates[:] < c_max
+
+            ids_in_bbox = ids_min & ids_max
+            ids_in_bbox = ids_in_bbox.all(dim=-1)
+
+            surface_coordinates = surface_coordinates[ids_in_bbox]
+            surface_normals = surface_normals[ids_in_bbox]
+            surface_sizes = surface_sizes[ids_in_bbox]
+            if surface_fields is not None:
+                surface_fields = surface_fields[ids_in_bbox]
 
-        xp = self.array_provider
+        ########################################################################
+        # Perform Down sampling of the surface fields.
+        # Note that we snapshot the full surface coordinates for
+        # use in the kNN in the next step.
+        ########################################################################
 
-        if not self.config.compute_scaling_factors:
-            c_max = self.config.bounding_box_dims[0]
-            c_min = self.config.bounding_box_dims[1]
+        full_surface_coordinates = surface_coordinates
+        full_surface_normals = surface_normals
+        full_surface_sizes = surface_sizes
+
+        if self.config.sampling:
+            # Perform the down sampling:
+            if self.config.surface_sampling_algorithm == "area_weighted":
+                weights = surface_sizes
+            else:
+                weights = None
+
+            surface_coordinates_sampled, idx_surface = shuffle_array(
+                surface_coordinates,
+                self.config.surface_points_sample,
+                weights=weights,
+            )
 
-            if self.config.sample_in_bbox:
-                # TODO - clean this up with vectorization?
-                # TODO - the xp.where is likely a useless op.  Need to check.
-                ids_in_bbox = xp.where(
-                    (surface_coordinates[:, 0] > c_min[0])
-                    & (surface_coordinates[:, 0] < c_max[0])
-                    & (surface_coordinates[:, 1] > c_min[1])
-                    & (surface_coordinates[:, 1] < c_max[1])
-                    & (surface_coordinates[:, 2] > c_min[2])
-                    & (surface_coordinates[:, 2] < c_max[2])
+            if surface_coordinates_sampled.shape[0] < self.config.surface_points_sample:
+                surface_coordinates_sampled = pad(
+                    surface_coordinates_sampled,
+                    self.config.surface_points_sample,
+                    pad_value=-10.0,
                 )
-                surface_coordinates = surface_coordinates[ids_in_bbox]
-                surface_normals = surface_normals[ids_in_bbox]
-                surface_sizes = surface_sizes[ids_in_bbox]
-                surface_fields = surface_fields[ids_in_bbox]
 
-            
-            # Have to normalize neighbors after the kNN and sampling
-            if self.config.normalize_coordinates:
-                core_dict["surf_grid"] = normalize(core_dict["surf_grid"], s_max, s_min)
-                surface_coordinates = normalize(surface_coordinates, s_max, s_min)
-                center_of_mass_normalized = normalize(xp.asarray(center_of_mass), s_max, s_min)
-            else:
-                center_of_mass_normalized = xp.asarray(center_of_mass)
-
-            pos_normals_com_surface = surface_coordinates - center_of_mass_normalized
-
-            # Fit the kNN (or KDTree, if CPU) on ALL points:
-            if self.config.num_surface_neighbors > 1:
-                if self.array_provider == cp:
-                    knn = cuml.neighbors.NearestNeighbors(
-                        n_neighbors=self.config.num_surface_neighbors,
-                        algorithm="rbc",
-                    )
-                    knn.fit(surface_coordinates)
-                else:
-                    # Under the hood this is instantiating a KDTree.
-                    # aka here knn is a type, not a class, technically.
-                    interp_func = KDTree(surface_coordinates)
-
-            if self.config.sampling:
-                # Perform the down sampling:
-                if self.config.surface_sampling_algorithm == "area_weighted":
-                    (
-                        surface_coordinates_sampled,
-                        idx_surface,
-                    ) = area_weighted_shuffle_array(
-                        surface_coordinates,
-                        self.config.surface_points_sample,
-                        surface_sizes,
-                    )
-                elif self.config.surface_sampling_algorithm == "solution_weighted":
-                    (
-                        surface_coordinates_sampled,
-                        idx_surface,
-                    ) = solution_weighted_shuffle_array(
-                        surface_coordinates,
-                        self.config.surface_points_sample,
-                        surface_fields[:, 0],
-                        scaling_factor=0.5,
-                    )
-                else:
-                    surface_coordinates_sampled, idx_surface = shuffle_array(
-                        surface_coordinates, self.config.surface_points_sample
-                    )
-
-                if (
-                    surface_coordinates_sampled.shape[0]
-                    < self.config.surface_points_sample
-                ):
-                    surface_coordinates_sampled = pad(
-                        surface_coordinates_sampled,
-                        self.config.surface_points_sample,
-                        pad_value=-10.0,
-                    )
-
-                # Select out the sampled points for non-neighbor arrays:
+            # Select out the sampled points for non-neighbor arrays:
+            if surface_fields is not None:
                 surface_fields = surface_fields[idx_surface]
-                pos_normals_com_surface = pos_normals_com_surface[idx_surface]
-
-                # Now, perform the kNN on the sampled points:
-                if self.config.num_surface_neighbors > 1:
-                    if self.array_provider == cp:
-                        ii = knn.kneighbors(
-                            surface_coordinates_sampled, return_distance=False
-                        )
-                    else:
-                        _, ii = interp_func.query(
-                            surface_coordinates_sampled,
-                            k=self.config.num_surface_neighbors,
-                        )
-
-                    # Pull out the neighbor elements.  Note that ii is the index into the original
-                    # points - but only exists for the sampled points
-                    # In other words, a point from `surface_coordinates_sampled` has neighbors
-                    # from the full `surface_coordinates` array.
-                    surface_neighbors = surface_coordinates[ii][:, 1:]
-                    surface_neighbors_normals = surface_normals[ii][:, 1:]
-                    surface_neighbors_sizes = surface_sizes[ii][:, 1:]
-                else:
-                    surface_neighbors = surface_coordinates
-                    surface_neighbors_normals = surface_normals
-                    surface_neighbors_sizes = surface_sizes
-
-                # We could index into these above the knn step too; they aren't dependent on that.
-                surface_normals = surface_normals[idx_surface]
-                surface_sizes = surface_sizes[idx_surface]
-
-                # Update the coordinates to the sampled points:
-                surface_coordinates = surface_coordinates_sampled
 
-            else:
-                # We are *not* sampling, kNN on ALL points:
-                if self.array_provider == cp:
-                    ii = knn.kneighbors(surface_coordinates, return_distance=False)
-                else:
-                    _, ii = interp_func.query(
-                        surface_coordinates,
-                        k=self.config.num_surface_neighbors,
-                    )
-
-                # Construct the neighbors arrays:
-                surface_neighbors = surface_coordinates[ii][:, 1:]
-                surface_neighbors_normals = surface_normals[ii][:, 1:]
-                surface_neighbors_sizes = surface_sizes[ii][:, 1:]
-
-            if self.config.scaling_type is not None:
-                if self.config.surface_factors is not None:
-                    if self.config.scaling_type == "mean_std_scaling":
-                        surf_mean = self.config.surface_factors[0]
-                        surf_std = self.config.surface_factors[1]
-                        # TODO - Are these array calls needed?
-                        surface_fields = standardize(
-                            surface_fields, xp.asarray(surf_mean), xp.asarray(surf_std)
-                        )
-                    elif self.config.scaling_type == "min_max_scaling":
-                        surf_min = self.config.surface_factors[1]
-                        surf_max = self.config.surface_factors[0]
-                        # TODO - Are these array calls needed?
-                        surface_fields = normalize(
-                            surface_fields, xp.asarray(surf_max), xp.asarray(surf_min)
-                        )
+            # Subsample the normals and sizes:
+            surface_normals = surface_normals[idx_surface]
+            surface_sizes = surface_sizes[idx_surface]
+            # Update the coordinates to the sampled points:
+            surface_coordinates = surface_coordinates_sampled
+
+        ########################################################################
+        # Perform a kNN on the surface to find the neighbor information
+        ########################################################################
+        if self.config.num_surface_neighbors > 1:
+            # Perform the kNN:
+            neighbor_indices, neighbor_distances = knn(
+                points=full_surface_coordinates,
+                queries=surface_coordinates,
+                k=self.config.num_surface_neighbors,
+            )
 
-        else:
-            surface_sizes = None
-            surface_normals = None
-            surface_neighbors = None
-            surface_neighbors_normals = None
-            surface_neighbors_sizes = None
-            pos_normals_com_surface = None
+            # Pull out the neighbor elements.
+            # Note that `neighbor_indices` is the index into the original,
+            # full sized tensors (full_surface_coordinates, etc).
+            surface_neighbors = full_surface_coordinates[neighbor_indices][:, 1:]
+            surface_neighbors_normals = full_surface_normals[neighbor_indices][:, 1:]
+            surface_neighbors_sizes = full_surface_sizes[neighbor_indices][:, 1:]
+
+        # Better to normalize everything after the kNN and sampling
+        if self.config.normalize_coordinates:
+            surf_grid = normalize(surf_grid, s_max, s_min)
+            surface_coordinates = normalize(surface_coordinates, s_max, s_min)
+            surface_neighbors = normalize(surface_neighbors, s_max, s_min)
+            # Make sure to normalize the center of mass for the normals_com_surface calc
+            center_of_mass = normalize(center_of_mass, s_max, s_min)
+
+        pos_normals_com_surface = surface_coordinates - center_of_mass
+
+        ########################################################################
+        # Apply scaling to the targets, if desired:
+        ########################################################################
+        if self.config.scaling_type is not None and surface_fields is not None:
+            surface_fields = self.scale_model_targets(
+                surface_fields, self.config.surface_factors
+            )
 
         return_dict.update(
             {
@@ -788,465 +510,450 @@ def preprocess_surface(self, data_dict, core_dict, center_of_mass, s_min, s_max)
                 "surface_neighbors_normals": surface_neighbors_normals,
                 "surface_areas": surface_sizes,
                 "surface_neighbors_areas": surface_neighbors_sizes,
-                "surface_fields": surface_fields,
             }
         )
+        if surface_fields is not None:
+            return_dict["surface_fields"] = surface_fields
 
         return return_dict
 
-    @profile
-    def preprocess_volume(
+    def process_volume(
         self,
-        data_dict,
-        core_dict,
-        s_min,
-        s_max,
-        mesh_indices_flattened,
-        stl_vertices,
-        center_of_mass,
-    ):
-        return_dict = {}
-
-        nx, ny, nz = self.config.grid_resolution
+        c_min: torch.Tensor,
+        c_max: torch.Tensor,
+        volume_coordinates: torch.Tensor,
+        volume_grid: torch.Tensor,
+        center_of_mass: torch.Tensor,
+        stl_vertices: torch.Tensor,
+        stl_indices: torch.Tensor,
+        volume_fields: torch.Tensor | None,
+    ) -> dict[str, torch.Tensor]:
+        """
+        Preprocess the volume data.
 
-        xp = self.array_provider
+        First, if configured, we reject points not in the volume bounding box.
 
-        # # Temporary: convert to cupy here:
-        volume_coordinates = data_dict["volume_mesh_centers"]
-        volume_fields = data_dict["volume_fields"]
+        Next, if sampling is enabled, we sample the volume points and apply that
+        sampling to the ground truth too, if it's present.
 
-        if not self.config.compute_scaling_factors:
-            if self.config.bounding_box_dims is None:
-                c_max = s_max + (s_max - s_min) / 2
-                c_min = s_min - (s_max - s_min) / 2
-                c_min[2] = s_min[2]
-            else:
-                c_max = xp.asarray(self.config.bounding_box_dims[0])
-                c_min = xp.asarray(self.config.bounding_box_dims[1])
-
-            if self.config.sample_in_bbox:
-                # TODO - xp.where can probably be removed.
-                ids_in_bbox = self.array_provider.where(
-                    (volume_coordinates[:, 0] > c_min[0])
-                    & (volume_coordinates[:, 0] < c_max[0])
-                    & (volume_coordinates[:, 1] > c_min[1])
-                    & (volume_coordinates[:, 1] < c_max[1])
-                    & (volume_coordinates[:, 2] > c_min[2])
-                    & (volume_coordinates[:, 2] < c_max[2])
-                )
-                volume_coordinates = volume_coordinates[ids_in_bbox]
+        """
+        ########################################################################
+        # Reject points outside the volumetric BBox
+        ########################################################################
+        if self.config.sample_in_bbox:
+            # Remove points in the volume that are outside
+            # of the bbox area.
+            min_check = volume_coordinates[:] > c_min
+            max_check = volume_coordinates[:] < c_max
+
+            ids_in_bbox = min_check & max_check
+            ids_in_bbox = ids_in_bbox.all(dim=1)
+
+            volume_coordinates = volume_coordinates[ids_in_bbox]
+            if volume_fields is not None:
                 volume_fields = volume_fields[ids_in_bbox]
 
-            # Generate a grid of specified resolution to map the bounding box
-            # The grid is used for capturing structured geometry features and SDF representation of geometry
-            grid = create_grid(c_max, c_min, [nx, ny, nz])
-            grid_reshaped = grid.reshape(nx * ny * nz, 3)
-
-            # SDF calculation on the grid using WARP
-            sdf_grid, _ = signed_distance_field(
-                _convert_array_to_torch(stl_vertices),
-                _convert_array_to_torch(mesh_indices_flattened),
-                _convert_array_to_torch(grid_reshaped),
-                use_sign_winding_number=True,
+        ########################################################################
+        # Apply sampling to the volume coordinates and fields
+        ########################################################################
+
+        if self.config.sampling:
+            # Generate a series of idx to sample the volume
+            # without replacement
+            volume_coordinates_sampled, idx_volume = shuffle_array(
+                volume_coordinates, self.config.volume_points_sample
             )
-            sdf_grid = sdf_grid.reshape((nx, ny, nz))
-            sdf_grid = _convert_torch_to_array(sdf_grid, self.array_provider)
+            volume_coordinates_sampled = volume_coordinates[idx_volume]
+            # In case too few points are in the sampled data (because the
+            # inputs were too few), pad the outputs:
+            if volume_coordinates_sampled.shape[0] < self.config.volume_points_sample:
+                padding_size = (
+                    self.config.volume_points_sample
+                    - volume_coordinates_sampled.shape[0]
+                )
 
-            if self.config.sampling:
-                volume_coordinates_sampled, idx_volume = shuffle_array(
-                    volume_coordinates, self.config.volume_points_sample
+                volume_coordinates_sampled = torch.nn.functional.pad(
+                    volume_coordinates_sampled,
+                    (0, 0, 0, 0, 0, padding_size),
+                    mode="constant",
+                    value=-10.0,
                 )
-                if (
-                    volume_coordinates_sampled.shape[0]
-                    < self.config.volume_points_sample
-                ):
-                    volume_coordinates_sampled = pad(
-                        volume_coordinates_sampled,
-                        self.config.volume_points_sample,
-                        pad_value=-10.0,
-                    )
+
+            # Apply the same sampling to the targets, too:
+            if volume_fields is not None:
                 volume_fields = volume_fields[idx_volume]
-                volume_coordinates = volume_coordinates_sampled
 
-            sdf_nodes, sdf_node_closest_point = signed_distance_field(
-                _convert_array_to_torch(stl_vertices),
-                _convert_array_to_torch(mesh_indices_flattened),
-                _convert_array_to_torch(volume_coordinates),
-                use_sign_winding_number=True,
-            )
-            sdf_nodes = _convert_torch_to_array(sdf_nodes, self.array_provider)
-            sdf_node_closest_point = _convert_torch_to_array(
-                sdf_node_closest_point, self.array_provider
+            volume_coordinates = volume_coordinates_sampled
+
+        ########################################################################
+        # Apply normalization to the coordinates, if desired:
+        ########################################################################
+        if self.config.normalize_coordinates:
+            volume_coordinates = normalize(volume_coordinates, c_max, c_min)
+            grid = normalize(volume_grid, c_max, c_min)
+            # This is used later in the SDF, apply the same scaling to the mesh
+            # coordinates:
+            normed_vertices = normalize(stl_vertices, c_max, c_min)
+        else:
+            grid = volume_grid
+            normed_vertices = stl_vertices
+
+        ########################################################################
+        # Apply scaling to the targets, if desired:
+        ########################################################################
+        if self.config.scaling_type is not None and volume_fields is not None:
+            volume_fields = self.scale_model_targets(
+                volume_fields, self.config.volume_factors
             )
 
-            # TODO - is this needed?
-            sdf_nodes = xp.asarray(sdf_nodes)
-            sdf_node_closest_point = xp.asarray(sdf_node_closest_point)
+        ########################################################################
+        # Compute Signed Distance Function for volumetric quantities
+        # Note - the SDF happens here, after volume data processing finishes,
+        # because we need to use the (maybe) normalized volume coordinates and grid
+        ########################################################################
+
+        # SDF calculation on the volume grid using WARP
+        sdf_grid, _ = signed_distance_field(
+            normed_vertices,
+            stl_indices,
+            grid,
+            use_sign_winding_number=True,
+        )
 
-            sdf_nodes = sdf_nodes.reshape((-1, 1))
+        # Get the SDF of all the selected volume coordinates,
+        # And keep the closest point to each one.
+        sdf_nodes, sdf_node_closest_point = signed_distance_field(
+            normed_vertices,
+            stl_indices,
+            volume_coordinates,
+            use_sign_winding_number=True,
+        )
+        sdf_nodes = sdf_nodes.reshape((-1, 1))
 
-            if self.config.normalize_coordinates:
-                volume_coordinates = normalize(volume_coordinates, c_max, c_min)
-                grid = normalize(grid, c_max, c_min)
-                sdf_grid = normalize(sdf_grid, xp.amax(grid), xp.amin(grid))
-                sdf_nodes = normalize(sdf_nodes, xp.amax(grid), xp.amin(grid))
-                sdf_node_closest_point = normalize(sdf_node_closest_point, c_max, c_min)
-                center_of_mass_normalized = normalize(xp.asarray(center_of_mass), c_max, c_min)
-            else:
-                center_of_mass_normalized = xp.asarray(center_of_mass)
-        
-            pos_normals_closest_vol = volume_coordinates - sdf_node_closest_point
-            pos_normals_com_vol = volume_coordinates - center_of_mass_normalized
-
-            if self.config.scaling_type is not None:
-                if self.config.volume_factors is not None:
-                    if self.config.scaling_type == "mean_std_scaling":
-                        vol_mean = self.config.volume_factors[0]
-                        vol_std = self.config.volume_factors[1]
-                        volume_fields = standardize(volume_fields, vol_mean, vol_std)
-                    elif self.config.scaling_type == "min_max_scaling":
-                        vol_min = xp.asarray(self.config.volume_factors[1])
-                        vol_max = xp.asarray(self.config.volume_factors[0])
-                        volume_fields = normalize(volume_fields, vol_max, vol_min)
-
-            vol_grid_max_min = xp.stack([c_min, c_max])
+        # Use the closest point from the mesh to compute the volume encodings:
+        pos_normals_closest_vol, pos_normals_com_vol = self.calculate_volume_encoding(
+            c_min, c_max, volume_coordinates, sdf_node_closest_point, center_of_mass
+        )
+
+        return_dict = {
+            "volume_mesh_centers": volume_coordinates,
+            "sdf_nodes": sdf_nodes,
+            "grid": grid,
+            "sdf_grid": sdf_grid,
+            "pos_volume_closest": pos_normals_closest_vol,
+            "pos_volume_center_of_mass": pos_normals_com_vol,
+        }
+
+        if volume_fields is not None:
+            return_dict["volume_fields"] = volume_fields
+
+        return return_dict
+
+    def calculate_volume_encoding(
+        self,
+        c_min: torch.Tensor,
+        c_max: torch.Tensor,
+        volume_coordinates: torch.Tensor,
+        sdf_node_closest_point: torch.Tensor,
+        center_of_mass: torch.Tensor,
+    ):
+        if self.config.normalize_coordinates:
+            volume_coordinates = normalize(volume_coordinates, c_max, c_min)
+            sdf_node_closest_point = normalize(sdf_node_closest_point, c_max, c_min)
+            center_of_mass = normalize(center_of_mass, c_max, c_min)
+
+        pos_normals_closest_vol = volume_coordinates - sdf_node_closest_point
+        pos_normals_com_vol = volume_coordinates - center_of_mass
+
+        return pos_normals_closest_vol, pos_normals_com_vol
+
+    @torch.no_grad()
+    def process_data(self, data_dict):
+        # Start building the preprocessed return dict:
+        return_dict = {
+            "global_params_values": data_dict["global_params_values"],
+            "global_params_reference": data_dict["global_params_reference"],
+        }
 
+        ########################################################################
+        # Process the core STL information
+        ########################################################################
+
+        # This function gets information about the surface scale,
+        # and decides what the surface grid will be:
+
+        stl_coordinates = data_dict["stl_coordinates"]
+
+        s_min, s_max, surf_grid = self.compute_stl_scaling_and_surface_grids(
+            stl_coordinates
+        )
+
+        if isinstance(stl_coordinates, ShardTensor):
+            mesh = stl_coordinates._spec.mesh
+            # Then, replicate the bounding box along the mesh if present.
+            s_max = scatter_tensor(
+                s_max,
+                0,
+                mesh=mesh,
+                placements=[
+                    Replicate(),
+                ],
+                global_shape=s_max.shape,
+                dtype=s_max.dtype,
+                requires_grad=False,
+            )
+            s_min = scatter_tensor(
+                s_min,
+                0,
+                mesh=mesh,
+                placements=[
+                    Replicate(),
+                ],
+                global_shape=s_min.shape,
+                dtype=s_min.dtype,
+                requires_grad=False,
+            )
+            surf_grid = scatter_tensor(
+                surf_grid,
+                0,
+                mesh=mesh,
+                placements=[
+                    Replicate(),
+                ],
+                global_shape=surf_grid.shape,
+                dtype=surf_grid.dtype,
+                requires_grad=False,
+            )
+
+        return_dict["surf_grid"] = surf_grid
+
+        # We always need to calculate the SDF on the surface grid:
+        # This is for the SDF Later:
+        if self.config.normalize_coordinates:
+            normed_vertices = normalize(data_dict["stl_coordinates"], s_max, s_min)
         else:
-            pos_normals_closest_vol = None
-            pos_normals_com_vol = None
-            sdf_nodes = None
-            sdf_grid = None
-            grid = None
-            vol_grid_max_min = None
+            normed_vertices = data_dict["stl_coordinates"]
 
-        return_dict.update(
-            {
-                "pos_volume_closest": pos_normals_closest_vol,
-                "pos_volume_center_of_mass": pos_normals_com_vol,
-                "grid": grid,
-                "sdf_grid": sdf_grid,
-                "sdf_nodes": sdf_nodes,
-                "volume_fields": volume_fields,
-                "volume_mesh_centers": volume_coordinates,
-                "volume_min_max": vol_grid_max_min,
-            }
+        # For SDF calculations, make sure the mesh_indices_flattened is an integer array:
+        mesh_indices_flattened = data_dict["stl_faces"].to(torch.int32)
+
+        # Compute signed distance function for the surface grid:
+        sdf_surf_grid, _ = signed_distance_field(
+            mesh_vertices=normed_vertices,
+            mesh_indices=mesh_indices_flattened,
+            input_points=surf_grid,
+            use_sign_winding_number=True,
         )
+        return_dict["sdf_surf_grid"] = sdf_surf_grid
 
-        return return_dict
+        # Store this only if normalization is active:
+        if self.config.normalize_coordinates:
+            return_dict["surface_min_max"] = torch.stack([s_min, s_max])
 
-    @profile
-    def preprocess_data(self, data_dict):
-        (
-            return_dict,
-            s_min,
-            s_max,
-            mesh_indices_flattened,
-            stl_vertices,
-            center_of_mass,
-        ) = self.preprocess_combined(data_dict)
+        # This is a center of mass computation for the stl surface,
+        # using the size of each mesh point as weight.
+        center_of_mass = calculate_center_of_mass(
+            data_dict["stl_centers"], data_dict["stl_areas"]
+        )
+
+        # This will apply downsampling if needed to the geometry coordinates
+        geom_centers = self.downsample_geometry(
+            stl_vertices=data_dict["stl_coordinates"],
+        )
+        return_dict["geometry_coordinates"] = geom_centers
 
+        ########################################################################
+        # Determine the volumetric bounds of the data:
+        ########################################################################
+        # Compute the min/max for volume an the unnomralized grid:
+        c_min, c_max, volume_grid = self.compute_volume_scaling_and_grids(s_min, s_max)
+
+        # For volume data, we store this only if normalizing coordinates:
         if self.model_type == "volume" or self.model_type == "combined":
-            volume_dict = self.preprocess_volume(
-                data_dict,
-                return_dict,
-                s_min,
-                s_max,
-                mesh_indices_flattened,
-                stl_vertices,
-                center_of_mass,
+            if self.config.normalize_coordinates:
+                return_dict["volume_min_max"] = torch.stack([c_min, c_max])
+
+        if self.model_type == "volume" or self.model_type == "combined":
+            volume_fields_raw = (
+                data_dict["volume_fields"] if "volume_fields" in data_dict else None
+            )
+            volume_dict = self.process_volume(
+                c_min,
+                c_max,
+                volume_coordinates=data_dict["volume_mesh_centers"],
+                volume_grid=volume_grid,
+                center_of_mass=center_of_mass,
+                stl_vertices=data_dict["stl_coordinates"],
+                stl_indices=mesh_indices_flattened,
+                volume_fields=volume_fields_raw,
             )
 
             return_dict.update(volume_dict)
 
         if self.model_type == "surface" or self.model_type == "combined":
-            surface_dict = self.preprocess_surface(
-                data_dict, return_dict, center_of_mass, s_min, s_max
+            surface_fields_raw = (
+                data_dict["surface_fields"] if "surface_fields" in data_dict else None
             )
+            surface_dict = self.process_surface(
+                s_min,
+                s_max,
+                c_min,
+                c_max,
+                center_of_mass=center_of_mass,
+                surf_grid=surf_grid,
+                surface_coordinates=data_dict["surface_mesh_centers"],
+                surface_normals=data_dict["surface_normals"],
+                surface_sizes=data_dict["surface_areas"],
+                stl_vertices=data_dict["stl_coordinates"],
+                stl_indices=mesh_indices_flattened,
+                surface_fields=surface_fields_raw,
+            )
+
             return_dict.update(surface_dict)
 
         return return_dict
 
-    @profile
+    def scale_model_targets(
+        self, fields: torch.Tensor, factors: torch.Tensor
+    ) -> torch.Tensor:
+        """
+        Scale the model targets based on the configured scaling factors.
+        """
+        if self.config.scaling_type == "mean_std_scaling":
+            field_mean = self.config.volume_factors[0]
+            field_std = self.config.volume_factors[1]
+            return standardize(fields, field_mean, field_std)
+        elif self.config.scaling_type == "min_max_scaling":
+            field_min = self.config.volume_factors[1]
+            field_max = self.config.volume_factors[0]
+            return normalize(fields, field_max, field_min)
+
+    def unscale_model_outputs(
+        self, volume_fields: torch.Tensor | None, surface_fields: torch.Tensor | None
+    ):
+        """
+        Unscale the model outputs based on the configured scaling factors.
+
+        The unscaling is included here to make it a consistent interface regardless
+        of the scaling factors and type used.
+
+        """
+
+        if volume_fields is not None:
+            if self.config.scaling_type == "mean_std_scaling":
+                vol_mean = self.config.volume_factors[0]
+                vol_std = self.config.volume_factors[1]
+                volume_fields = unstandardize(volume_fields, vol_mean, vol_std)
+            elif self.config.scaling_type == "min_max_scaling":
+                vol_min = self.config.volume_factors[1]
+                vol_max = self.config.volume_factors[0]
+                volume_fields = unnormalize(volume_fields, vol_max, vol_min)
+        if surface_fields is not None:
+            if self.config.scaling_type == "mean_std_scaling":
+                surf_mean = self.config.surface_factors[0]
+                surf_std = self.config.surface_factors[1]
+                surface_fields = unstandardize(surface_fields, surf_mean, surf_std)
+            elif self.config.scaling_type == "min_max_scaling":
+                surf_min = self.config.surface_factors[1]
+                surf_max = self.config.surface_factors[0]
+                surface_fields = unnormalize(surface_fields, surf_max, surf_min)
+
+        return volume_fields, surface_fields
+
+    def set_dataset(self, dataset: Iterable) -> None:
+        """
+        Pass a dataset to the datapipe to enable iterating over both in one pass.
+        """
+        self.dataset = dataset
+
+    def __len__(self):
+        if self.dataset is not None:
+            return len(self.dataset)
+        else:
+            return 0
+
     def __getitem__(self, idx):
         """
         Function for fetching and processing a single file's data.
 
         Domino, in general, expects one example per file and the files
         are relatively large due to the mesh size.
+
+        Requires the user to have set a dataset via `set_dataset`.
         """
+        if self.dataset is None:
+            raise ValueError("Dataset is not present")
 
-        if self.config.deterministic:
-            self.array_provider.random.seed(idx)
-            # But also always set numpy:
-            np.random.seed(idx)
+        # Get the data from the dataset.
+        # Under the hood, this may be fetching preloaded data.
+        data_dict = self.dataset[idx]
 
-        index = self.indices[idx]
-        cfd_filename = self.filenames[index]
+        return self.__call__(data_dict)
 
-        # Get all of the data:
-        filepath = self.config.data_path / cfd_filename
+    def __call__(self, data_dict: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
+        """
+        Process the incoming data dictionary.
+        - Processes the data
+        - moves it to GPU
+        - adds a batch dimension
 
-        if filepath.suffix == ".zarr":
-            data_dict = self.read_data_zarr(filepath)
-        elif filepath.suffix == ".npz":
-            data_dict = self.read_data_npz(filepath)
-        elif filepath.suffix == ".npy":
-            data_dict = self.read_data_npy(filepath)
-        else:
-            raise ValueError(f"Unsupported file extension: {filepath.suffix}")
-
-        return_dict = self.preprocess_data(data_dict)
-
-        # return only pytorch tensor objects.
-        # If returning on CPU (but processed on GPU), convert below.
-        # This assumes we keep the data on the device it's on.
-        for key, value in return_dict.items():
-            if isinstance(value, np.ndarray):
-                return_dict[key] = torch.from_numpy(value)
-            elif isinstance(value, cp.ndarray):
-                return_dict[key] = torch.utils.dlpack.from_dlpack(value.toDlpack())
-
-        if self.config.gpu_output:
-            # Make sure this is all on the GPU.
-            # Everything here should be a torch tensor now.
-            for key, value in return_dict.items():
-                if isinstance(value, torch.Tensor) and not value.is_cuda:
-                    return_dict[key] = value.pin_memory().to(self.device)
-        else:
-            # Make sure everything is on the CPU.
-            for key, value in return_dict.items():
-                if isinstance(value, torch.Tensor) and value.is_cuda:
-                    return_dict[key] = value.cpu()
+        Args:
+            data_dict: Dictionary containing the data to process as torch.Tensors.
 
-        return return_dict
+        Returns:
+            Dictionary containing the processed data as torch.Tensors.
 
+        """
+        data_dict = self.process_data(data_dict)
 
-@profile
-def compute_scaling_factors(cfg: DictConfig, input_path: str, use_cache: bool) -> None:
-    model_type = cfg.model.model_type
-    max_scaling_factor_files = 20
-
-    if model_type == "volume" or model_type == "combined":
-        vol_save_path = os.path.join(cfg.project_dir, "volume_scaling_factors.npy")
-        if not os.path.exists(vol_save_path):
-            print("Computing volume scaling factors")
-            volume_variable_names = list(cfg.variables.volume.solution.keys())
-
-            fm_dict = DoMINODataPipe(
-                input_path,
-                phase="train",
-                grid_resolution=cfg.model.interp_res,
-                volume_variables=volume_variable_names,
-                surface_variables=None,
-                normalize_coordinates=True,
-                sampling=False,
-                sample_in_bbox=True,
-                volume_points_sample=cfg.model.volume_points_sample,
-                geom_points_sample=cfg.model.geom_points_sample,
-                model_type=cfg.model.model_type,
-                bounding_box_dims=cfg.data.bounding_box,
-                bounding_box_dims_surf=cfg.data.bounding_box_surface,
-                compute_scaling_factors=True,
-                gpu_preprocessing=True,
-                gpu_output=True,
-            )
+        # If the data is not on the target device, put it there:
+        for key, value in data_dict.items():
+            if value.device != self.output_device:
+                data_dict[key] = value.to(self.output_device)
+
+        # Add a batch dimension to the data_dict
+        data_dict = {k: v.unsqueeze(0) for k, v in data_dict.items()}
 
-            # Calculate mean
-            if cfg.model.normalization == "mean_std_scaling":
-                for j in range(len(fm_dict)):
-                    print("On iteration {j}")
-                    d_dict = fm_dict[j]
-                    vol_fields = d_dict["volume_fields"]
-
-                    if vol_fields is not None:
-                        if j == 0:
-                            vol_fields_sum = np.mean(vol_fields, 0)
-                        else:
-                            vol_fields_sum += np.mean(vol_fields, 0)
-                    else:
-                        vol_fields_sum = 0.0
-
-                vol_fields_mean = vol_fields_sum / len(fm_dict)
-
-                for j in range(len(fm_dict)):
-                    print("On iteration {j} again")
-                    d_dict = fm_dict[j]
-                    vol_fields = d_dict["volume_fields"]
-
-                    if vol_fields is not None:
-                        if j == 0:
-                            vol_fields_sum_square = np.mean(
-                                (vol_fields - vol_fields_mean) ** 2.0, 0
-                            )
-                        else:
-                            vol_fields_sum_square += np.mean(
-                                (vol_fields - vol_fields_mean) ** 2.0, 0
-                            )
-                    else:
-                        vol_fields_sum_square = 0.0
-
-                vol_fields_std = np.sqrt(vol_fields_sum_square / len(fm_dict))
-
-                vol_scaling_factors = [vol_fields_mean, vol_fields_std]
-
-            if cfg.model.normalization == "min_max_scaling":
-                for j in range(len(fm_dict)):
-                    print(f"Min max scaling on iteration {j}")
-                    d_dict = fm_dict[j]
-                    vol_fields = d_dict["volume_fields"]
-
-                    if vol_fields.device.type == "cuda":
-                        xp = cp
-                        vol_fields = vol_fields.cuda()
-                        vol_fields = cp.from_dlpack(vol_fields)
-                    else:
-                        xp = np
-                        vol_fields = vol_fields.cpu().numpy()
-
-                    if vol_fields is not None:
-                        vol_mean = xp.mean(vol_fields, 0)
-                        vol_std = xp.std(vol_fields, 0)
-                        vol_idx = mean_std_sampling(
-                            vol_fields, vol_mean, vol_std, tolerance=12.0
-                        )
-                        vol_fields_sampled = xp.delete(vol_fields, vol_idx, axis=0)
-                        if j == 0:
-                            vol_fields_max = xp.amax(vol_fields_sampled, 0)
-                            vol_fields_min = xp.amin(vol_fields_sampled, 0)
-                        else:
-                            vol_fields_max1 = xp.amax(vol_fields_sampled, 0)
-                            vol_fields_min1 = xp.amin(vol_fields_sampled, 0)
-
-                            for k in range(vol_fields.shape[-1]):
-                                if vol_fields_max1[k] > vol_fields_max[k]:
-                                    vol_fields_max[k] = vol_fields_max1[k]
-
-                                if vol_fields_min1[k] < vol_fields_min[k]:
-                                    vol_fields_min[k] = vol_fields_min1[k]
-                    else:
-                        vol_fields_max = 0.0
-                        vol_fields_min = 0.0
-
-                    if j > max_scaling_factor_files:
-                        break
-                vol_scaling_factors = [vol_fields_max, vol_fields_min]
-
-            for i, item in enumerate(vol_scaling_factors):
-                if isinstance(item, cp.ndarray):
-                    vol_scaling_factors[i] = item.get()
-
-            np.save(vol_save_path, vol_scaling_factors)
-
-    if model_type == "surface" or model_type == "combined":
-        surf_save_path = os.path.join(cfg.project_dir, "surface_scaling_factors.npy")
-
-        if not os.path.exists(surf_save_path):
-            print("Computing surface scaling factors")
-            volume_variable_names = list(cfg.variables.volume.solution.keys())
-            surface_variable_names = list(cfg.variables.surface.solution.keys())
-
-            fm_dict = DoMINODataPipe(
-                input_path,
-                phase="train",
-                grid_resolution=cfg.model.interp_res,
-                volume_variables=None,
-                surface_variables=surface_variable_names,
-                normalize_coordinates=True,
-                sampling=False,
-                sample_in_bbox=True,
-                volume_points_sample=cfg.model.volume_points_sample,
-                geom_points_sample=cfg.model.geom_points_sample,
-                model_type=cfg.model.model_type,
-                bounding_box_dims=cfg.data.bounding_box,
-                bounding_box_dims_surf=cfg.data.bounding_box_surface,
-                compute_scaling_factors=True,
+        return data_dict
+
+    def __iter__(self):
+        if self.dataset is None:
+            raise ValueError(
+                "Dataset is not present, can not use the datapipe as an iterator."
             )
 
-            # Calculate mean
-            if cfg.model.normalization == "mean_std_scaling":
-                for j in range(len(fm_dict)):
-                    print(f"Mean std scaling on iteration {j}")
-                    d_dict = fm_dict[j]
-                    surf_fields = d_dict["surface_fields"].cpu().numpy()
-
-                    if surf_fields is not None:
-                        if j == 0:
-                            surf_fields_sum = np.mean(surf_fields, 0)
-                        else:
-                            surf_fields_sum += np.mean(surf_fields, 0)
-                    else:
-                        surf_fields_sum = 0.0
-
-                surf_fields_mean = surf_fields_sum / len(fm_dict)
-
-                for j in range(len(fm_dict)):
-                    print(f"Mean std scaling on iteration {j} again")
-                    d_dict = fm_dict[j]
-                    surf_fields = d_dict["surface_fields"]
-
-                    if surf_fields is not None:
-                        if j == 0:
-                            surf_fields_sum_square = np.mean(
-                                (surf_fields - surf_fields_mean) ** 2.0, 0
-                            )
-                        else:
-                            surf_fields_sum_square += np.mean(
-                                (surf_fields - surf_fields_mean) ** 2.0, 0
-                            )
-                    else:
-                        surf_fields_sum_square = 0.0
-
-                surf_fields_std = np.sqrt(surf_fields_sum_square / len(fm_dict))
-
-                surf_scaling_factors = [surf_fields_mean, surf_fields_std]
-
-            if cfg.model.normalization == "min_max_scaling":
-                for j in range(len(fm_dict)):
-                    print(f"Min max scaling on iteration {j}")
-                    d_dict = fm_dict[j]
-                    surf_fields = d_dict["surface_fields"]
-                    if surf_fields.device.type == "cuda":
-                        xp = cp
-                        surf_fields = surf_fields.cuda()
-                        surf_fields = cp.from_dlpack(surf_fields)
-                    else:
-                        xp = np
-                        surf_fields = surf_fields.cpu().numpy()
-
-                    if surf_fields is not None:
-                        surf_mean = xp.mean(surf_fields, 0)
-                        surf_std = xp.std(surf_fields, 0)
-                        surf_idx = mean_std_sampling(
-                            surf_fields, surf_mean, surf_std, tolerance=12.0
-                        )
-                        surf_fields_sampled = xp.delete(surf_fields, surf_idx, axis=0)
-                        if j == 0:
-                            surf_fields_max = xp.amax(surf_fields_sampled, 0)
-                            surf_fields_min = xp.amin(surf_fields_sampled, 0)
-                        else:
-                            surf_fields_max1 = xp.amax(surf_fields_sampled, 0)
-                            surf_fields_min1 = xp.amin(surf_fields_sampled, 0)
-
-                            for k in range(surf_fields.shape[-1]):
-                                if surf_fields_max1[k] > surf_fields_max[k]:
-                                    surf_fields_max[k] = surf_fields_max1[k]
-
-                                if surf_fields_min1[k] < surf_fields_min[k]:
-                                    surf_fields_min[k] = surf_fields_min1[k]
-                    else:
-                        surf_fields_max = 0.0
-                        surf_fields_min = 0.0
-
-                    if j > max_scaling_factor_files:
-                        break
-
-                surf_scaling_factors = [surf_fields_max, surf_fields_min]
-
-                for i, item in enumerate(surf_scaling_factors):
-                    if isinstance(item, cp.ndarray):
-                        surf_scaling_factors[i] = item.get()
-
-            np.save(surf_save_path, surf_scaling_factors)
+        for i, batch in enumerate(self.dataset):
+            yield self.__call__(batch)
+
+
+def compute_scaling_factors(
+    cfg: DictConfig, input_path: str, target_keys: list[str], use_cache=None
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+    """
+    Using the dataset at the path, compute the mean, std, min, and max of the target keys.
+
+    Args:
+        cfg: Hydra configuration object containing all parameters
+        input_path: Path to the dataset to load.
+        target_keys: List of keys to compute the mean, std, min, and max of.
+        use_cache: (deprecated) This argument has no effect.
+    """
+
+    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+
+    dataset = DrivaerMLDataset(
+        data_dir=input_path,
+        keys_to_read=target_keys,
+        keys_to_read_if_available={},
+        output_device=device,
+    )
+
+    mean, std, min_val, max_val = compute_mean_std_min_max(
+        dataset,
+        field_keys=target_keys,
+    )
+
+    return mean, std, min_val, max_val
 
 
 class CachedDoMINODataset(Dataset):
@@ -1317,7 +1024,8 @@ def __getitem__(self, idx):
         filepath = self.data_path / cfd_filename
         result = np.load(filepath, allow_pickle=True).item()
         result = {
-            k: v.numpy() if isinstance(v, Tensor) else v for k, v in result.items()
+            k: torch.from_numpy(v) if isinstance(v, np.ndarray) else v
+            for k, v in result.items()
         }
 
         nvtx.range_pop()
@@ -1349,10 +1057,10 @@ def __getitem__(self, idx):
         # Sample surface points if present
         if "surface_mesh_centers" in result and self.surface_points:
             if self.surface_sampling_algorithm == "area_weighted":
-                coords_sampled, idx_surface = area_weighted_shuffle_array(
-                    result["surface_mesh_centers"],
-                    self.surface_points,
-                    result["surface_areas"],
+                coords_sampled, idx_surface = shuffle_array(
+                    points=result["surface_mesh_centers"],
+                    n_points=self.surface_points,
+                    weights=result["surface_areas"],
                 )
             else:
                 coords_sampled, idx_surface = shuffle_array(
@@ -1399,12 +1107,28 @@ def __getitem__(self, idx):
 
 
 def create_domino_dataset(
-    cfg, phase, volume_variable_names, surface_variable_names, vol_factors, surf_factors
+    cfg: DictConfig,
+    phase: Literal["train", "val", "test"],
+    keys_to_read: list[str],
+    keys_to_read_if_available: dict[str, torch.Tensor],
+    vol_factors: list[float],
+    surf_factors: list[float],
+    normalize_coordinates: bool = True,
+    sample_in_bbox: bool = True,
+    sampling: bool = True,
+    device_mesh: torch.distributed.DeviceMesh | None = None,
+    placements: dict[str, torch.distributed.tensor.Placement] | None = None,
 ):
+    model_type = cfg.model.model_type
     if phase == "train":
         input_path = cfg.data.input_dir
+        dataloader_cfg = cfg.train.dataloader
     elif phase == "val":
         input_path = cfg.data.input_dir_val
+        dataloader_cfg = cfg.val.dataloader
+    elif phase == "test":
+        input_path = cfg.eval.test_path
+        dataloader_cfg = None
     else:
         raise ValueError(f"Invalid phase {phase}")
 
@@ -1412,7 +1136,7 @@ def create_domino_dataset(
         return CachedDoMINODataset(
             input_path,
             phase=phase,
-            sampling=True,
+            sampling=sampling,
             volume_points_sample=cfg.model.volume_points_sample,
             surface_points_sample=cfg.model.surface_points_sample,
             geom_points_sample=cfg.model.geom_points_sample,
@@ -1420,6 +1144,15 @@ def create_domino_dataset(
             surface_sampling_algorithm=cfg.model.surface_sampling_algorithm,
         )
     else:
+        # The dataset path works in two pieces:
+        # There is a core "dataset" which is loading data and moving to GPU
+        # And there is the preprocess step, here.
+
+        # Optionally, and for backwards compatibility, the preprocess
+        # object can accept a dataset which will enable it as an iterator.
+        # The iteration function will loop over the dataset, preprocess the
+        # output, and return it.
+
         overrides = {}
         if hasattr(cfg.data, "gpu_preprocessing"):
             overrides["gpu_preprocessing"] = cfg.data.gpu_preprocessing
@@ -1427,22 +1160,48 @@ def create_domino_dataset(
         if hasattr(cfg.data, "gpu_output"):
             overrides["gpu_output"] = cfg.data.gpu_output
 
-        return DoMINODataPipe(
+        dm = DistributedManager()
+
+        if cfg.data.gpu_preprocessing:
+            device = dm.device
+            consumer_stream = torch.cuda.default_stream()
+        else:
+            device = torch.device("cpu")
+            consumer_stream = None
+
+        if dataloader_cfg is not None:
+            preload_depth = dataloader_cfg.preload_depth
+            pin_memory = dataloader_cfg.pin_memory
+        else:
+            preload_depth = 2
+            pin_memory = False
+
+        dataset = DrivaerMLDataset(
+            data_dir=input_path,
+            keys_to_read=keys_to_read,
+            keys_to_read_if_available=keys_to_read_if_available,
+            output_device=device,
+            preload_depth=preload_depth,
+            pin_memory=pin_memory,
+            device_mesh=device_mesh,
+            placements=placements,
+            consumer_stream=consumer_stream,
+        )
+
+        datapipe = DoMINODataPipe(
             input_path,
             phase=phase,
             grid_resolution=cfg.model.interp_res,
-            volume_variables=volume_variable_names,
-            surface_variables=surface_variable_names,
-            normalize_coordinates=True,
-            sampling=True,
-            sample_in_bbox=True,
+            normalize_coordinates=normalize_coordinates,
+            sampling=sampling,
+            sample_in_bbox=sample_in_bbox,
             volume_points_sample=cfg.model.volume_points_sample,
             surface_points_sample=cfg.model.surface_points_sample,
             geom_points_sample=cfg.model.geom_points_sample,
             volume_factors=vol_factors,
             surface_factors=surf_factors,
             scaling_type=cfg.model.normalization,
-            model_type=cfg.model.model_type,
+            model_type=model_type,
             bounding_box_dims=cfg.data.bounding_box,
             bounding_box_dims_surf=cfg.data.bounding_box_surface,
             num_surface_neighbors=cfg.model.num_neighbors_surface,
@@ -1450,6 +1209,10 @@ def create_domino_dataset(
             **overrides,
         )
 
+        datapipe.set_dataset(dataset)
+
+        return datapipe
+
 
 if __name__ == "__main__":
     fm_data = DoMINODataPipe(
diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py
deleted file mode 100644
index d953e1c9df..0000000000
--- a/physicsnemo/datapipes/cae/domino_datapipe2.py
+++ /dev/null
@@ -1,1222 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
-# SPDX-FileCopyrightText: All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-This code provides the datapipe for reading the processed npy files,
-generating multi-res grids, calculating signed distance fields,
-sampling random points in the volume and on surface,
-normalizing fields and returning the output tensors as a dictionary.
-
-This datapipe also non-dimensionalizes the fields, so the order in which the variables should
-be fixed: velocity, pressure, turbulent viscosity for volume variables and
-pressure, wall-shear-stress for surface variables. The different parameters such as
-variable names, domain resolution, sampling size etc. are configurable in config.yaml.
-"""
-
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Iterable, Literal, Optional, Protocol, Sequence, Union
-
-import numpy as np
-import torch
-import torch.cuda.nvtx as nvtx
-from omegaconf import DictConfig
-from torch.distributed.tensor.placement_types import Replicate
-from torch.utils.data import Dataset
-
-from physicsnemo.datapipes.cae.drivaer_ml_dataset import (
-    DrivaerMLDataset,
-    compute_mean_std_min_max,
-)
-from physicsnemo.distributed import DistributedManager
-from physicsnemo.distributed.shard_tensor import ShardTensor, scatter_tensor
-from physicsnemo.utils.domino.utils import (
-    calculate_center_of_mass,
-    create_grid,
-    get_filenames,
-    normalize,
-    pad,
-    shuffle_array,
-    standardize,
-    unnormalize,
-    unstandardize,
-)
-from physicsnemo.utils.neighbors import knn
-from physicsnemo.utils.profiling import profile
-from physicsnemo.utils.sdf import signed_distance_field
-
-
-class BoundingBox(Protocol):
-    """
-    Type definition for the required format of bounding box dimensions.
-    """
-
-    min: Sequence
-    max: Sequence
-
-
-@dataclass
-class DoMINODataConfig:
-    """Configuration for DoMINO dataset processing pipeline.
-
-    Attributes:
-        data_path: Path to the dataset to load.
-        phase: Which phase of data to load ("train", "val", or "test").
-        surface_variables: (Surface specific) Names of surface variables.
-        surface_points_sample: (Surface specific) Number of surface points to sample per batch.
-        num_surface_neighbors: (Surface specific) Number of surface neighbors to consider for nearest neighbors approach.
-        surface_sampling_algorithm: (Surface specific) Algorithm to use for surface sampling ("area_weighted" or "random").
-        surface_factors: (Surface specific) Non-dimensionalization factors for surface variables.
-            If set, and scaling_type is:
-            - min_max_scaling -> rescale surface_fields to the min/max set here
-            - mean_std_scaling -> rescale surface_fields to the mean and std set here.
-        bounding_box_dims_surf: (Surface specific) Dimensions of bounding box. Must be an object with min/max
-            attributes that are arraylike.
-        volume_variables: (Volume specific) Names of volume variables.
-        volume_points_sample: (Volume specific) Number of volume points to sample per batch.
-        volume_factors: (Volume specific) Non-dimensionalization factors for volume variables scaling.
-            If set, and scaling_type is:
-            - min_max_scaling -> rescale volume_fields to the min/max set here
-            - mean_std_scaling -> rescale volume_fields to the mean and std set here.
-        bounding_box_dims: (Volume specific) Dimensions of bounding box. Must be an object with min/max
-            attributes that are arraylike.
-        grid_resolution: Resolution of the latent grid.
-        normalize_coordinates: Whether to normalize coordinates based on min/max values.
-            For surfaces: uses s_min/s_max, defined from:
-            - Surface bounding box, if defined.
-            - Min/max of the stl_vertices
-            For volumes: uses c_min/c_max, defined from:
-            - Volume bounding_box if defined,
-            - 1.5x s_min/max otherwise, except c_min[2] = s_min[2] in this case
-        sample_in_bbox: Whether to sample points in a specified bounding box.
-            Uses the same min/max points as coordinate normalization.
-            Only performed if compute_scaling_factors is false.
-        sampling: Whether to downsample the full resolution mesh to fit in GPU memory.
-            Surface and volume sampling points are configured separately as:
-            - surface.points_sample
-            - volume.points_sample
-        geom_points_sample: Number of STL points sampled per batch.
-            Independent of volume.points_sample and surface.points_sample.
-        scaling_type: Scaling type for volume variables.
-            If used, will rescale the volume_fields and surface fields outputs.
-            Requires volume.factor and surface.factor to be set.
-        compute_scaling_factors: Whether to compute scaling factors.
-            Not available if caching.
-            Many preprocessing pieces are disabled if computing scaling factors.
-        caching: Whether this is for caching or serving.
-        deterministic: Whether to use a deterministic seed for sampling and random numbers.
-        gpu_preprocessing: Whether to do preprocessing on the GPU (False for CPU).
-        gpu_output: Whether to return output on the GPU as cupy arrays.
-            If False, returns numpy arrays.
-            You might choose gpu_preprocessing=True and gpu_output=False if caching.
-    """
-
-    data_path: Path | None
-    phase: Literal["train", "val", "test"]
-
-    # Surface-specific variables:
-    surface_variables: Optional[Sequence] = ("pMean", "wallShearStress")
-    surface_points_sample: int = 1024
-    num_surface_neighbors: int = 11
-    surface_sampling_algorithm: str = Literal["area_weighted", "random"]
-    surface_factors: Optional[Sequence] = None
-    bounding_box_dims_surf: Optional[Union[BoundingBox, Sequence]] = None
-
-    # Volume specific variables:
-    volume_variables: Optional[Sequence] = ("UMean", "pMean")
-    volume_points_sample: int = 1024
-    volume_factors: Optional[Sequence] = None
-    bounding_box_dims: Optional[Union[BoundingBox, Sequence]] = None
-
-    grid_resolution: Sequence = (256, 96, 64)
-    normalize_coordinates: bool = False
-    sample_in_bbox: bool = False
-    sampling: bool = False
-    geom_points_sample: int = 300000
-    scaling_type: Optional[Literal["min_max_scaling", "mean_std_scaling"]] = None
-    compute_scaling_factors: bool = False
-    caching: bool = False
-    deterministic: bool = False
-    gpu_preprocessing: bool = True
-    gpu_output: bool = True
-
-    def __post_init__(self):
-        if self.data_path is not None:
-            # Ensure data_path is a Path object:
-            if isinstance(self.data_path, str):
-                self.data_path = Path(self.data_path)
-            self.data_path = self.data_path.expanduser()
-
-            if not self.data_path.exists():
-                raise ValueError(f"Path {self.data_path} does not exist")
-
-            if not self.data_path.is_dir():
-                raise ValueError(f"Path {self.data_path} is not a directory")
-
-        # Object if caching settings are impossible:
-        if self.caching:
-            if self.sampling:
-                raise ValueError("Sampling should be False for caching")
-            if self.compute_scaling_factors:
-                raise ValueError("Compute scaling factors should be False for caching")
-
-        if self.phase not in [
-            "train",
-            "val",
-            "test",
-        ]:
-            raise ValueError(
-                f"phase should be one of ['train', 'val', 'test'], got {self.phase}"
-            )
-        if self.scaling_type is not None:
-            if self.scaling_type not in [
-                "min_max_scaling",
-                "mean_std_scaling",
-            ]:
-                raise ValueError(
-                    f"scaling_type should be one of ['min_max_scaling', 'mean_std_scaling'], got {self.scaling_type}"
-                )
-
-
-##### TODO
-# - The SDF normalization here is based on using a normalized mesh and
-#   a normalized coordinate.  The alternate method is to normalize to the min/max of the grid.
-
-
-class DoMINODataPipe(Dataset):
-    """
-    Datapipe for DoMINO
-
-    Leverages a dataset for the actual reading of the data, and this
-    object is responsible for preprocessing the data.
-
-    """
-
-    def __init__(
-        self,
-        input_path,
-        model_type: Literal["surface", "volume", "combined"],
-        pin_memory: bool = False,
-        **data_config_overrides,
-    ):
-        # Perform config packaging and validation
-        self.config = DoMINODataConfig(data_path=input_path, **data_config_overrides)
-
-        # Set up the distributed manager:
-        if not DistributedManager.is_initialized():
-            DistributedManager.initialize()
-
-        dist = DistributedManager()
-
-        # Set devices for the preprocessing and IO target
-        self.preproc_device = (
-            dist.device if self.config.gpu_preprocessing else torch.device("cpu")
-        )
-        # The drivaer_ml_dataset will automatically target this device
-        # In an async transfer.
-        self.output_device = (
-            dist.device if self.config.gpu_output else torch.device("cpu")
-        )
-
-        # Model type determines whether we process surface, volume, or both.
-        self.model_type = model_type
-
-        # Update the arrays for bounding boxes:
-        if hasattr(self.config.bounding_box_dims, "max") and hasattr(
-            self.config.bounding_box_dims, "min"
-        ):
-            self.config.bounding_box_dims = [
-                torch.tensor(
-                    self.config.bounding_box_dims.max,
-                    device=self.preproc_device,
-                    dtype=torch.float32,
-                ),
-                torch.tensor(
-                    self.config.bounding_box_dims.min,
-                    device=self.preproc_device,
-                    dtype=torch.float32,
-                ),
-            ]
-            self.default_volume_grid = create_grid(
-                self.config.bounding_box_dims[0],
-                self.config.bounding_box_dims[1],
-                self.config.grid_resolution,
-            )
-
-        # And, do the surface bounding box if supplied:
-        if hasattr(self.config.bounding_box_dims_surf, "max") and hasattr(
-            self.config.bounding_box_dims_surf, "min"
-        ):
-            self.config.bounding_box_dims_surf = [
-                torch.tensor(
-                    self.config.bounding_box_dims_surf.max,
-                    device=self.preproc_device,
-                    dtype=torch.float32,
-                ),
-                torch.tensor(
-                    self.config.bounding_box_dims_surf.min,
-                    device=self.preproc_device,
-                    dtype=torch.float32,
-                ),
-            ]
-
-            self.default_surface_grid = create_grid(
-                self.config.bounding_box_dims_surf[0],
-                self.config.bounding_box_dims_surf[1],
-                self.config.grid_resolution,
-            )
-
-        # Ensure the volume and surface scaling factors are torch tensors
-        # and on the right device:
-        if self.config.volume_factors is not None:
-            self.config.volume_factors = torch.tensor(
-                self.config.volume_factors,
-                device=self.preproc_device,
-                dtype=torch.float32,
-            )
-        if self.config.surface_factors is not None:
-            self.config.surface_factors = torch.tensor(
-                self.config.surface_factors,
-                device=self.preproc_device,
-                dtype=torch.float32,
-            )
-
-        self.dataset = None
-
-    def compute_stl_scaling_and_surface_grids(
-        self,
-        stl_vertices: torch.Tensor,
-    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
-        """
-        Compute the min and max for the defining mesh.
-
-        If the user supplies a bounding box, we use that.  Otherwise,
-        it's created dynamically from the min/max of the stl vertices.
-
-        The returned min/max and grid are used for surface data.
-        """
-
-        # Check the bounding box is not unit length
-
-        if self.config.bounding_box_dims_surf is not None:
-            s_max = self.config.bounding_box_dims_surf[0]
-            s_min = self.config.bounding_box_dims_surf[1]
-            surf_grid = self.default_surface_grid
-        else:
-            # Create the grid dynamically
-            s_min = torch.amin(stl_vertices, 0)
-            s_max = torch.amax(stl_vertices, 0)
-            surf_grid = create_grid(s_max, s_min, self.config.grid_resolution)
-
-        return s_min, s_max, surf_grid
-
-    def compute_volume_scaling_and_grids(
-        self, s_min: torch.Tensor, s_max: torch.Tensor
-    ):
-        """
-        Compute the min and max and grid for volume data.
-
-        If the user supplies a bounding box, we use that.  Otherwise,
-        it's created dynamically from the surface min/max.
-
-        This will be 2x longer in x and y and the same in z as the surface bounding box.
-        """
-
-        # Determine the volume min / max locations
-        if self.config.bounding_box_dims is not None:
-            c_max = self.config.bounding_box_dims[0]
-            c_min = self.config.bounding_box_dims[1]
-            volume_grid = self.default_volume_grid
-
-        else:
-            # Create the grid based on the surface grid
-            c_max = s_max + (s_max - s_min) / 2
-            c_min = s_min - (s_max - s_min) / 2
-            c_min[2] = s_min[2]
-            volume_grid = create_grid(c_max, c_min, self.config.grid_resolution)
-
-        return c_min, c_max, volume_grid
-
-    @profile
-    def downsample_geometry(
-        self,
-        stl_vertices,
-    ) -> torch.Tensor:
-        """
-        Downsample the geometry to the desired number of points.
-
-        Args:
-            stl_vertices: The vertices of the surface.
-        """
-
-        if self.config.sampling:
-            geometry_points = self.config.geom_points_sample
-
-            geometry_coordinates_sampled, idx_geometry = shuffle_array(
-                stl_vertices, geometry_points
-            )
-            if geometry_coordinates_sampled.shape[0] < geometry_points:
-                geometry_coordinates_sampled = pad(
-                    geometry_coordinates_sampled, geometry_points, pad_value=-100.0
-                )
-            geom_centers = geometry_coordinates_sampled
-        else:
-            geom_centers = stl_vertices
-
-        return geom_centers
-
-    def process_surface(
-        self,
-        s_min: torch.Tensor,
-        s_max: torch.Tensor,
-        c_min: torch.Tensor,
-        c_max: torch.Tensor,
-        *,  # Forcing the rest by keyword only since it's a long list ...
-        center_of_mass: torch.Tensor,
-        surf_grid: torch.Tensor,
-        surface_coordinates: torch.Tensor,
-        surface_normals: torch.Tensor,
-        surface_sizes: torch.Tensor,
-        stl_vertices: torch.Tensor,
-        stl_indices: torch.Tensor,
-        surface_fields: torch.Tensor | None,
-    ) -> dict[str, torch.Tensor]:
-        nx, ny, nz = self.config.grid_resolution
-
-        return_dict = {}
-
-        ########################################################################
-        # Remove any sizes <= 0:
-        ########################################################################
-        idx = surface_sizes > 0
-        surface_sizes = surface_sizes[idx]
-        surface_normals = surface_normals[idx]
-        surface_coordinates = surface_coordinates[idx]
-        if surface_fields is not None:
-            surface_fields = surface_fields[idx]
-
-        ########################################################################
-        # Reject surface points outside of the Bounding Box
-        # NOTE - this is using the VOLUME bounding box!
-        ########################################################################
-        if self.config.sample_in_bbox:
-            ids_min = surface_coordinates[:] > c_min
-            ids_max = surface_coordinates[:] < c_max
-
-            ids_in_bbox = ids_min & ids_max
-            ids_in_bbox = ids_in_bbox.all(dim=-1)
-
-            surface_coordinates = surface_coordinates[ids_in_bbox]
-            surface_normals = surface_normals[ids_in_bbox]
-            surface_sizes = surface_sizes[ids_in_bbox]
-            if surface_fields is not None:
-                surface_fields = surface_fields[ids_in_bbox]
-
-        ########################################################################
-        # Perform Down sampling of the surface fields.
-        # Note that we snapshot the full surface coordinates for
-        # use in the kNN in the next step.
-        ########################################################################
-
-        full_surface_coordinates = surface_coordinates
-        full_surface_normals = surface_normals
-        full_surface_sizes = surface_sizes
-
-        if self.config.sampling:
-            # Perform the down sampling:
-            if self.config.surface_sampling_algorithm == "area_weighted":
-                weights = surface_sizes
-            else:
-                weights = None
-
-            surface_coordinates_sampled, idx_surface = shuffle_array(
-                surface_coordinates,
-                self.config.surface_points_sample,
-                weights=weights,
-            )
-
-            if surface_coordinates_sampled.shape[0] < self.config.surface_points_sample:
-                surface_coordinates_sampled = pad(
-                    surface_coordinates_sampled,
-                    self.config.surface_points_sample,
-                    pad_value=-10.0,
-                )
-
-            # Select out the sampled points for non-neighbor arrays:
-            if surface_fields is not None:
-                surface_fields = surface_fields[idx_surface]
-
-            # Subsample the normals and sizes:
-            surface_normals = surface_normals[idx_surface]
-            surface_sizes = surface_sizes[idx_surface]
-            # Update the coordinates to the sampled points:
-            surface_coordinates = surface_coordinates_sampled
-
-        ########################################################################
-        # Perform a kNN on the surface to find the neighbor information
-        ########################################################################
-        if self.config.num_surface_neighbors > 1:
-            # Perform the kNN:
-            neighbor_indices, neighbor_distances = knn(
-                points=full_surface_coordinates,
-                queries=surface_coordinates,
-                k=self.config.num_surface_neighbors,
-            )
-
-            # Pull out the neighbor elements.
-            # Note that `neighbor_indices` is the index into the original,
-            # full sized tensors (full_surface_coordinates, etc).
-            surface_neighbors = full_surface_coordinates[neighbor_indices][:, 1:]
-            surface_neighbors_normals = full_surface_normals[neighbor_indices][:, 1:]
-            surface_neighbors_sizes = full_surface_sizes[neighbor_indices][:, 1:]
-
-        # Better to normalize everything after the kNN and sampling
-        if self.config.normalize_coordinates:
-            surf_grid = normalize(surf_grid, s_max, s_min)
-            surface_coordinates = normalize(surface_coordinates, s_max, s_min)
-            surface_neighbors = normalize(surface_neighbors, s_max, s_min)
-            # Make sure to normalize the center of mass for the normals_com_surface calc
-            center_of_mass = normalize(center_of_mass, s_max, s_min)
-
-        pos_normals_com_surface = surface_coordinates - center_of_mass
-
-        ########################################################################
-        # Apply scaling to the targets, if desired:
-        ########################################################################
-        if self.config.scaling_type is not None and surface_fields is not None:
-            surface_fields = self.scale_model_targets(
-                surface_fields, self.config.surface_factors
-            )
-
-        return_dict.update(
-            {
-                "pos_surface_center_of_mass": pos_normals_com_surface,
-                "surface_mesh_centers": surface_coordinates,
-                "surface_mesh_neighbors": surface_neighbors,
-                "surface_normals": surface_normals,
-                "surface_neighbors_normals": surface_neighbors_normals,
-                "surface_areas": surface_sizes,
-                "surface_neighbors_areas": surface_neighbors_sizes,
-            }
-        )
-        if surface_fields is not None:
-            return_dict["surface_fields"] = surface_fields
-
-        return return_dict
-
-    def process_volume(
-        self,
-        c_min: torch.Tensor,
-        c_max: torch.Tensor,
-        volume_coordinates: torch.Tensor,
-        volume_grid: torch.Tensor,
-        center_of_mass: torch.Tensor,
-        stl_vertices: torch.Tensor,
-        stl_indices: torch.Tensor,
-        volume_fields: torch.Tensor | None,
-    ) -> dict[str, torch.Tensor]:
-        """
-        Preprocess the volume data.
-
-        First, if configured, we reject points not in the volume bounding box.
-
-        Next, if sampling is enabled, we sample the volume points and apply that
-        sampling to the ground truth too, if it's present.
-
-        """
-        ########################################################################
-        # Reject points outside the volumetric BBox
-        ########################################################################
-        if self.config.sample_in_bbox:
-            # Remove points in the volume that are outside
-            # of the bbox area.
-            min_check = volume_coordinates[:] > c_min
-            max_check = volume_coordinates[:] < c_max
-
-            ids_in_bbox = min_check & max_check
-            ids_in_bbox = ids_in_bbox.all(dim=1)
-
-            volume_coordinates = volume_coordinates[ids_in_bbox]
-            if volume_fields is not None:
-                volume_fields = volume_fields[ids_in_bbox]
-
-        ########################################################################
-        # Apply sampling to the volume coordinates and fields
-        ########################################################################
-
-        if self.config.sampling:
-            # Generate a series of idx to sample the volume
-            # without replacement
-            volume_coordinates_sampled, idx_volume = shuffle_array(
-                volume_coordinates, self.config.volume_points_sample
-            )
-            volume_coordinates_sampled = volume_coordinates[idx_volume]
-            # In case too few points are in the sampled data (because the
-            # inputs were too few), pad the outputs:
-            if volume_coordinates_sampled.shape[0] < self.config.volume_points_sample:
-                padding_size = (
-                    self.config.volume_points_sample
-                    - volume_coordinates_sampled.shape[0]
-                )
-
-                volume_coordinates_sampled = torch.nn.functional.pad(
-                    volume_coordinates_sampled,
-                    (0, 0, 0, 0, 0, padding_size),
-                    mode="constant",
-                    value=-10.0,
-                )
-
-            # Apply the same sampling to the targets, too:
-            if volume_fields is not None:
-                volume_fields = volume_fields[idx_volume]
-
-            volume_coordinates = volume_coordinates_sampled
-
-        ########################################################################
-        # Apply normalization to the coordinates, if desired:
-        ########################################################################
-        if self.config.normalize_coordinates:
-            volume_coordinates = normalize(volume_coordinates, c_max, c_min)
-            grid = normalize(volume_grid, c_max, c_min)
-            # This is used later in the SDF, apply the same scaling to the mesh
-            # coordinates:
-            normed_vertices = normalize(stl_vertices, c_max, c_min)
-        else:
-            grid = volume_grid
-            normed_vertices = stl_vertices
-
-        ########################################################################
-        # Apply scaling to the targets, if desired:
-        ########################################################################
-        if self.config.scaling_type is not None and volume_fields is not None:
-            volume_fields = self.scale_model_targets(
-                volume_fields, self.config.volume_factors
-            )
-
-        ########################################################################
-        # Compute Signed Distance Function for volumetric quantities
-        # Note - the SDF happens here, after volume data processing finishes,
-        # because we need to use the (maybe) normalized volume coordinates and grid
-        ########################################################################
-
-        # SDF calculation on the volume grid using WARP
-        sdf_grid, _ = signed_distance_field(
-            normed_vertices,
-            stl_indices,
-            grid,
-            use_sign_winding_number=True,
-        )
-
-        # Get the SDF of all the selected volume coordinates,
-        # And keep the closest point to each one.
-        sdf_nodes, sdf_node_closest_point = signed_distance_field(
-            normed_vertices,
-            stl_indices,
-            volume_coordinates,
-            use_sign_winding_number=True,
-        )
-        sdf_nodes = sdf_nodes.reshape((-1, 1))
-
-        # Use the closest point from the mesh to compute the volume encodings:
-        pos_normals_closest_vol, pos_normals_com_vol = self.calculate_volume_encoding(
-            c_min, c_max, volume_coordinates, sdf_node_closest_point, center_of_mass
-        )
-
-        return_dict = {
-            "volume_mesh_centers": volume_coordinates,
-            "sdf_nodes": sdf_nodes,
-            "grid": grid,
-            "sdf_grid": sdf_grid,
-            "pos_volume_closest": pos_normals_closest_vol,
-            "pos_volume_center_of_mass": pos_normals_com_vol,
-        }
-
-        if volume_fields is not None:
-            return_dict["volume_fields"] = volume_fields
-
-        return return_dict
-
-    def calculate_volume_encoding(
-        self,
-        c_min: torch.Tensor,
-        c_max: torch.Tensor,
-        volume_coordinates: torch.Tensor,
-        sdf_node_closest_point: torch.Tensor,
-        center_of_mass: torch.Tensor,
-    ):
-        if self.config.normalize_coordinates:
-            volume_coordinates = normalize(volume_coordinates, c_max, c_min)
-            sdf_node_closest_point = normalize(sdf_node_closest_point, c_max, c_min)
-            center_of_mass = normalize(center_of_mass, c_max, c_min)
-
-        pos_normals_closest_vol = volume_coordinates - sdf_node_closest_point
-        pos_normals_com_vol = volume_coordinates - center_of_mass
-
-        return pos_normals_closest_vol, pos_normals_com_vol
-
-    @torch.no_grad()
-    def process_data(self, data_dict):
-        # Start building the preprocessed return dict:
-        return_dict = {
-            "global_params_values": data_dict["global_params_values"],
-            "global_params_reference": data_dict["global_params_reference"],
-        }
-
-        ########################################################################
-        # Process the core STL information
-        ########################################################################
-
-        # This function gets information about the surface scale,
-        # and decides what the surface grid will be:
-
-        stl_coordinates = data_dict["stl_coordinates"]
-
-        s_min, s_max, surf_grid = self.compute_stl_scaling_and_surface_grids(
-            stl_coordinates
-        )
-
-        if isinstance(stl_coordinates, ShardTensor):
-            mesh = stl_coordinates._spec.mesh
-            # Then, replicate the bounding box along the mesh if present.
-            s_max = scatter_tensor(
-                s_max,
-                0,
-                mesh=mesh,
-                placements=[
-                    Replicate(),
-                ],
-                global_shape=s_max.shape,
-                dtype=s_max.dtype,
-                requires_grad=False,
-            )
-            s_min = scatter_tensor(
-                s_min,
-                0,
-                mesh=mesh,
-                placements=[
-                    Replicate(),
-                ],
-                global_shape=s_min.shape,
-                dtype=s_min.dtype,
-                requires_grad=False,
-            )
-            surf_grid = scatter_tensor(
-                surf_grid,
-                0,
-                mesh=mesh,
-                placements=[
-                    Replicate(),
-                ],
-                global_shape=surf_grid.shape,
-                dtype=surf_grid.dtype,
-                requires_grad=False,
-            )
-
-        return_dict["surf_grid"] = surf_grid
-
-        # We always need to calculate the SDF on the surface grid:
-        # This is for the SDF Later:
-        if self.config.normalize_coordinates:
-            normed_vertices = normalize(data_dict["stl_coordinates"], s_max, s_min)
-        else:
-            normed_vertices = data_dict["stl_coordinates"]
-
-        # For SDF calculations, make sure the mesh_indices_flattened is an integer array:
-        mesh_indices_flattened = data_dict["stl_faces"].to(torch.int32)
-
-        # Compute signed distance function for the surface grid:
-        sdf_surf_grid, _ = signed_distance_field(
-            mesh_vertices=normed_vertices,
-            mesh_indices=mesh_indices_flattened,
-            input_points=surf_grid,
-            use_sign_winding_number=True,
-        )
-        return_dict["sdf_surf_grid"] = sdf_surf_grid
-
-        # Store this only if normalization is active:
-        if self.config.normalize_coordinates:
-            return_dict["surface_min_max"] = torch.stack([s_min, s_max])
-
-        # This is a center of mass computation for the stl surface,
-        # using the size of each mesh point as weight.
-        center_of_mass = calculate_center_of_mass(
-            data_dict["stl_centers"], data_dict["stl_areas"]
-        )
-
-        # This will apply downsampling if needed to the geometry coordinates
-        geom_centers = self.downsample_geometry(
-            stl_vertices=data_dict["stl_coordinates"],
-        )
-        return_dict["geometry_coordinates"] = geom_centers
-
-        ########################################################################
-        # Determine the volumetric bounds of the data:
-        ########################################################################
-        # Compute the min/max for volume an the unnomralized grid:
-        c_min, c_max, volume_grid = self.compute_volume_scaling_and_grids(s_min, s_max)
-
-        # For volume data, we store this only if normalizing coordinates:
-        if self.model_type == "volume" or self.model_type == "combined":
-            if self.config.normalize_coordinates:
-                return_dict["volume_min_max"] = torch.stack([c_min, c_max])
-
-        if self.model_type == "volume" or self.model_type == "combined":
-            volume_fields_raw = (
-                data_dict["volume_fields"] if "volume_fields" in data_dict else None
-            )
-            volume_dict = self.process_volume(
-                c_min,
-                c_max,
-                volume_coordinates=data_dict["volume_mesh_centers"],
-                volume_grid=volume_grid,
-                center_of_mass=center_of_mass,
-                stl_vertices=data_dict["stl_coordinates"],
-                stl_indices=mesh_indices_flattened,
-                volume_fields=volume_fields_raw,
-            )
-
-            return_dict.update(volume_dict)
-
-        if self.model_type == "surface" or self.model_type == "combined":
-            surface_fields_raw = (
-                data_dict["surface_fields"] if "surface_fields" in data_dict else None
-            )
-            surface_dict = self.process_surface(
-                s_min,
-                s_max,
-                c_min,
-                c_max,
-                center_of_mass=center_of_mass,
-                surf_grid=surf_grid,
-                surface_coordinates=data_dict["surface_mesh_centers"],
-                surface_normals=data_dict["surface_normals"],
-                surface_sizes=data_dict["surface_areas"],
-                stl_vertices=data_dict["stl_coordinates"],
-                stl_indices=mesh_indices_flattened,
-                surface_fields=surface_fields_raw,
-            )
-
-            return_dict.update(surface_dict)
-
-        return return_dict
-
-    def scale_model_targets(
-        self, fields: torch.Tensor, factors: torch.Tensor
-    ) -> torch.Tensor:
-        """
-        Scale the model targets based on the configured scaling factors.
-        """
-        if self.config.scaling_type == "mean_std_scaling":
-            field_mean = self.config.volume_factors[0]
-            field_std = self.config.volume_factors[1]
-            return standardize(fields, field_mean, field_std)
-        elif self.config.scaling_type == "min_max_scaling":
-            field_min = self.config.volume_factors[1]
-            field_max = self.config.volume_factors[0]
-            return normalize(fields, field_max, field_min)
-
-    def unscale_model_outputs(
-        self, volume_fields: torch.Tensor | None, surface_fields: torch.Tensor | None
-    ):
-        """
-        Unscale the model outputs based on the configured scaling factors.
-
-        The unscaling is included here to make it a consistent interface regardless
-        of the scaling factors and type used.
-
-        """
-
-        if volume_fields is not None:
-            if self.config.scaling_type == "mean_std_scaling":
-                vol_mean = self.config.volume_factors[0]
-                vol_std = self.config.volume_factors[1]
-                volume_fields = unstandardize(volume_fields, vol_mean, vol_std)
-            elif self.config.scaling_type == "min_max_scaling":
-                vol_min = self.config.volume_factors[1]
-                vol_max = self.config.volume_factors[0]
-                volume_fields = unnormalize(volume_fields, vol_max, vol_min)
-        if surface_fields is not None:
-            if self.config.scaling_type == "mean_std_scaling":
-                surf_mean = self.config.surface_factors[0]
-                surf_std = self.config.surface_factors[1]
-                surface_fields = unstandardize(surface_fields, surf_mean, surf_std)
-            elif self.config.scaling_type == "min_max_scaling":
-                surf_min = self.config.surface_factors[1]
-                surf_max = self.config.surface_factors[0]
-                surface_fields = unnormalize(surface_fields, surf_max, surf_min)
-
-        return volume_fields, surface_fields
-
-    def set_dataset(self, dataset: Iterable) -> None:
-        """
-        Pass a dataset to the datapipe to enable iterating over both in one pass.
-        """
-        self.dataset = dataset
-
-    def __len__(self):
-        if self.dataset is not None:
-            return len(self.dataset)
-        else:
-            return 0
-
-    def __getitem__(self, idx):
-        """
-        Function for fetching and processing a single file's data.
-
-        Domino, in general, expects one example per file and the files
-        are relatively large due to the mesh size.
-
-        Requires the user to have set a dataset via `set_dataset`.
-        """
-        if self.dataset is None:
-            raise ValueError("Dataset is not present")
-
-        # Get the data from the dataset.
-        # Under the hood, this may be fetching preloaded data.
-        data_dict = self.dataset[idx]
-
-        return self.__call__(data_dict)
-
-    def __call__(self, data_dict: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
-        """
-        Process the incoming data dictionary.
-        - Processes the data
-        - moves it to GPU
-        - adds a batch dimension
-
-        Args:
-            data_dict: Dictionary containing the data to process as torch.Tensors.
-
-        Returns:
-            Dictionary containing the processed data as torch.Tensors.
-
-        """
-        data_dict = self.process_data(data_dict)
-
-        # If the data is not on the target device, put it there:
-        for key, value in data_dict.items():
-            if value.device != self.output_device:
-                data_dict[key] = value.to(self.output_device)
-
-        # Add a batch dimension to the data_dict
-        data_dict = {k: v.unsqueeze(0) for k, v in data_dict.items()}
-
-        return data_dict
-
-    def __iter__(self):
-        if self.dataset is None:
-            raise ValueError(
-                "Dataset is not present, can not use the datapipe as an iterator."
-            )
-
-        for i, batch in enumerate(self.dataset):
-            yield self.__call__(batch)
-
-
-def compute_scaling_factors(
-    cfg: DictConfig, input_path: str, target_keys: list[str], use_cache=None
-) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
-    """
-    Using the dataset at the path, compute the mean, std, min, and max of the target keys.
-
-    Args:
-        cfg: Hydra configuration object containing all parameters
-        input_path: Path to the dataset to load.
-        target_keys: List of keys to compute the mean, std, min, and max of.
-        use_cache: (deprecated) This argument has no effect.
-    """
-
-    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
-
-    dataset = DrivaerMLDataset(
-        data_dir=input_path,
-        keys_to_read=target_keys,
-        output_device=device,
-    )
-
-    mean, std, min_val, max_val = compute_mean_std_min_max(
-        dataset,
-        field_keys=target_keys,
-    )
-
-    return mean, std, min_val, max_val
-
-
-class CachedDoMINODataset(Dataset):
-    """
-    Dataset for reading cached DoMINO data files, with optional resampling.
-    Acts as a drop-in replacement for DoMINODataPipe.
-    """
-
-    # @nvtx_annotate(message="CachedDoMINODataset __init__")
-    def __init__(
-        self,
-        data_path: Union[str, Path],
-        phase: Literal["train", "val", "test"] = "train",
-        sampling: bool = False,
-        volume_points_sample: Optional[int] = None,
-        surface_points_sample: Optional[int] = None,
-        geom_points_sample: Optional[int] = None,
-        model_type=None,  # Model_type, surface, volume or combined
-        deterministic_seed=False,
-        surface_sampling_algorithm="area_weighted",
-    ):
-        super().__init__()
-
-        self.model_type = model_type
-        if deterministic_seed:
-            np.random.seed(42)
-
-        if isinstance(data_path, str):
-            data_path = Path(data_path)
-        self.data_path = data_path.expanduser()
-
-        if not self.data_path.exists():
-            raise AssertionError(f"Path {self.data_path} does not exist")
-        if not self.data_path.is_dir():
-            raise AssertionError(f"Path {self.data_path} is not a directory")
-
-        self.deterministic_seed = deterministic_seed
-        self.sampling = sampling
-        self.volume_points = volume_points_sample
-        self.surface_points = surface_points_sample
-        self.geom_points = geom_points_sample
-        self.surface_sampling_algorithm = surface_sampling_algorithm
-
-        self.filenames = get_filenames(self.data_path, exclude_dirs=True)
-
-        total_files = len(self.filenames)
-
-        self.phase = phase
-        self.indices = np.array(range(total_files))
-
-        np.random.shuffle(self.indices)
-
-        if not self.filenames:
-            raise AssertionError(f"No cached files found in {self.data_path}")
-
-    def __len__(self):
-        return len(self.indices)
-
-    # @nvtx_annotate(message="CachedDoMINODataset __getitem__")
-    def __getitem__(self, idx):
-        if self.deterministic_seed:
-            np.random.seed(idx)
-        nvtx.range_push("Load cached file")
-
-        index = self.indices[idx]
-        cfd_filename = self.filenames[index]
-
-        filepath = self.data_path / cfd_filename
-        result = np.load(filepath, allow_pickle=True).item()
-        result = {
-            k: torch.from_numpy(v) if isinstance(v, np.ndarray) else v
-            for k, v in result.items()
-        }
-
-        nvtx.range_pop()
-        if not self.sampling:
-            return result
-
-        nvtx.range_push("Sample points")
-
-        # Sample volume points if present
-        if "volume_mesh_centers" in result and self.volume_points:
-            coords_sampled, idx_volume = shuffle_array(
-                result["volume_mesh_centers"], self.volume_points
-            )
-            if coords_sampled.shape[0] < self.volume_points:
-                coords_sampled = pad(
-                    coords_sampled, self.volume_points, pad_value=-10.0
-                )
-
-            result["volume_mesh_centers"] = coords_sampled
-            for key in [
-                "volume_fields",
-                "pos_volume_closest",
-                "pos_volume_center_of_mass",
-                "sdf_nodes",
-            ]:
-                if key in result:
-                    result[key] = result[key][idx_volume]
-
-        # Sample surface points if present
-        if "surface_mesh_centers" in result and self.surface_points:
-            if self.surface_sampling_algorithm == "area_weighted":
-                coords_sampled, idx_surface = shuffle_array(
-                    points=result["surface_mesh_centers"],
-                    n_points=self.surface_points,
-                    weights=result["surface_areas"],
-                )
-            else:
-                coords_sampled, idx_surface = shuffle_array(
-                    result["surface_mesh_centers"], self.surface_points
-                )
-
-            if coords_sampled.shape[0] < self.surface_points:
-                coords_sampled = pad(
-                    coords_sampled, self.surface_points, pad_value=-10.0
-                )
-
-            ii = result["neighbor_indices"]
-            result["surface_mesh_neighbors"] = result["surface_mesh_centers"][ii]
-            result["surface_neighbors_normals"] = result["surface_normals"][ii]
-            result["surface_neighbors_areas"] = result["surface_areas"][ii]
-
-            result["surface_mesh_centers"] = coords_sampled
-
-            for key in [
-                "surface_fields",
-                "surface_areas",
-                "surface_normals",
-                "pos_surface_center_of_mass",
-                "surface_mesh_neighbors",
-                "surface_neighbors_normals",
-                "surface_neighbors_areas",
-            ]:
-                if key in result:
-                    result[key] = result[key][idx_surface]
-
-            del result["neighbor_indices"]
-
-        # Sample geometry points if present
-        if "geometry_coordinates" in result and self.geom_points:
-            coords_sampled, _ = shuffle_array(
-                result["geometry_coordinates"], self.geom_points
-            )
-            if coords_sampled.shape[0] < self.geom_points:
-                coords_sampled = pad(coords_sampled, self.geom_points, pad_value=-100.0)
-            result["geometry_coordinates"] = coords_sampled
-
-        nvtx.range_pop()
-        return result
-
-
-def create_domino_dataset(
-    cfg: DictConfig,
-    phase: Literal["train", "val", "test"],
-    keys_to_read: list[str],
-    keys_to_read_if_available: dict[str, torch.Tensor],
-    vol_factors: list[float],
-    surf_factors: list[float],
-    normalize_coordinates: bool = True,
-    sample_in_bbox: bool = True,
-    sampling: bool = True,
-    device_mesh: torch.distributed.DeviceMesh | None = None,
-    placements: dict[str, torch.distributed.tensor.Placement] | None = None,
-):
-    model_type = cfg.model.model_type
-    if phase == "train":
-        input_path = cfg.data.input_dir
-        dataloader_cfg = cfg.train.dataloader
-    elif phase == "val":
-        input_path = cfg.data.input_dir_val
-        dataloader_cfg = cfg.val.dataloader
-    elif phase == "test":
-        input_path = cfg.eval.test_path
-        dataloader_cfg = None
-    else:
-        raise ValueError(f"Invalid phase {phase}")
-
-    if cfg.data_processor.use_cache:
-        return CachedDoMINODataset(
-            input_path,
-            phase=phase,
-            sampling=sampling,
-            volume_points_sample=cfg.model.volume_points_sample,
-            surface_points_sample=cfg.model.surface_points_sample,
-            geom_points_sample=cfg.model.geom_points_sample,
-            model_type=cfg.model.model_type,
-            surface_sampling_algorithm=cfg.model.surface_sampling_algorithm,
-        )
-    else:
-        # The dataset path works in two pieces:
-        # There is a core "dataset" which is loading data and moving to GPU
-        # And there is the preprocess step, here.
-
-        # Optionally, and for backwards compatibility, the preprocess
-        # object can accept a dataset which will enable it as an iterator.
-        # The iteration function will loop over the dataset, preprocess the
-        # output, and return it.
-
-        overrides = {}
-        if hasattr(cfg.data, "gpu_preprocessing"):
-            overrides["gpu_preprocessing"] = cfg.data.gpu_preprocessing
-
-        if hasattr(cfg.data, "gpu_output"):
-            overrides["gpu_output"] = cfg.data.gpu_output
-
-        dm = DistributedManager()
-
-        if cfg.data.gpu_preprocessing:
-            device = dm.device
-            consumer_stream = torch.cuda.default_stream()
-        else:
-            device = torch.device("cpu")
-            consumer_stream = None
-
-        if dataloader_cfg is not None:
-            preload_depth = dataloader_cfg.preload_depth
-            pin_memory = dataloader_cfg.pin_memory
-        else:
-            preload_depth = 2
-            pin_memory = False
-
-        dataset = DrivaerMLDataset(
-            data_dir=input_path,
-            keys_to_read=keys_to_read,
-            keys_to_read_if_available=keys_to_read_if_available,
-            output_device=device,
-            preload_depth=preload_depth,
-            pin_memory=pin_memory,
-            device_mesh=device_mesh,
-            placements=placements,
-            consumer_stream=consumer_stream,
-        )
-
-        datapipe = DoMINODataPipe(
-            input_path,
-            phase=phase,
-            grid_resolution=cfg.model.interp_res,
-            normalize_coordinates=normalize_coordinates,
-            sampling=sampling,
-            sample_in_bbox=sample_in_bbox,
-            volume_points_sample=cfg.model.volume_points_sample,
-            surface_points_sample=cfg.model.surface_points_sample,
-            geom_points_sample=cfg.model.geom_points_sample,
-            volume_factors=vol_factors,
-            surface_factors=surf_factors,
-            scaling_type=cfg.model.normalization,
-            model_type=model_type,
-            bounding_box_dims=cfg.data.bounding_box,
-            bounding_box_dims_surf=cfg.data.bounding_box_surface,
-            num_surface_neighbors=cfg.model.num_neighbors_surface,
-            surface_sampling_algorithm=cfg.model.surface_sampling_algorithm,
-            **overrides,
-        )
-
-        datapipe.set_dataset(dataset)
-
-        return datapipe
-
-
-if __name__ == "__main__":
-    fm_data = DoMINODataPipe(
-        data_path="/code/processed_data/new_models_1/",
-        phase="train",
-        sampling=False,
-        sample_in_bbox=False,
-    )
diff --git a/physicsnemo/datapipes/cae/domino_sharded_datapipe.py b/physicsnemo/datapipes/cae/domino_sharded_datapipe.py
deleted file mode 100644
index fe2b0d5fcf..0000000000
--- a/physicsnemo/datapipes/cae/domino_sharded_datapipe.py
+++ /dev/null
@@ -1,176 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
-# SPDX-FileCopyrightText: All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from dataclasses import asdict
-
-import torch
-
-from physicsnemo.utils.version_check import check_module_requirements
-
-from .domino_datapipe import DoMINODataPipe
-
-# Prevent importing this module if the minimum version of pytorch is not met.
-check_module_requirements("physicsnemo.distributed.shard_tensor")
-
-from torch.distributed.tensor.placement_types import (  # noqa: E402
-    Replicate,
-    Shard,
-)
-
-from physicsnemo.distributed.shard_tensor import ShardTensor  # noqa: E402
-
-
-class ShardedDoMINODataPipe(DoMINODataPipe):
-    """
-    An extension of the DoMINODataPipe for domain parallel training.
-
-    How this works:
-    1. the preprocessing is done in cupy or numpy in the base class, which we
-       want to keep.
-    2. Dataloading is done on one file per idx in __getitem__.  For sharded data,
-       we want to load one file per mesh and shard or replicate the data as needed.
-    3. The sharding can be either on the grid or the point clouds.  We shard the grids
-       after loading point data, so data loading only worries about the point clouds.
-    4. For numpy files (.npz, .npy), each rank reads the whole file and takes only
-       the data it needs, in the end.  Because data loading is the bulk of the time,
-       this preprocesses everything independently and then shards.
-    5. For Zarr files, each rank can read slices of the data independently.  So
-       infer the chunk size, based on the number of ranks in the mesh and sharding,
-       and then read the right slice.
-    6. For some of the pipeline, we need the full data.  So it gets gathered locally.
-    7. After preprocessing, the data is chunked into appropriate shards and sent out.
-    8. This file provides a wrapper function for the collate function (like a decorator)
-       that will turn appropriate cupy into tensors and then into shard tensors.
-
-    """
-
-    def __init__(
-        self,
-        input_path,
-        model_type,
-        domain_mesh,
-        shard_point_cloud,
-        shard_grid,
-        **config_overrides,
-    ):
-        # if 'gpu_output' not in config_overrides:
-        config_overrides["gpu_output"] = True
-
-        # First, initialize the super class.
-        super().__init__(
-            input_path,
-            model_type,
-            **config_overrides,
-        )
-
-        self.domain_mesh = domain_mesh
-
-        self.shard_point_cloud = shard_point_cloud
-        self.shard_grid = shard_grid
-
-        # These are keys that are point-like
-        self.point_cloud_keys = [
-            "volume_fields",
-            "pos_volume_closest",
-            "pos_volume_center_of_mass",
-            "pos_surface_center_of_mass",
-            "geometry_coordinates",
-            "surface_mesh_centers",
-            "surface_mesh_neighbors",
-            "sdf_nodes",
-            "surface_normals",
-            "surface_neighbors_normals",
-            "surface_areas",
-            "surface_neighbors_areas",
-            "volume_mesh_centers",
-            "surface_fields",
-        ]
-
-        # These keys are grid-like
-        self.grid_keys = [
-            "grid",
-            "surf_grid",
-            "sdf_grid",
-            "sdf_surf_grid",
-        ]
-
-        # These keys are scalar-like and should never be sharded
-        self.scalar_keys = [
-            "global_params_values",
-            "global_params_reference",
-            "surface_min_max",
-            "volume_min_max",
-            "length_scale",
-        ]
-
-    def __getitem__(self, idx):
-        single_dict = super().__getitem__(idx)
-
-        # Here, we're assuming that the data is already replicated.
-        # Turn all the pieces of the dict into ShardTensors with that placement.
-        default_placement = [
-            Replicate(),
-        ]
-        for key, value in single_dict.items():
-            if isinstance(value, torch.Tensor):
-                single_dict[key] = ShardTensor.from_local(
-                    value, self.domain_mesh, default_placement
-                )
-
-        # # Now, shard the data.
-        sharding = [
-            Shard(0),
-        ]
-        if self.shard_point_cloud:
-            for key in self.point_cloud_keys:
-                if key in single_dict:
-                    single_dict[key] = single_dict[key].redistribute(
-                        placements=sharding
-                    )
-
-        if self.shard_grid:
-            for key in self.grid_keys:
-                if key in single_dict:
-                    single_dict[key] = single_dict[key].redistribute(
-                        placements=sharding
-                    )
-
-        return single_dict
-
-
-def create_sharded_domino_dataset(
-    base_dataset,
-    domain_mesh,
-    shard_point_cloud,
-    shard_grid,
-):
-    # Pull off the data path, model type, and config_dict:
-    data_path = base_dataset.config.data_path
-    model_type = base_dataset.model_type
-    config_dict = asdict(base_dataset.config)
-
-    # Make sure the input path is not included in the config_dict:
-    config_dict.pop("data_path")
-
-    # Use the configuration of the base dataset to create a sharded dataset:
-    return ShardedDoMINODataPipe(
-        input_path=data_path,
-        model_type=model_type,
-        domain_mesh=domain_mesh,
-        shard_point_cloud=shard_point_cloud,
-        shard_grid=shard_grid,
-        **config_dict,
-    )
diff --git a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
index 1e6ae62f81..bb9a17041b 100644
--- a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
+++ b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
@@ -793,6 +793,9 @@ def _move_to_gpu(
 
         with torch.cuda.stream(self._data_loader_stream):
             for key in data.keys():
+                if data[key].device == self.output_device:
+                    result[key] = data[key]
+                    continue
                 if self.pin_memory:
                     result[key] = (
                         data[key].pin_memory().to(self.output_device, non_blocking=True)

From b9964179a22347a66df07b2bc26cbca7cd182ec5 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Tue, 30 Sep 2025 16:15:07 +0000
Subject: [PATCH 60/98] Remove printouts.

---
 examples/cfd/external_aerodynamics/domino/src/train.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py
index 3763ce68ec..ffadc0403c 100644
--- a/examples/cfd/external_aerodynamics/domino/src/train.py
+++ b/examples/cfd/external_aerodynamics/domino/src/train.py
@@ -184,10 +184,6 @@ def train_epoch(
     with Profiler():
         io_start_time = time.perf_counter()
         for i_batch, sampled_batched in enumerate(dataloader):
-            for key in sampled_batched.keys():
-                print(
-                    f"{key} has shape {sampled_batched[key].shape} and autograd fn {sampled_batched[key].autograd_fn if hasattr(sampled_batched[key], 'autograd_fn') else None}"
-                )
             io_end_time = time.perf_counter()
             if add_physics_loss:
                 autocast_enabled = False

From f7aab1272ddc98759011c19c2184c384f0483cf3 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Tue, 30 Sep 2025 17:46:23 +0000
Subject: [PATCH 61/98] Add unified gpu memory interface that correctly places
 memory pools onto the right device.

---
 .../domino/src/benchmark_dataloader.py        |  10 +-
 .../domino/src/inference_on_stl.py            |   9 +-
 .../external_aerodynamics/domino/src/train.py |  41 ++-----
 physicsnemo/datapipes/cae/domino_datapipe.py  |   2 +-
 physicsnemo/utils/memory.py                   | 114 ++++++++++++++++++
 5 files changed, 129 insertions(+), 47 deletions(-)
 create mode 100644 physicsnemo/utils/memory.py

diff --git a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
index 090fbf361c..345f78fa2c 100644
--- a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
+++ b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
@@ -40,14 +40,8 @@
 from hydra.utils import to_absolute_path
 from omegaconf import DictConfig, OmegaConf
 
-DISABLE_RMM = os.environ.get("DOMINO_DISABLE_RMM", False)
-if not DISABLE_RMM:
-    import rmm
-    from rmm.allocators.torch import rmm_torch_allocator
-    import torch
-
-    rmm.reinitialize(pool_allocator=True)
-    torch.cuda.memory.change_current_allocator(rmm_torch_allocator)
+# This will set up the cupy-ecosystem and pytorch to share memory pools
+from physicsnemo.utils.memory import unified_gpu_memory
 
 
 import torch.distributed as dist
diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
index a55f703d66..bee8c1cd2f 100644
--- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
+++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
@@ -42,13 +42,8 @@
 from omegaconf import DictConfig, OmegaConf
 import torch
 
-DISABLE_RMM = os.environ.get("DISABLE_RMM", False)
-if not DISABLE_RMM:
-    import rmm
-    from rmm.allocators.torch import rmm_torch_allocator
-
-    rmm.reinitialize(pool_allocator=True)
-    torch.cuda.memory.change_current_allocator(rmm_torch_allocator)
+# This will set up the cupy-ecosystem and pytorch to share memory pools
+from physicsnemo.utils.memory import unified_gpu_memory
 
 import torchinfo
 import torch.distributed as dist
diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py
index ffadc0403c..94e184f3b4 100644
--- a/examples/cfd/external_aerodynamics/domino/src/train.py
+++ b/examples/cfd/external_aerodynamics/domino/src/train.py
@@ -38,27 +38,8 @@
 from hydra.utils import to_absolute_path
 from omegaconf import DictConfig, OmegaConf
 
-
-def srt2bool(val: str):
-    if isinstance(val, bool):
-        return val
-    if val.lower() in ["true", "1", "yes", "y"]:
-        return True
-    elif val.lower() in ["false", "0", "no", "n"]:
-        return False
-    else:
-        raise ValueError(f"Invalid boolean value: {val}")
-
-
-DISABLE_RMM = srt2bool(os.environ.get("DOMINO_DISABLE_RMM", False))
-
-if not DISABLE_RMM:
-    import rmm
-    from rmm.allocators.torch import rmm_torch_allocator
-    import torch
-
-    rmm.reinitialize(pool_allocator=True)
-    torch.cuda.memory.change_current_allocator(rmm_torch_allocator)
+# This will set up the cupy-ecosystem and pytorch to share memory pools
+from physicsnemo.utils.memory import unified_gpu_memory
 
 import torchinfo
 import torch.distributed as dist
@@ -477,16 +458,14 @@ def main(cfg: DictConfig) -> None:
     ######################################################
     # Load checkpoint if available
     ######################################################
-
-    # init_epoch = load_checkpoint(
-    #     to_absolute_path(cfg.resume_dir),
-    #     models=model,
-    #     optimizer=optimizer,
-    #     scheduler=scheduler,
-    #     scaler=scaler,
-    #     device=dist.device,
-    # )
-    init_epoch = 0
+    init_epoch = load_checkpoint(
+        to_absolute_path(cfg.resume_dir),
+        models=model,
+        optimizer=optimizer,
+        scheduler=scheduler,
+        scaler=scaler,
+        device=dist.device,
+    )
 
     if init_epoch != 0:
         init_epoch += 1  # Start with the next epoch
diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py
index 5feae7e118..c9df6bceb0 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe.py
@@ -1173,7 +1173,7 @@ def create_domino_dataset(
             preload_depth = dataloader_cfg.preload_depth
             pin_memory = dataloader_cfg.pin_memory
         else:
-            preload_depth = 2
+            preload_depth = 1
             pin_memory = False
 
         dataset = DrivaerMLDataset(
diff --git a/physicsnemo/utils/memory.py b/physicsnemo/utils/memory.py
new file mode 100644
index 0000000000..54ceb5061c
--- /dev/null
+++ b/physicsnemo/utils/memory.py
@@ -0,0 +1,114 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import torch
+
+try:
+    import rmm
+
+    RMM_AVAILABLE = True
+except ImportError:
+    RMM_AVAILABLE = False
+
+try:
+    import cupy
+
+    CUPY_AVAILABLE = True
+except ImportError:
+    CUPY_AVAILABLE = False
+
+"""
+Using a unifed gpu memory provider, we consolidate the pool into just a
+single allocator for cupy/rapids and torch.  Ideally, we add warp to this someday.
+
+To use this, you need to add the following to your code at or near the top
+(before allocating any GPU memory):
+
+```python
+from physicsnemo.utils.memory import unified_gpu_memory
+```
+
+"""
+
+
+def srt2bool(val: str):
+    if isinstance(val, bool):
+        return val
+    if val.lower() in ["true", "1", "yes", "y"]:
+        return True
+    elif val.lower() in ["false", "0", "no", "n"]:
+        return False
+    else:
+        raise ValueError(f"Invalid boolean value: {val}")
+
+
+DISABLE_RMM = srt2bool(os.environ.get("PHYSICSNEMO_DISABLE_RMM", False))
+
+
+def _setup_unified_gpu_memory():
+    # Skip if RMM is disabled
+    if RMM_AVAILABLE and not DISABLE_RMM:
+        # First, determine the local rank so that we allocate on the right device.
+        # These are meant to be tested in the same order as DistributedManager
+        # We can't actually initialize it, though, since we have to unify mallocs
+        # before torch init.
+        PHYSICSNEMO_DISTRIBUTED_INITIALIZATION_METHOD = os.environ.get(
+            "PHYSICSNEMO_DISTRIBUTED_INITIALIZATION_METHOD", None
+        )
+        if PHYSICSNEMO_DISTRIBUTED_INITIALIZATION_METHOD is None:
+            for method in ["LOCAL_RANK", "OMPI_COMM_WORLD_LOCAL_RANK", "SLURM_LOCALID"]:
+                if os.environ.get(method) is not None:
+                    local_rank = int(os.environ.get(method))
+                    break
+        else:
+            if PHYSICSNEMO_DISTRIBUTED_INITIALIZATION_METHOD == "ENV":
+                local_rank = int(os.environ.get("LOCAL_RANK"))
+            elif PHYSICSNEMO_DISTRIBUTED_INITIALIZATION_METHOD == "SLURM":
+                local_rank = int(os.environ.get("SLURM_LOCALID"))
+            elif PHYSICSNEMO_DISTRIBUTED_INITIALIZATION_METHOD == "OPENMPI":
+                local_rank = int(os.environ.get("OMPI_COMM_WORLD_LOCAL_RANK"))
+            else:
+                raise ValueError(
+                    f"Unknown initialization method: {PHYSICSNEMO_DISTRIBUTED_INITIALIZATION_METHOD}"
+                )
+
+        # Initialize RMM
+        rmm.reinitialize(
+            pool_allocator=True, devices=local_rank, initial_pool_size="1024MB"
+        )
+
+        # Set PyTorch allocator if available
+        from rmm.allocators.torch import rmm_torch_allocator
+
+        if torch.cuda.is_available():
+            torch.cuda.memory.change_current_allocator(rmm_torch_allocator)
+
+        # Set CuPy allocator if available
+        if CUPY_AVAILABLE:
+            from rmm.allocators.cupy import rmm_cupy_allocator
+
+            cupy.cuda.set_allocator(rmm_cupy_allocator)
+
+
+# This is what gets executed when someone does "from memory import unified_gpu_memory"
+
+
+def __getattr__(name):
+    if name == "unified_gpu_memory":
+        return _setup_unified_gpu_memory()
+    raise AttributeError(f"module '{__name__}' has no attribute '{name}'")

From e36bf9b41da6ec53d461e7a1cd0b814c4404fad0 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 1 Oct 2025 15:48:14 +0000
Subject: [PATCH 62/98] Fix indexing error in the dataset that was leading to
 GPU memory leaking.

---
 physicsnemo/datapipes/cae/drivaer_ml_dataset.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
index bb9a17041b..af844b2656 100644
--- a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
+++ b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
@@ -934,15 +934,23 @@ def __iter__(self):
     def __next__(self):
         N = len(self.indices) if hasattr(self, "indices") else len(self._filenames)
 
+        # Iteration bounds are based on the counter, not the random-access index
         if self.i >= N:
             self.i = 0
             raise StopIteration
 
-        for i in range(self.preload_depth):
-            if N > i + 1:
-                self.preload(self.i + i)
+        # This is the file random access index
+        target_index = self.idx_to_index(self.i)
+
+        # Before returning, put the next two target indexes into the queue:
+        for preload_i in range(self.preload_depth):
+            next_iteration_index = self.i + preload_i + 1
+            if N > next_iteration_index:
+                preload_idx = self.idx_to_index(next_iteration_index)
+                self.preload(preload_idx)
 
-        data = self.__getitem__(self.i)
+        # Send up the random-access data:
+        data = self.__getitem__(target_index)
 
         self.i += 1
 

From 5240b33bfc9cb751f5bfc8602cfcf03398601ba8 Mon Sep 17 00:00:00 2001
From: Rishikesh Ranade <dr.rranade@gmail.com>
Date: Wed, 1 Oct 2025 08:53:33 -0700
Subject: [PATCH 63/98] fix in scaling factors calculation

---
 .../external_aerodynamics/domino/src/conf/config.yaml  | 10 +++++-----
 examples/cfd/external_aerodynamics/domino/src/train.py |  9 ++++-----
 physicsnemo/datapipes/cae/domino_datapipe.py           |  8 ++++----
 physicsnemo/models/domino/model.py                     |  8 --------
 4 files changed, 13 insertions(+), 22 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
index aa17995327..26870b461f 100644
--- a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
+++ b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
@@ -71,8 +71,8 @@ variables:
 # │         Data Configs                      │
 # └───────────────────────────────────────────┘  
 data: # Input directory for training and validation data
-  input_dir: /user/data/aws_data_all/
-  input_dir_val: /user/data/aws_data_all_val/
+  input_dir: /lustre/rranade/modulus_dev/data/aws_data_all/
+  input_dir_val: /lustre/rranade/modulus_dev/data/aws_data_all_val/
   bounding_box: # Bounding box dimensions for computational domain
     min: [-3.5, -2.25, -0.32]
     max: [8.5, 2.25, 3.00]
@@ -84,7 +84,7 @@ data: # Input directory for training and validation data
   normalize_coordinates: true
   sample_in_bbox: true
   sampling: true
-  scaling_factors: outputs/AWS_Dataset/1/scaling_factors/scaling_factors.pkl
+  scaling_factors: ${project_dir}/scaling_factors/scaling_factors.pkl
 
 # ┌───────────────────────────────────────────┐
 # │          Domain Parallelism Settings      │
@@ -174,10 +174,10 @@ model:
 # └───────────────────────────────────────────┘  
 train: # Training configurable parameters
   epochs: 1000
-  checkpoint_interval: 1
+  checkpoint_interval: 50
   dataloader:
     batch_size: 1
-    preload_depth: 2
+    preload_depth: 1
     pin_memory: True # if the preprocessing is outputing GPU data, set this to false
   sampler:
     shuffle: true
diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py
index 94e184f3b4..4bf52bfb2e 100644
--- a/examples/cfd/external_aerodynamics/domino/src/train.py
+++ b/examples/cfd/external_aerodynamics/domino/src/train.py
@@ -152,6 +152,7 @@ def train_epoch(
     eqn: Any = None,
     bounding_box: torch.Tensor | None = None,
     vol_factors: torch.Tensor | None = None,
+    surf_factors: torch.Tensor | None = None,
     add_physics_loss=False,
 ):
     dist = DistributedManager()
@@ -284,8 +285,8 @@ def main(cfg: DictConfig) -> None:
             f"Scaling factors not found at: {pickle_path}; please run compute_statistics.py to compute them."
         )
 
-    vol_factors = scaling_factors.mean["volume_fields"]
-    surf_factors = scaling_factors.mean["surface_fields"]
+    vol_factors = np.asarray([scaling_factors.max_val["volume_fields"], scaling_factors.min_val["volume_fields"]])
+    surf_factors = np.asarray([scaling_factors.max_val["surface_fields"], scaling_factors.min_val["surface_fields"]])
     vol_factors_tensor = torch.from_numpy(vol_factors).to(dist.device)
 
     ######################################################
@@ -509,7 +510,7 @@ def main(cfg: DictConfig) -> None:
         else:
             surface_scaling_loss = cfg.model.surf_loss_scaling
 
-        # model.train(True)
+        model.train(True)
         epoch_start_time = time.perf_counter()
         avg_loss = train_epoch(
             dataloader=train_dataloader,
@@ -537,8 +538,6 @@ def main(cfg: DictConfig) -> None:
         )
         epoch_end_time = time.perf_counter()
 
-        return
-
         model.eval()
         avg_vloss = validation_step(
             dataloader=val_dataloader,
diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py
index c9df6bceb0..e4fd314695 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe.py
@@ -820,12 +820,12 @@ def scale_model_targets(
         Scale the model targets based on the configured scaling factors.
         """
         if self.config.scaling_type == "mean_std_scaling":
-            field_mean = self.config.volume_factors[0]
-            field_std = self.config.volume_factors[1]
+            field_mean = factors[0]
+            field_std = factors[1]
             return standardize(fields, field_mean, field_std)
         elif self.config.scaling_type == "min_max_scaling":
-            field_min = self.config.volume_factors[1]
-            field_max = self.config.volume_factors[0]
+            field_min = factors[1]
+            field_max = factors[0]
             return normalize(fields, field_max, field_min)
 
     def unscale_model_outputs(
diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py
index 94f6adaaba..e0ec7b772c 100644
--- a/physicsnemo/models/domino/model.py
+++ b/physicsnemo/models/domino/model.py
@@ -308,14 +308,6 @@ def __init__(
             hops=model_parameters.geometry_rep.geo_conv.surface_hops,
             model_parameters=model_parameters,
         )
-
-        self.geo_rep_surface1 = GeometryRep(
-            input_features=input_features,
-            radii=model_parameters.geometry_rep.geo_conv.volume_radii,
-            neighbors_in_radius=model_parameters.geometry_rep.geo_conv.volume_neighbors_in_radius,
-            model_parameters=model_parameters,
-        )
-
         # Basis functions for surface and volume
         base_layer_nn = model_parameters.nn_basis_functions.base_layer
         if self.output_features_surf is not None:

From fec26d5e8ebfd25f71714810150b04912a45affe Mon Sep 17 00:00:00 2001
From: Rishikesh Ranade <dr.rranade@gmail.com>
Date: Thu, 2 Oct 2025 09:40:36 -0700
Subject: [PATCH 64/98] small fixes in datapipe and model

---
 physicsnemo/datapipes/cae/domino_datapipe.py | 26 ++++++--------------
 physicsnemo/models/domino/model.py           |  1 +
 2 files changed, 8 insertions(+), 19 deletions(-)

diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py
index e4fd314695..8a0aa5e035 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe.py
@@ -449,11 +449,7 @@ def process_surface(
             )
 
             if surface_coordinates_sampled.shape[0] < self.config.surface_points_sample:
-                surface_coordinates_sampled = pad(
-                    surface_coordinates_sampled,
-                    self.config.surface_points_sample,
-                    pad_value=-10.0,
-                )
+                raise ValueError("Sampled points is more than points in the surface mesh")
 
             # Select out the sampled points for non-neighbor arrays:
             if surface_fields is not None:
@@ -485,7 +481,7 @@ def process_surface(
 
         # Better to normalize everything after the kNN and sampling
         if self.config.normalize_coordinates:
-            surf_grid = normalize(surf_grid, s_max, s_min)
+            # surf_grid = normalize(surf_grid, s_max, s_min)
             surface_coordinates = normalize(surface_coordinates, s_max, s_min)
             surface_neighbors = normalize(surface_neighbors, s_max, s_min)
             # Make sure to normalize the center of mass for the normals_com_surface calc
@@ -567,17 +563,7 @@ def process_volume(
             # In case too few points are in the sampled data (because the
             # inputs were too few), pad the outputs:
             if volume_coordinates_sampled.shape[0] < self.config.volume_points_sample:
-                padding_size = (
-                    self.config.volume_points_sample
-                    - volume_coordinates_sampled.shape[0]
-                )
-
-                volume_coordinates_sampled = torch.nn.functional.pad(
-                    volume_coordinates_sampled,
-                    (0, 0, 0, 0, 0, padding_size),
-                    mode="constant",
-                    value=-10.0,
-                )
+                raise ValueError("Sampled points is more than points in the volume mesh")
 
             # Apply the same sampling to the targets, too:
             if volume_fields is not None:
@@ -594,6 +580,7 @@ def process_volume(
             # This is used later in the SDF, apply the same scaling to the mesh
             # coordinates:
             normed_vertices = normalize(stl_vertices, c_max, c_min)
+            center_of_mass = normalize(center_of_mass, c_max, c_min)
         else:
             grid = volume_grid
             normed_vertices = stl_vertices
@@ -658,9 +645,9 @@ def calculate_volume_encoding(
         center_of_mass: torch.Tensor,
     ):
         if self.config.normalize_coordinates:
-            volume_coordinates = normalize(volume_coordinates, c_max, c_min)
+            # volume_coordinates = normalize(volume_coordinates, c_max, c_min)
             sdf_node_closest_point = normalize(sdf_node_closest_point, c_max, c_min)
-            center_of_mass = normalize(center_of_mass, c_max, c_min)
+            # center_of_mass = normalize(center_of_mass, c_max, c_min)
 
         pos_normals_closest_vol = volume_coordinates - sdf_node_closest_point
         pos_normals_com_vol = volume_coordinates - center_of_mass
@@ -731,6 +718,7 @@ def process_data(self, data_dict):
         # This is for the SDF Later:
         if self.config.normalize_coordinates:
             normed_vertices = normalize(data_dict["stl_coordinates"], s_max, s_min)
+            surf_grid = normalize(surf_grid, s_max, s_min)
         else:
             normed_vertices = data_dict["stl_coordinates"]
 
diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py
index e0ec7b772c..aea2e91ad4 100644
--- a/physicsnemo/models/domino/model.py
+++ b/physicsnemo/models/domino/model.py
@@ -308,6 +308,7 @@ def __init__(
             hops=model_parameters.geometry_rep.geo_conv.surface_hops,
             model_parameters=model_parameters,
         )
+        
         # Basis functions for surface and volume
         base_layer_nn = model_parameters.nn_basis_functions.base_layer
         if self.output_features_surf is not None:

From 073a3f97a41fc5d4101badbd3b7986d91b645734 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Tue, 7 Oct 2025 13:42:52 +0000
Subject: [PATCH 65/98] Fix factor calculations

---
 .../external_aerodynamics/domino/src/utils.py    | 11 ++++++++---
 physicsnemo/datapipes/cae/domino_datapipe.py     | 16 +++++++++++-----
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/src/utils.py b/examples/cfd/external_aerodynamics/domino/src/utils.py
index 6d05c90bfc..12259641e3 100644
--- a/examples/cfd/external_aerodynamics/domino/src/utils.py
+++ b/examples/cfd/external_aerodynamics/domino/src/utils.py
@@ -107,10 +107,15 @@ def get_keys_to_read(
 
     # If these keys are in the config, use them, else provide defaults in
     # case they aren't in the dataset:
-    # TODO
+    cfg_params_vec = []
+    for key in cfg.variables.global_parameters:
+        if cfg.variables.global_parameters[key].type == "vector":
+            cfg_params_vec.append(*cfg.variables.global_parameters[key].reference)
+        else:
+            cfg_params_vec.append(cfg.variables.global_parameters[key].reference)
     keys_to_read_if_available = {
-        "global_params_values": torch.tensor([[30.0], [1.226]]),
-        "global_params_reference": torch.tensor([[30.0], [1.226]]),
+        "global_params_values": torch.tensor(cfg_params_vec).reshape(-1, 1),
+        "global_params_reference": torch.tensor(cfg_params_vec).reshape(-1, 1),
     }
 
     # Volume keys:
diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py
index 8a0aa5e035..cbd6296495 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe.py
@@ -478,6 +478,10 @@ def process_surface(
             surface_neighbors = full_surface_coordinates[neighbor_indices][:, 1:]
             surface_neighbors_normals = full_surface_normals[neighbor_indices][:, 1:]
             surface_neighbors_sizes = full_surface_sizes[neighbor_indices][:, 1:]
+        else:
+            surface_neighbors = surface_coordinates
+            surface_neighbors_normals = surface_normals
+            surface_neighbors_sizes = surface_sizes
 
         # Better to normalize everything after the kNN and sampling
         if self.config.normalize_coordinates:
@@ -817,7 +821,9 @@ def scale_model_targets(
             return normalize(fields, field_max, field_min)
 
     def unscale_model_outputs(
-        self, volume_fields: torch.Tensor | None, surface_fields: torch.Tensor | None
+        self,
+        volume_fields: torch.Tensor | None = None,
+        surface_fields: torch.Tensor | None = None,
     ):
         """
         Unscale the model outputs based on the configured scaling factors.
@@ -833,8 +839,8 @@ def unscale_model_outputs(
                 vol_std = self.config.volume_factors[1]
                 volume_fields = unstandardize(volume_fields, vol_mean, vol_std)
             elif self.config.scaling_type == "min_max_scaling":
-                vol_min = self.config.volume_factors[1]
-                vol_max = self.config.volume_factors[0]
+                vol_min = self.config.volume_factors[0]
+                vol_max = self.config.volume_factors[1]
                 volume_fields = unnormalize(volume_fields, vol_max, vol_min)
         if surface_fields is not None:
             if self.config.scaling_type == "mean_std_scaling":
@@ -842,8 +848,8 @@ def unscale_model_outputs(
                 surf_std = self.config.surface_factors[1]
                 surface_fields = unstandardize(surface_fields, surf_mean, surf_std)
             elif self.config.scaling_type == "min_max_scaling":
-                surf_min = self.config.surface_factors[1]
-                surf_max = self.config.surface_factors[0]
+                surf_min = self.config.surface_factors[0]
+                surf_max = self.config.surface_factors[1]
                 surface_fields = unnormalize(surface_fields, surf_max, surf_min)
 
         return volume_fields, surface_fields

From 594b9edf9f980c672d5ff6e1b13be1c374b988d6 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Tue, 7 Oct 2025 19:09:23 +0000
Subject: [PATCH 66/98] Enable sliced reading of volumetric data.

---
 physicsnemo/datapipes/cae/domino_datapipe.py  |  39 ++--
 .../datapipes/cae/drivaer_ml_dataset.py       | 168 ++++++++++++++----
 physicsnemo/utils/domino/utils.py             |   2 +-
 3 files changed, 162 insertions(+), 47 deletions(-)

diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py
index cbd6296495..9ee479223d 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe.py
@@ -87,6 +87,9 @@ class DoMINODataConfig:
             attributes that are arraylike.
         volume_variables: (Volume specific) Names of volume variables.
         volume_points_sample: (Volume specific) Number of volume points to sample per batch.
+        volume_sample_from_disk: (Volume specific) If the volume data is in a shuffled state on disk,
+            read contiguous chunks of the data rather than the entire volume data.  This greatly
+            accelerates IO in bandwidth limited systems or when the volumetric data is very large.
         volume_factors: (Volume specific) Non-dimensionalization factors for volume variables scaling.
             If set, and scaling_type is:
             - min_max_scaling -> rescale volume_fields to the min/max set here
@@ -138,6 +141,7 @@ class DoMINODataConfig:
     # Volume specific variables:
     volume_variables: Optional[Sequence] = ("UMean", "pMean")
     volume_points_sample: int = 1024
+    volume_sample_from_disk: bool = False
     volume_factors: Optional[Sequence] = None
     bounding_box_dims: Optional[Union[BoundingBox, Sequence]] = None
 
@@ -282,16 +286,20 @@ def __init__(
         # Ensure the volume and surface scaling factors are torch tensors
         # and on the right device:
         if self.config.volume_factors is not None:
-            self.config.volume_factors = torch.tensor(
-                self.config.volume_factors,
-                device=self.preproc_device,
-                dtype=torch.float32,
+            if not isinstance(self.config.volume_factors, torch.Tensor):
+                self.config.volume_factors = torch.from_numpy(
+                    self.config.volume_factors
+                )
+            self.config.volume_factors = self.config.volume_factors.to(
+                self.preproc_device, dtype=torch.float32
             )
         if self.config.surface_factors is not None:
-            self.config.surface_factors = torch.tensor(
-                self.config.surface_factors,
-                device=self.preproc_device,
-                dtype=torch.float32,
+            if not isinstance(self.config.surface_factors, torch.Tensor):
+                self.config.surface_factors = torch.from_numpy(
+                    self.config.surface_factors
+                )
+            self.config.surface_factors = self.config.surface_factors.to(
+                self.preproc_device, dtype=torch.float32
             )
 
         self.dataset = None
@@ -449,7 +457,9 @@ def process_surface(
             )
 
             if surface_coordinates_sampled.shape[0] < self.config.surface_points_sample:
-                raise ValueError("Sampled points is more than points in the surface mesh")
+                raise ValueError(
+                    "Sampled points is more than points in the surface mesh"
+                )
 
             # Select out the sampled points for non-neighbor arrays:
             if surface_fields is not None:
@@ -557,6 +567,8 @@ def process_volume(
         # Apply sampling to the volume coordinates and fields
         ########################################################################
 
+        # If the volume data has been sampled from disk, directly, then
+        # still apply sampling.  We over-pull from disk deliberately.
         if self.config.sampling:
             # Generate a series of idx to sample the volume
             # without replacement
@@ -567,7 +579,9 @@ def process_volume(
             # In case too few points are in the sampled data (because the
             # inputs were too few), pad the outputs:
             if volume_coordinates_sampled.shape[0] < self.config.volume_points_sample:
-                raise ValueError("Sampled points is more than points in the volume mesh")
+                raise ValueError(
+                    "Sampled points is more than points in the volume mesh"
+                )
 
             # Apply the same sampling to the targets, too:
             if volume_fields is not None:
@@ -860,6 +874,10 @@ def set_dataset(self, dataset: Iterable) -> None:
         """
         self.dataset = dataset
 
+        if self.config.volume_sample_from_disk:
+            # We deliberately double the data to read compared to the sampling size:
+            self.dataset.set_volume_sampling_size(2 * self.config.volume_points_sample)
+
     def __len__(self):
         if self.dataset is not None:
             return len(self.dataset)
@@ -1198,6 +1216,7 @@ def create_domino_dataset(
             model_type=model_type,
             bounding_box_dims=cfg.data.bounding_box,
             bounding_box_dims_surf=cfg.data.bounding_box_surface,
+            volume_sample_from_disk=cfg.data.volume_sample_from_disk,
             num_surface_neighbors=cfg.model.num_neighbors_surface,
             surface_sampling_algorithm=cfg.model.surface_sampling_algorithm,
             **overrides,
diff --git a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
index af844b2656..faa62f6ed6 100644
--- a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
+++ b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
@@ -78,6 +78,8 @@ def __init__(
         self.keys_to_read = keys_to_read
         self.keys_to_read_if_available = keys_to_read_if_available
 
+        self.volume_sampling_size = None
+
     @abstractmethod
     def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]:
         """
@@ -145,6 +147,45 @@ def _get_slice_boundaries(
 
         return global_chunk_start, global_chunk_stop, chunk_sizes
 
+    def set_volume_sampling_size(self, volume_sampling_size: int):
+        """
+        Set the volume sampling size.  When set, the readers will
+        assume the volumetric data is shuffled on disk and read only
+        contiguous chunks of the data up to the sampling size.
+
+
+        Args:
+            volume_sampling_size: The total size of the volume sampling.
+
+        """
+        self.volume_sampling_size = volume_sampling_size
+
+    def select_random_sections_from_slice(
+        self,
+        slice_start: int,
+        slice_stop: int,
+        n_points: int,
+    ) -> slice:
+        """
+
+        select the contiguous chunks of the volume data to read.
+
+        Args:
+            n_volume_points: The number of points to sample from the volume.
+
+        Returns:
+            A tuple of the start and stop indices of the contiguous chunks.
+        """
+
+        if slice_stop - slice_start < n_points:
+            raise ValueError(
+                f"Slice size {slice_stop - slice_start} is less than the number of points {n_points}"
+            )
+
+        # Choose a random start point that will fit the entire n_points region:
+        start = np.random.randint(slice_start, slice_stop - n_points)
+        return slice(start, start + n_points)
+
 
 class NpyFileReader(BackendReader):
     """
@@ -178,6 +219,14 @@ def read_file_sharded(
     ) -> dict[str, ShardTensor]:
         pass
 
+    def set_volume_sampling_size(self, volume_sampling_size: int):
+        """
+        This is not supported for npy files.
+        """
+        raise NotImplementedError(
+            "volume sampling directly from disk is not supported for npy files."
+        )
+
 
 class NpzFileReader(BackendReader):
     """
@@ -202,7 +251,25 @@ def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]:
         if len(keys_missing) > 0:
             raise ValueError(f"Keys {keys_missing} not found in file {filename}")
 
-        data = {key: torch.from_numpy(in_data[key][:]) for key in self.keys_to_read}
+        # Make sure to select the slice outside of the loop.
+        if self.volume_sampling_size is not None:
+            volume_slice = self.select_random_sections_from_slice(
+                0,
+                in_data["volume_mesh_centers"].shape[0],
+                self.volume_sampling_size,
+            )
+        else:
+            volume_slice = slice(0, in_data["volume_mesh_centers"].shape[0])
+
+        # This is a slower basic way to do this, to be improved:
+        data = {}
+        for key in self.keys_to_read:
+            if "volume" not in key:
+                data[key] = torch.from_numpy(in_data[key][:])
+            else:
+                data[key] = torch.from_numpy(in_data[key][volume_slice])
+
+        # data = {key: torch.from_numpy(in_data[key][:]) for key in self.keys_to_read}
 
         return self.fill_optional_keys(data)
 
@@ -211,6 +278,14 @@ def read_file_sharded(
     ) -> dict[str, ShardTensor]:
         pass
 
+    def set_volume_sampling_size(self, volume_sampling_size: int):
+        """
+        This is not supported for npz files.
+        """
+        raise NotImplementedError(
+            "volume sampling directly from disk is not supported for npz files."
+        )
+
 
 class ZarrFileReader(BackendReader):
     """
@@ -235,8 +310,23 @@ def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]:
         if len(missing_keys) > 0:
             raise ValueError(f"Keys {missing_keys} not found in file {filename}")
 
+        # Make sure to select the slice outside of the loop.
+        if self.volume_sampling_size is not None:
+            volume_slice = self.select_random_sections_from_slice(
+                0,
+                group["volume_mesh_centers"].shape[0],
+                self.volume_sampling_size,
+            )
+        else:
+            volume_slice = slice(0, group["volume_mesh_centers"].shape[0])
+
         # This is a slower basic way to do this, to be improved:
-        data = {key: torch.from_numpy(group[key][:]) for key in self.keys_to_read}
+        data = {}
+        for key in self.keys_to_read:
+            if "volume" not in key:
+                data[key] = torch.from_numpy(group[key][:])
+            else:
+                data[key] = torch.from_numpy(group[key][volume_slice])
 
         return self.fill_optional_keys(data)
 
@@ -436,6 +526,14 @@ def read_data_from_vtp(self, vtp_path: str) -> dict:
 
             raise NotImplementedError("Not implemented yet.")
 
+        def set_volume_sampling_size(self, volume_sampling_size: int):
+            """
+            This is not supported for vtk files.
+            """
+            raise NotImplementedError(
+                "volume sampling directly from disk is not supported for vtk files."
+            )
+
 
 if TENSORSTORE_AVAILABLE:
 
@@ -452,7 +550,7 @@ def __init__(
             super().__init__(keys_to_read, keys_to_read_if_available)
 
             self.spec_template = {
-                "driver": "zarr2",
+                "driver": "auto",
                 "kvstore": {
                     "driver": "file",
                     "path": None,
@@ -463,6 +561,7 @@ def __init__(
                 {
                     "cache_pool": {"total_bytes_limit": 10_000_000},
                     "data_copy_concurrency": {"limit": 72},
+                    "file_io_concurrency": {"limit": 72},
                 }
             )
 
@@ -486,16 +585,31 @@ def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]:
                 key: read_futures[key].result() for key in read_futures.keys()
             }
 
+            # Make sure to select the slice outside of the loop.
+            # We need
+            if self.volume_sampling_size is not None:
+                volume_slice = self.select_random_sections_from_slice(
+                    0,
+                    read_futures["volume_mesh_centers"].shape[0],
+                    self.volume_sampling_size,
+                )
+            else:
+                volume_slice = slice(0, read_futures["volume_mesh_centers"].shape[0])
+
             # Trigger an async read of each data item:
             # (Each item will be a numpy ndarray after this:)
-            read_futures = {
-                key: read_futures[key].read() for key in read_futures.keys()
-            }
+            tensor_futures = {}
+            for key in self.keys_to_read:
+                if "volume" not in key:
+                    tensor_futures[key] = read_futures[key].read()
+                # For the volume data, read the slice:
+                else:
+                    tensor_futures[key] = read_futures[key][volume_slice].read()
 
             # Convert them to torch tensors:
             # (make sure to block for the result)
             data = {
-                key: torch.as_tensor(read_futures[key].result(), dtype=torch.float32)
+                key: torch.as_tensor(tensor_futures[key].result(), dtype=torch.float32)
                 for key in self.keys_to_read
             }
 
@@ -844,35 +958,6 @@ def _convert_to_shard_tensors(
 
         return result
 
-        # result = {}
-
-        # for key, tensor in tensors.items():
-        #     # Create a ShardTensor with whatever layout the data is actually in:
-        #     st = ShardTensor.__new__(
-        #         ShardTensor,
-        #         local_tensor=tensor,
-        #         spec=self.tensor_specs[key],
-        #         requires_grad=False,  # By default, the data pipe output doesn't need a grad.
-        #     )
-
-        #     # Find out the desired placement:
-        #     if tensor.numel() > 1:
-        #         if isinstance(self.placements, dict):
-        #             target_placement = self.placements[key]
-        #         else:
-        #             target_placement = self.placements
-        #     else:
-        #         target_placement = (Replicate(),)
-
-        #     # Redistribute if necessary:
-        #     # (Recall that this is one dimensional mesh only)
-        #     if st._spec.placements[0] != target_placement[0]:
-        #         st = st.redistribute(placements=target_placement)
-
-        #     result[key] = st
-
-        # return result
-
     def preload(self, idx: int) -> None:
         """
         Asynchronously preload the data for the given index (up to CPU, not GPU).
@@ -1013,6 +1098,17 @@ def __getitem__(self, idx: int) -> dict[str, torch.Tensor | ShardTensor]:
 
         return data
 
+    def set_volume_sampling_size(self, volume_sampling_size: int):
+        """
+        Set the volume sampling size.  When set, the readers will
+        assume the volumetric data is shuffled on disk and read only
+        contiguous chunks of the data up to the sampling size.
+
+        Args:
+            volume_sampling_size: The total size of the volume sampling.
+        """
+        self.file_reader.set_volume_sampling_size(volume_sampling_size)
+
 
 def compute_mean_std_min_max(
     dataset: DrivaerMLDataset, field_keys: list[str], max_samples: int = 20
diff --git a/physicsnemo/utils/domino/utils.py b/physicsnemo/utils/domino/utils.py
index fc3af36334..8b7a982142 100644
--- a/physicsnemo/utils/domino/utils.py
+++ b/physicsnemo/utils/domino/utils.py
@@ -679,7 +679,7 @@ def create_grid(
     ]
 
     # Combine them with meshgrid:
-    xv, yv, zv = torch.meshgrid(*dd)
+    xv, yv, zv = torch.meshgrid(*dd, indexing="ij")
 
     xv = xv.unsqueeze(-1)
     yv = yv.unsqueeze(-1)

From 4c67de6d6a88bed33bbb44d5bb3d30d0c861c4e3 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Tue, 7 Oct 2025 19:19:46 +0000
Subject: [PATCH 67/98] Update scaling factor calculation and loading ... much
 simpler now.

---
 .../domino/src/benchmark_dataloader.py        | 34 +++--------
 .../domino/src/conf/config.yaml               |  3 +-
 .../domino/src/inference_on_stl.py            | 13 +---
 .../external_aerodynamics/domino/src/train.py | 22 ++-----
 .../external_aerodynamics/domino/src/utils.py | 59 ++++++++++++++++++-
 5 files changed, 75 insertions(+), 56 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
index 345f78fa2c..e3f24a5fff 100644
--- a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
+++ b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
@@ -70,10 +70,12 @@
 from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo
 import time
 
-from utils import ScalingFactors, get_keys_to_read, coordinate_distributed_environment
-
-# Initialize NVML
-nvmlInit()
+from utils import (
+    ScalingFactors,
+    get_keys_to_read,
+    coordinate_distributed_environment,
+    load_scaling_factors,
+)
 
 
 from physicsnemo.utils.profiling import profile, Profiler
@@ -96,7 +98,7 @@ def benchmark_io_epoch(
     start_time = time.perf_counter()
     for i_batch, sample_batched in enumerate(dataloader):
         # for key in sample_batched.keys():
-        #     print(f"{key}: {sample_batched[key].shape}")
+        #     print(f"Key {key} shape: {sample_batched[key].shape} with mean {sample_batched[key].mean()} and std {sample_batched[key].std()} ")
 
         # Gather data and report
         elapsed_time = time.perf_counter() - start_time
@@ -136,19 +138,7 @@ def main(cfg: DictConfig) -> None:
     ################################
     # Get scaling factors
     ################################
-    pickle_path = os.path.join(cfg.output) + "/scaling_factors/scaling_factors.pkl"
-
-    try:
-        scaling_factors = ScalingFactors.load(pickle_path)
-        logger.info(f"Scaling factors loaded from: {pickle_path}")
-    except FileNotFoundError:
-        raise FileNotFoundError(
-            f"Scaling factors not found at: {pickle_path}; please run compute_statistics.py to compute them."
-        )
-
-    vol_factors = scaling_factors.mean["volume_fields"]
-    surf_factors = scaling_factors.mean["surface_fields"]
-    vol_factors_tensor = torch.from_numpy(vol_factors).to(dist.device)
+    vol_factors, surf_factors = load_scaling_factors(cfg)
 
     keys_to_read, keys_to_read_if_available = get_keys_to_read(
         cfg, model_type, get_ground_truth=True
@@ -170,18 +160,12 @@ def main(cfg: DictConfig) -> None:
         train_dataset, num_replicas=data_mesh.size(), rank=data_mesh.get_local_rank()
     )
 
-    # train_dataloader = DataLoader(
-    #     train_dataset,
-    #     sampler=train_sampler,
-    #     **cfg.train.dataloader,
-    # )
-
     for epoch in range(0, cfg.train.epochs):
         start_time = time.perf_counter()
         logger.info(f"Device {dist.device}, epoch {epoch}:")
 
         train_sampler.set_epoch(epoch)
-        print(f"indices: {list(train_sampler)}")
+
         train_dataset.dataset.set_indices(list(train_sampler))
 
         epoch_start_time = time.perf_counter()
diff --git a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
index 26870b461f..9a404ff240 100644
--- a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
+++ b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
@@ -82,9 +82,10 @@ data: # Input directory for training and validation data
   gpu_preprocessing: true
   gpu_output: true
   normalize_coordinates: true
-  sample_in_bbox: true
+  sample_in_bbox: True
   sampling: true
   scaling_factors: ${project_dir}/scaling_factors/scaling_factors.pkl
+  volume_sample_from_disk: true
 
 # ┌───────────────────────────────────────────┐
 # │          Domain Parallelism Settings      │
diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
index bee8c1cd2f..9707a7e6d1 100644
--- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
+++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
@@ -486,18 +486,7 @@ def main(cfg: DictConfig) -> None:
     # Get scaling factors
     # Likely, you want to reuse the scaling factors from training.
     ######################################################
-    pickle_path = os.path.join(cfg.data.scaling_factors)
-
-    try:
-        scaling_factors = ScalingFactors.load(pickle_path)
-        logger.info(f"Scaling factors loaded from: {pickle_path}")
-    except FileNotFoundError:
-        raise FileNotFoundError(
-            f"Scaling factors not found at: {pickle_path}; please run compute_statistics.py to compute them."
-        )
-
-    vol_factors = scaling_factors.mean["volume_fields"]
-    surf_factors = scaling_factors.mean["surface_fields"]
+    vol_factors, surf_factors = load_scaling_factors(cfg)
 
     ######################################################
     # Configure the model
diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py
index 4bf52bfb2e..954114ae46 100644
--- a/examples/cfd/external_aerodynamics/domino/src/train.py
+++ b/examples/cfd/external_aerodynamics/domino/src/train.py
@@ -78,7 +78,7 @@
 
 
 from loss import compute_loss_dict
-from utils import get_num_vars
+from utils import get_num_vars, load_scaling_factors
 
 
 def validation_step(
@@ -275,19 +275,7 @@ def main(cfg: DictConfig) -> None:
     ######################################################
     # Get scaling factors - precompute them if this fails!
     ######################################################
-    pickle_path = os.path.join(cfg.data.scaling_factors)
-
-    try:
-        scaling_factors = ScalingFactors.load(pickle_path)
-        logger.info(f"Scaling factors loaded from: {pickle_path}")
-    except FileNotFoundError:
-        raise FileNotFoundError(
-            f"Scaling factors not found at: {pickle_path}; please run compute_statistics.py to compute them."
-        )
-
-    vol_factors = np.asarray([scaling_factors.max_val["volume_fields"], scaling_factors.min_val["volume_fields"]])
-    surf_factors = np.asarray([scaling_factors.max_val["surface_fields"], scaling_factors.min_val["surface_fields"]])
-    vol_factors_tensor = torch.from_numpy(vol_factors).to(dist.device)
+    vol_factors, surf_factors = load_scaling_factors(cfg)
 
     ######################################################
     # Configure the model
@@ -334,7 +322,7 @@ def main(cfg: DictConfig) -> None:
             torch.from_numpy(
                 np.stack([bounding_box["max"], bounding_box["min"]], axis=0)
             )
-            .to(vol_factors_tensor.dtype)
+            .to(vol_factors.dtype)
             .to(dist.device)
         )
 
@@ -529,7 +517,7 @@ def main(cfg: DictConfig) -> None:
             first_deriv=first_deriv,
             eqn=eqn,
             bounding_box=bounding_box,
-            vol_factors=vol_factors_tensor,
+            vol_factors=vol_factors,
             add_physics_loss=add_physics_loss,
         )
         epoch_end_time = time.perf_counter()
@@ -553,7 +541,7 @@ def main(cfg: DictConfig) -> None:
             first_deriv=first_deriv,
             eqn=eqn,
             bounding_box=bounding_box,
-            vol_factors=vol_factors_tensor,
+            vol_factors=vol_factors,
             add_physics_loss=add_physics_loss,
         )
 
diff --git a/examples/cfd/external_aerodynamics/domino/src/utils.py b/examples/cfd/external_aerodynamics/domino/src/utils.py
index 12259641e3..4cc04d0d3c 100644
--- a/examples/cfd/external_aerodynamics/domino/src/utils.py
+++ b/examples/cfd/external_aerodynamics/domino/src/utils.py
@@ -14,6 +14,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import os
+
 from dataclasses import dataclass
 from typing import Dict, Optional, Any
 import numpy as np
@@ -160,7 +162,8 @@ def coordinate_distributed_environment(cfg: DictConfig):
         placements: dict[str, torch.distributed.tensor.Placement]: The placements for the data set
     """
 
-    DistributedManager.initialize()
+    if not DistributedManager.is_initialized():
+        DistributedManager.initialize()
     dist = DistributedManager()
 
     # Default to no domain parallelism:
@@ -287,3 +290,57 @@ def summary(self) -> str:
             summary.append(f"  Max: {max_val}")
 
         return "\n".join(summary)
+
+
+def load_scaling_factors(
+    cfg: DictConfig, logger=None
+) -> tuple[torch.Tensor, torch.Tensor]:
+    """Load scaling factors from the configuration."""
+    pickle_path = os.path.join(cfg.data.scaling_factors)
+
+    try:
+        scaling_factors = ScalingFactors.load(pickle_path)
+        if logger is not None:
+            logger.info(f"Scaling factors loaded from: {pickle_path}")
+    except FileNotFoundError:
+        raise FileNotFoundError(
+            f"Scaling factors not found at: {pickle_path}; please run compute_statistics.py to compute them."
+        )
+
+    if cfg.model.normalization == "min_max_scaling":
+        vol_factors = np.asarray(
+            [
+                scaling_factors.max_val["volume_fields"],
+                scaling_factors.min_val["volume_fields"],
+            ]
+        )
+        surf_factors = np.asarray(
+            [
+                scaling_factors.max_val["surface_fields"],
+                scaling_factors.min_val["surface_fields"],
+            ]
+        )
+    elif cfg.model.normalization == "mean_std_scaling":
+        vol_factors = np.asarray(
+            [
+                scaling_factors.mean["volume_fields"],
+                scaling_factors.std["volume_fields"],
+            ]
+        )
+        surf_factors = np.asarray(
+            [
+                scaling_factors.mean["surface_fields"],
+                scaling_factors.std["surface_fields"],
+            ]
+        )
+    else:
+        raise ValueError(f"Invalid normalization mode: {cfg.model.normalization}")
+
+    vol_factors_tensor = torch.from_numpy(vol_factors)
+    surf_factors_tensor = torch.from_numpy(surf_factors)
+
+    dm = DistributedManager()
+    vol_factors_tensor = vol_factors_tensor.to(dm.device, dtype=torch.float32)
+    surf_factors_tensor = surf_factors_tensor.to(dm.device, dtype=torch.float32)
+
+    return vol_factors_tensor, surf_factors_tensor

From 316dfe61175b5f06605d9a1143c7d8f6190e5a74 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 8 Oct 2025 18:16:03 +0000
Subject: [PATCH 68/98] Fix volume encoding calculation.  Make sure surface
 grid is normalized

---
 physicsnemo/datapipes/cae/domino_datapipe.py | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py
index 9ee479223d..d72c0193f3 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe.py
@@ -637,7 +637,7 @@ def process_volume(
 
         # Use the closest point from the mesh to compute the volume encodings:
         pos_normals_closest_vol, pos_normals_com_vol = self.calculate_volume_encoding(
-            c_min, c_max, volume_coordinates, sdf_node_closest_point, center_of_mass
+            volume_coordinates, sdf_node_closest_point, center_of_mass
         )
 
         return_dict = {
@@ -656,17 +656,10 @@ def process_volume(
 
     def calculate_volume_encoding(
         self,
-        c_min: torch.Tensor,
-        c_max: torch.Tensor,
         volume_coordinates: torch.Tensor,
         sdf_node_closest_point: torch.Tensor,
         center_of_mass: torch.Tensor,
     ):
-        if self.config.normalize_coordinates:
-            # volume_coordinates = normalize(volume_coordinates, c_max, c_min)
-            sdf_node_closest_point = normalize(sdf_node_closest_point, c_max, c_min)
-            # center_of_mass = normalize(center_of_mass, c_max, c_min)
-
         pos_normals_closest_vol = volume_coordinates - sdf_node_closest_point
         pos_normals_com_vol = volume_coordinates - center_of_mass
 
@@ -730,8 +723,6 @@ def process_data(self, data_dict):
                 requires_grad=False,
             )
 
-        return_dict["surf_grid"] = surf_grid
-
         # We always need to calculate the SDF on the surface grid:
         # This is for the SDF Later:
         if self.config.normalize_coordinates:
@@ -740,6 +731,8 @@ def process_data(self, data_dict):
         else:
             normed_vertices = data_dict["stl_coordinates"]
 
+        return_dict["surf_grid"] = surf_grid
+
         # For SDF calculations, make sure the mesh_indices_flattened is an integer array:
         mesh_indices_flattened = data_dict["stl_faces"].to(torch.int32)
 

From cc9a56630dc68061aa768ecd6c368a63040b0f70 Mon Sep 17 00:00:00 2001
From: Rishikesh Ranade <dr.rranade@gmail.com>
Date: Wed, 8 Oct 2025 11:13:42 -0700
Subject: [PATCH 69/98] fixing bugs and refactoring test

---
 .../domino/src/compute_statistics.py          |   1 +
 .../domino/src/conf/config.yaml               |   9 +-
 .../external_aerodynamics/domino/src/test.py  | 298 +++++++++---------
 .../external_aerodynamics/domino/src/train.py |   4 +
 physicsnemo/datapipes/cae/domino_datapipe.py  |  16 +-
 .../datapipes/cae/drivaer_ml_dataset.py       |   4 +-
 6 files changed, 175 insertions(+), 157 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py b/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py
index d3516dff0f..991105492e 100644
--- a/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py
+++ b/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py
@@ -104,6 +104,7 @@ def main(cfg: DictConfig) -> None:
             cfg=cfg,
             input_path=cfg.data.input_dir,
             target_keys=target_keys,
+            max_samples=cfg.data.max_samples_for_statistics,
         )
         mean = {k: m.cpu().numpy() for k, m in mean.items()}
         std = {k: s.cpu().numpy() for k, s in std.items()}
diff --git a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
index 9a404ff240..13c00823b8 100644
--- a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
+++ b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
@@ -62,10 +62,10 @@ variables:
   global_parameters:
     inlet_velocity:
       type: vector
-      reference: [38.89] # vector [30, 0, 0] should be specified as [30], while [30, 30, 0] should be [30, 30].
+      reference: [30.00] # vector [30, 0, 0] should be specified as [30], while [30, 30, 0] should be [30, 30].
     air_density:
       type: scalar
-      reference: 1.0
+      reference: 1.205
 
 # ┌───────────────────────────────────────────┐
 # │         Data Configs                      │
@@ -86,6 +86,7 @@ data: # Input directory for training and validation data
   sampling: true
   scaling_factors: ${project_dir}/scaling_factors/scaling_factors.pkl
   volume_sample_from_disk: true
+  max_samples_for_statistics: 200
 
 # ┌───────────────────────────────────────────┐
 # │          Domain Parallelism Settings      │
@@ -175,7 +176,7 @@ model:
 # └───────────────────────────────────────────┘  
 train: # Training configurable parameters
   epochs: 1000
-  checkpoint_interval: 50
+  checkpoint_interval: 2
   dataloader:
     batch_size: 1
     preload_depth: 1
@@ -206,7 +207,7 @@ eval: # Testing configurable parameters
   test_path: /user/testing_data # Dir for testing data in raw format (vtp, vtu ,stls)
   save_path: /user/predicted_data # Dir to save predicted results in raw format (vtp, vtu)
   checkpoint_name: DoMINO.0.455.pt # Name of checkpoint to select from saved checkpoints
-  scaling_param_path: /user/scaling_params
+  scaling_param_path: /lustre/rranade/modulus_dev/corey_fork/physicsnemo/examples/cfd/external_aerodynamics/domino/outputs/DrivAerML_Dataset/
   refine_stl: False # Automatically refine STL during inference
   #TODO -  This was hardcoded anyways, remove it.
   # stencil_size: 7 # Stencil size for evaluating surface and volume model
diff --git a/examples/cfd/external_aerodynamics/domino/src/test.py b/examples/cfd/external_aerodynamics/domino/src/test.py
index c799e83f64..e5423207a6 100644
--- a/examples/cfd/external_aerodynamics/domino/src/test.py
+++ b/examples/cfd/external_aerodynamics/domino/src/test.py
@@ -54,7 +54,10 @@
 from physicsnemo.datapipes.cae.domino_datapipe import DoMINODataPipe
 from physicsnemo.models.domino.model import DoMINO
 from physicsnemo.utils.domino.utils import *
+from physicsnemo.utils.domino.vtk_file_utils import *
 from physicsnemo.utils.sdf import signed_distance_field
+from physicsnemo.utils.neighbors import knn
+from utils import ScalingFactors
 
 # AIR_DENSITY = 1.205
 # STREAM_VELOCITY = 30.00
@@ -84,7 +87,7 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors):
 
     with torch.no_grad():
         point_batch_size = 256000
-        data_dict = dict_to_device(data_dict, device)
+        # data_dict = dict_to_device(data_dict, device)
 
         # Non-dimensionalization factors
         length_scale = data_dict["length_scale"]
@@ -110,11 +113,14 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors):
             p_grid = data_dict["grid"]
             sdf_grid = data_dict["sdf_grid"]
             # Scaling factors
-            vol_max = data_dict["volume_min_max"][:, 1]
-            vol_min = data_dict["volume_min_max"][:, 0]
+            if "volume_min_max" in data_dict.keys():
+                vol_max = data_dict["volume_min_max"][:, 1]
+                vol_min = data_dict["volume_min_max"][:, 0]
+                geo_centers_vol = 2.0 * (geo_centers - vol_min) / (vol_max - vol_min) - 1
+            else:
+                geo_centers_vol = geo_centers
 
             # Normalize based on computational domain
-            geo_centers_vol = 2.0 * (geo_centers - vol_min) / (vol_max - vol_min) - 1
             encoding_g_vol = model.geo_rep_volume(geo_centers_vol, p_grid, sdf_grid)
 
         if output_features_surf is not None:
@@ -147,7 +153,7 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors):
             pos_volume_center_of_mass = data_dict["pos_volume_center_of_mass"]
             p_grid = data_dict["grid"]
 
-            prediction_vol = np.zeros_like(target_vol.cpu().numpy())
+            prediction_vol = torch.zeros_like(target_vol)
             num_points = volume_mesh_centers.shape[1]
             subdomain_points = int(np.floor(num_points / point_batch_size))
 
@@ -166,14 +172,13 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors):
                     pos_normals_com_batch = pos_volume_center_of_mass[
                         :, start_idx:end_idx
                     ]
-                    geo_encoding_local = model.geo_encoding_local(
+                    geo_encoding_local = model.volume_local_geo_encodings(
                         0.5 * encoding_g_vol,
                         volume_mesh_centers_batch,
                         p_grid,
-                        mode="volume",
                     )
                     if cfg.model.use_sdf_in_basis_func:
-                        pos_encoding = torch.cat(
+                        pos_encoding_all = torch.cat(
                             (
                                 sdf_nodes_batch,
                                 pos_volume_closest_batch,
@@ -182,38 +187,36 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors):
                             axis=-1,
                         )
                     else:
-                        pos_encoding = pos_normals_com_batch
-                    pos_encoding = model.position_encoder(
-                        pos_encoding, eval_mode="volume"
+                        pos_encoding_all = pos_normals_com_batch
+
+                    pos_encoding = model.fc_p_vol(
+                        pos_encoding_all
                     )
-                    tpredictions_batch = model.calculate_solution(
+                    tpredictions_batch = model.solution_calculator_vol(
                         volume_mesh_centers_batch,
                         geo_encoding_local,
                         pos_encoding,
                         global_params_values,
                         global_params_reference,
-                        num_sample_points=cfg.model.num_neighbors_volume,
-                        eval_mode="volume",
                     )
                     running_tloss_vol += loss_fn(tpredictions_batch, target_batch)
-                    prediction_vol[:, start_idx:end_idx] = (
-                        tpredictions_batch.cpu().numpy()
-                    )
+                    prediction_vol[:, start_idx:end_idx] = tpredictions_batch
 
             prediction_vol = unnormalize(prediction_vol, vol_factors[0], vol_factors[1])
+            # print(np.amax(prediction_vol, axis=(0, 1)), np.amin(prediction_vol, axis=(0, 1)))
 
             prediction_vol[:, :, :3] = (
-                prediction_vol[:, :, :3] * stream_velocity[0, 0].cpu().numpy()
+                prediction_vol[:, :, :3] * stream_velocity[0, 0]
             )
             prediction_vol[:, :, 3] = (
                 prediction_vol[:, :, 3]
-                * stream_velocity[0, 0].cpu().numpy() ** 2.0
-                * air_density[0, 0].cpu().numpy()
+                * stream_velocity[0, 0] ** 2.0
+                * air_density[0, 0]
             )
             prediction_vol[:, :, 4] = (
                 prediction_vol[:, :, 4]
-                * stream_velocity[0, 0].cpu().numpy()
-                * length_scale[0].cpu().numpy()
+                * stream_velocity[0, 0]
+                * length_scale[0]
             )
         else:
             prediction_vol = None
@@ -236,7 +239,7 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors):
             subdomain_points = int(np.floor(num_points / point_batch_size))
 
             target_surf = data_dict["surface_fields"]
-            prediction_surf = np.zeros_like(target_surf.cpu().numpy())
+            prediction_surf = torch.zeros_like(target_surf)
 
             start_time = time.time()
 
@@ -262,18 +265,16 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors):
                     pos_surface_center_of_mass_batch = pos_surface_center_of_mass[
                         :, start_idx:end_idx
                     ]
-                    geo_encoding_local = model.geo_encoding_local(
+                    geo_encoding_local = model.surface_local_geo_encodings(
                         0.5 * encoding_g_surf,
                         surface_mesh_centers_batch,
                         s_grid,
-                        mode="surface",
                     )
-                    pos_encoding = pos_surface_center_of_mass_batch
-                    pos_encoding = model.position_encoder(
-                        pos_encoding, eval_mode="surface"
+                    pos_encoding = model.fc_p_surf(
+                        pos_surface_center_of_mass_batch
                     )
 
-                    tpredictions_batch = model.calculate_solution_with_neighbors(
+                    tpredictions_batch = model.solution_calculator_surf(
                         surface_mesh_centers_batch,
                         geo_encoding_local,
                         pos_encoding,
@@ -284,20 +285,16 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors):
                         surface_neighbors_areas_batch,
                         global_params_values,
                         global_params_reference,
-                        num_sample_points=cfg.model.num_neighbors_surface,
                     )
 
                     running_tloss_surf += loss_fn(tpredictions_batch, target_batch)
-                    prediction_surf[:, start_idx:end_idx] = (
-                        tpredictions_batch.cpu().numpy()
-                    )
+                    prediction_surf[:, start_idx:end_idx] = tpredictions_batch
 
             prediction_surf = (
                 unnormalize(prediction_surf, surf_factors[0], surf_factors[1])
-                * stream_velocity[0, 0].cpu().numpy() ** 2.0
-                * air_density[0, 0].cpu().numpy()
+                * stream_velocity[0, 0] ** 2.0
+                * air_density[0, 0]
             )
-
         else:
             prediction_surf = None
 
@@ -346,22 +343,26 @@ def main(cfg: DictConfig):
         else:
             global_features += 1
 
-    vol_save_path = os.path.join(
-        cfg.eval.scaling_param_path, "volume_scaling_factors.npy"
-    )
-    surf_save_path = os.path.join(
-        cfg.eval.scaling_param_path, "surface_scaling_factors.npy"
-    )
-    if os.path.exists(vol_save_path):
-        vol_factors = np.load(vol_save_path)
-    else:
-        vol_factors = None
+    ######################################################
+    # Get scaling factors - precompute them if this fails!
+    ######################################################
+    pickle_path = os.path.join(cfg.data.scaling_factors)
 
-    if os.path.exists(surf_save_path):
-        surf_factors = np.load(surf_save_path)
-    else:
-        surf_factors = None
+    try:
+        scaling_factors = ScalingFactors.load(pickle_path)
+    except FileNotFoundError:
+        raise FileNotFoundError(
+            f"Scaling factors not found at: {pickle_path}; please run compute_statistics.py to compute them."
+        )
 
+    # vol_factors = np.asarray([scaling_factors.max_val["volume_fields"], scaling_factors.min_val["volume_fields"]])
+    # surf_factors = np.asarray([scaling_factors.max_val["surface_fields"], scaling_factors.min_val["surface_fields"]])
+    
+    vol_factors = np.asarray([[ 2.9064691e+00, 1.3743978e+00,1.2992665e+00, 1.0714761e+00, 3.2597079e-03], [-2.9988267e+00, -1.3753892e+00, -1.2892706e+00, -1.1400493e+00, 1.0002602e-11]])
+    surf_factors = np.asarray([[ 1.8464564, 0.09996139, 0.07988136, 0.05437989], [-2.0476909, -0.10289095, -0.07811281, -0.05411612]])
+
+    vol_factors = torch.from_numpy(vol_factors).to(dist.device)
+    surf_factors = torch.from_numpy(surf_factors).to(dist.device)
     print("Vol factors:", vol_factors)
     print("Surf factors:", surf_factors)
 
@@ -434,6 +435,12 @@ def main(cfg: DictConfig):
         stl_sizes = np.array(stl_sizes.cell_data["Area"], dtype=np.float32)
         stl_centers = np.array(mesh_stl.cell_centers().points, dtype=np.float32)
 
+        # Convert to torch tensors and load on device
+        stl_vertices = torch.from_numpy(stl_vertices).to(torch.float32).to(dist.device)
+        stl_sizes = torch.from_numpy(stl_sizes).to(torch.float32).to(dist.device)
+        stl_centers = torch.from_numpy(stl_centers).to(torch.float32).to(dist.device)
+        mesh_indices_flattened = torch.from_numpy(mesh_indices_flattened).to(torch.int32).to(dist.device)
+
         # Center of mass calculation
         center_of_mass = calculate_center_of_mass(stl_centers, stl_sizes)
 
@@ -446,27 +453,28 @@ def main(cfg: DictConfig):
             bounding_box_dims_surf.append(np.asarray(cfg.data.bounding_box_surface.min))
             s_max = np.float32(bounding_box_dims_surf[0])
             s_min = np.float32(bounding_box_dims_surf[1])
+            s_max = torch.from_numpy(s_max).to(torch.float32).to(dist.device)
+            s_min = torch.from_numpy(s_min).to(torch.float32).to(dist.device)
 
         nx, ny, nz = cfg.model.interp_res
 
-        surf_grid = create_grid(s_max, s_min, [nx, ny, nz])
-        surf_grid_reshaped = surf_grid.reshape(nx * ny * nz, 3)
+        surf_grid = create_grid(s_max, s_min, torch.from_numpy(np.asarray([nx, ny, nz])).to(dist.device))
+
+        normed_stl_vertices_cp = normalize(stl_vertices, s_max, s_min)
+        surf_grid_normed = normalize(surf_grid, s_max, s_min)
 
         # SDF calculation on the grid using WARP
+        time_start = time.time()
         sdf_surf_grid = signed_distance_field(
-            cp.asarray(stl_vertices).astype(cp.float32),
-            cp.asarray(mesh_indices_flattened).astype(cp.int32),
-            cp.asarray(surf_grid_reshaped).astype(cp.float32),
+            normed_stl_vertices_cp,
+            mesh_indices_flattened,
+            surf_grid_normed,
             use_sign_winding_number=True,
-            return_cupy=False,
-        ).reshape(nx, ny, nz)
-
-        surf_grid = np.float32(surf_grid)
-        sdf_surf_grid = np.float32(sdf_surf_grid)
-        surf_grid_max_min = np.float32(np.asarray([s_min, s_max]))
-        if cfg.model.normalize_coordinates:
-            sdf_surf_grid = normalize(sdf_surf_grid, xp.amax(surf_grid), xp.amin(surf_grid))
-
+        )
+        sdf_surf_grid = sdf_surf_grid[0]
+        
+        surf_grid_max_min = torch.stack([s_min, s_max])
+        
         # Get global parameters and global parameters scaling from config.yaml
         global_params_names = list(cfg.variables.global_parameters.keys())
         global_params_reference = {
@@ -494,6 +502,7 @@ def main(cfg: DictConfig):
         global_params_reference = np.array(
             global_params_reference_list, dtype=np.float32
         )
+        global_params_reference = torch.from_numpy(global_params_reference).to(dist.device)
 
         # Define the list of global parameter values for each simulation.
         # Note: The user must ensure that the values provided here correspond to the
@@ -509,7 +518,10 @@ def main(cfg: DictConfig):
                 raise ValueError(
                     f"Global parameter {key} not supported for  this dataset"
                 )
-        global_params_values = np.array(global_params_values_list, dtype=np.float32)
+        global_params_values_list = np.array(
+            global_params_values_list, dtype=np.float32
+        )
+        global_params_values = torch.from_numpy(global_params_values_list).to(dist.device)
 
         # Read VTP
         if model_type == "surface" or model_type == "combined":
@@ -537,18 +549,17 @@ def main(cfg: DictConfig):
             surface_normals = (
                 surface_normals / np.linalg.norm(surface_normals, axis=1)[:, np.newaxis]
             )
-
-            if cfg.model.normalize_coordinates:
-                surface_coordinates = normalize(surface_coordinates, s_max, s_min)
-                surf_grid = normalize(surf_grid, s_max, s_min)
-                center_of_mass_normalized = normalize(center_of_mass, s_max, s_min)
-            else:
-                center_of_mass_normalized = center_of_mass
+            surface_coordinates = torch.from_numpy(surface_coordinates).to(torch.float32).to(dist.device)
+            surface_normals = torch.from_numpy(surface_normals).to(torch.float32).to(dist.device)
+            surface_sizes = torch.from_numpy(surface_sizes).to(torch.float32).to(dist.device)
 
             if cfg.model.num_neighbors_surface > 1:
-                interp_func = KDTree(surface_coordinates)
-                dd, ii = interp_func.query(
-                    surface_coordinates, k=cfg.model.num_neighbors_surface
+
+                time_start = time.time()
+                ii, dd = knn(
+                    points=surface_coordinates,
+                    queries=surface_coordinates,
+                    k=cfg.model.num_neighbors_surface,
                 )
 
                 surface_neighbors = surface_coordinates[ii]
@@ -563,12 +574,15 @@ def main(cfg: DictConfig):
                 surface_neighbors_normals = surface_normals
                 surface_neighbors_sizes = surface_sizes
 
-            
+            if cfg.data.normalize_coordinates:
+                surface_coordinates = normalize(surface_coordinates, s_max, s_min)
+                surf_grid = normalize(surf_grid, s_max, s_min)
+                center_of_mass_normalized = normalize(center_of_mass, s_max, s_min)
+                surface_neighbors = normalize(surface_neighbors, s_max, s_min)
+            else:
+                center_of_mass_normalized = center_of_mass
             pos_surface_center_of_mass = surface_coordinates - center_of_mass_normalized
 
-            # surface_coordinates = normalize(surface_coordinates, s_max, s_min)
-            # surface_neighbors = normalize(surface_neighbors, s_max, s_min)
-
         else:
             surface_coordinates = None
             surface_fields = None
@@ -589,13 +603,13 @@ def main(cfg: DictConfig):
                 polydata_vol, volume_variable_names
             )
             volume_fields = np.concatenate(volume_fields, axis=-1)
+            volume_coordinates = torch.from_numpy(volume_coordinates).to(torch.float32).to(dist.device)
+            volume_fields = torch.from_numpy(volume_fields).to(torch.float32).to(dist.device)
 
             bounding_box_dims = []
             bounding_box_dims.append(np.asarray(cfg.data.bounding_box.max))
             bounding_box_dims.append(np.asarray(cfg.data.bounding_box.min))
 
-            v_max = np.amax(volume_coordinates, 0)
-            v_min = np.amin(volume_coordinates, 0)
             if bounding_box_dims is None:
                 c_max = s_max + (s_max - s_min) / 2
                 c_min = s_min - (s_max - s_min) / 2
@@ -603,46 +617,50 @@ def main(cfg: DictConfig):
             else:
                 c_max = np.float32(bounding_box_dims[0])
                 c_min = np.float32(bounding_box_dims[1])
+                c_max = torch.from_numpy(c_max).to(dist.device)
+                c_min = torch.from_numpy(c_min).to(dist.device)
 
             # Generate a grid of specified resolution to map the bounding box
             # The grid is used for capturing structured geometry features and SDF representation of geometry
-            grid = create_grid(c_max, c_min, [nx, ny, nz])
-            grid_reshaped = grid.reshape(nx * ny * nz, 3)
+            grid = create_grid(c_max, c_min, torch.from_numpy(np.asarray([nx, ny, nz])).to(dist.device))
+
+            if cfg.data.normalize_coordinates:
+                volume_coordinates = normalize(volume_coordinates, c_max, c_min)
+                grid = normalize(grid, c_max, c_min)
+                center_of_mass_normalized = normalize(center_of_mass, c_max, c_min)
+                normed_stl_vertices_vol = normalize(stl_vertices, c_max, c_min)
+            else:
+                center_of_mass_normalized = center_of_mass
 
             # SDF calculation on the grid using WARP
+            time_start = time.time()
             sdf_grid = signed_distance_field(
-                cp.asarray(stl_vertices).astype(cp.float32),
-                cp.asarray(mesh_indices_flattened).astype(cp.int32),
-                cp.asarray(grid_reshaped).astype(cp.float32),
+                normed_stl_vertices_vol,
+                mesh_indices_flattened,
+                grid,
                 use_sign_winding_number=True,
-                return_cupy=False,
-            ).reshape(nx, ny, nz)
-
+            )
+            sdf_grid = sdf_grid[0]
+            
             # SDF calculation
+            time_start = time.time()
             sdf_nodes, sdf_node_closest_point = signed_distance_field(
-                cp.asarray(stl_vertices).astype(cp.float32),
-                cp.asarray(mesh_indices_flattened).astype(cp.int32),
-                cp.asarray(volume_coordinates).astype(cp.float32),
-                include_hit_points=True,
+                normed_stl_vertices_vol,
+                mesh_indices_flattened,
+                volume_coordinates,
                 use_sign_winding_number=True,
-                return_cupy=False,
             )
             sdf_nodes = sdf_nodes.reshape(-1, 1)
             vol_grid_max_min = np.asarray([c_min, c_max])
 
-            if cfg.model.normalize_coordinates:
-                volume_coordinates = normalize(volume_coordinates, c_max, c_min)
-                grid = normalize(grid, c_max, c_min)
-                center_of_mass_normalized = normalize(center_of_mass, c_max, c_min)
-                sdf_grid = normalize(sdf_grid, xp.amax(grid), xp.amin(grid))
-                sdf_nodes = normalize(sdf_nodes, xp.amax(grid), xp.amin(grid))
+            if cfg.data.normalize_coordinates:
                 sdf_node_closest_point = normalize(sdf_node_closest_point, c_max, c_min)
             else:
-                center_of_mass_normalized = center_of_mass
+                sdf_node_closest_point = sdf_node_closest_point
 
             pos_volume_closest = volume_coordinates - sdf_node_closest_point
             pos_volume_center_of_mass = volume_coordinates - center_of_mass_normalized
-
+            
         else:
             volume_coordinates = None
             volume_fields = None
@@ -651,7 +669,8 @@ def main(cfg: DictConfig):
 
         # print(f"Processed sdf and normalized")
 
-        geom_centers = np.float32(stl_vertices)
+        geom_centers = stl_vertices
+        # print(f"Geom centers max: {np.amax(geom_centers, axis=0)}, min: {np.amin(geom_centers, axis=0)}")
 
         if model_type == "combined":
             # Add the parameters to the dictionary
@@ -676,35 +695,27 @@ def main(cfg: DictConfig):
                 "surface_fields": surface_fields,
                 "volume_min_max": vol_grid_max_min,
                 "surface_min_max": surf_grid_max_min,
-                "length_scale": np.array(length_scale, dtype=np.float32),
-                "global_params_values": np.expand_dims(
-                    np.array(global_params_values, dtype=np.float32), -1
-                ),
-                "global_params_reference": np.expand_dims(
-                    np.array(global_params_reference, dtype=np.float32), -1
-                ),
+                "length_scale": length_scale,
+                "global_params_values": torch.unsqueeze(global_params_values, -1),
+                "global_params_reference": torch.unsqueeze(global_params_reference, -1),
             }
         elif model_type == "surface":
             data_dict = {
-                "pos_surface_center_of_mass": np.float32(pos_surface_center_of_mass),
-                "geometry_coordinates": np.float32(geom_centers),
-                "surf_grid": np.float32(surf_grid),
-                "sdf_surf_grid": np.float32(sdf_surf_grid),
-                "surface_mesh_centers": np.float32(surface_coordinates),
-                "surface_mesh_neighbors": np.float32(surface_neighbors),
-                "surface_normals": np.float32(surface_normals),
-                "surface_neighbors_normals": np.float32(surface_neighbors_normals),
-                "surface_areas": np.float32(surface_sizes),
-                "surface_neighbors_areas": np.float32(surface_neighbors_sizes),
-                "surface_fields": np.float32(surface_fields),
-                "surface_min_max": np.float32(surf_grid_max_min),
-                "length_scale": np.array(length_scale, dtype=np.float32),
-                "global_params_values": np.expand_dims(
-                    np.array(global_params_values, dtype=np.float32), -1
-                ),
-                "global_params_reference": np.expand_dims(
-                    np.array(global_params_reference, dtype=np.float32), -1
-                ),
+                "pos_surface_center_of_mass": pos_surface_center_of_mass,
+                "geometry_coordinates": geom_centers,
+                "surf_grid": surf_grid,
+                "sdf_surf_grid": sdf_surf_grid,
+                "surface_mesh_centers": surface_coordinates,
+                "surface_mesh_neighbors": surface_neighbors,
+                "surface_normals": surface_normals,
+                "surface_neighbors_normals": surface_neighbors_normals,
+                "surface_areas": surface_sizes,
+                "surface_neighbors_areas": surface_neighbors_sizes,
+                "surface_fields": surface_fields,
+                "surface_min_max": surf_grid_max_min,
+                "length_scale": length_scale,
+                "global_params_values": torch.unsqueeze(global_params_values, -1),
+                "global_params_reference": torch.unsqueeze(global_params_reference, -1),
             }
         elif model_type == "volume":
             data_dict = {
@@ -720,17 +731,13 @@ def main(cfg: DictConfig):
                 "volume_mesh_centers": volume_coordinates,
                 "volume_min_max": vol_grid_max_min,
                 "surface_min_max": surf_grid_max_min,
-                "length_scale": np.array(length_scale, dtype=np.float32),
-                "global_params_values": np.expand_dims(
-                    np.array(global_params_values, dtype=np.float32), -1
-                ),
-                "global_params_reference": np.expand_dims(
-                    np.array(global_params_reference, dtype=np.float32), -1
-                ),
+                "length_scale": length_scale,
+                "global_params_values": torch.unsqueeze(global_params_values, -1),
+                "global_params_reference": torch.unsqueeze(global_params_reference, -1),
             }
 
         data_dict = {
-            key: torch.from_numpy(np.expand_dims(np.float32(value), 0))
+            key: torch.unsqueeze(value, 0)
             for key, value in data_dict.items()
         }
 
@@ -807,7 +814,7 @@ def main(cfg: DictConfig):
             prediction_vol = prediction_vol[0]
             c_min = vol_grid_max_min[0]
             c_max = vol_grid_max_min[1]
-            volume_coordinates = unnormalize(volume_coordinates, c_max, c_min)
+            volume_coordinates = unnormalize_np(volume_coordinates, c_max, c_min)
             ids_in_bbox = np.where(
                 (volume_coordinates[:, 0] < c_min[0])
                 | (volume_coordinates[:, 0] > c_max[0])
@@ -827,31 +834,32 @@ def main(cfg: DictConfig):
             )
             l2_volume_all.append(np.sqrt(l2_error) / np.sqrt(l2_gt))
 
+        # import pdb; pdb.set_trace()
         if prediction_surf is not None:
-            surfParam_vtk = numpy_support.numpy_to_vtk(prediction_surf[0, :, 0:1])
+            surfParam_vtk = numpy_support.numpy_to_vtk(prediction_surf[0, :, 0:1].cpu().numpy())
             surfParam_vtk.SetName(f"{surface_variable_names[0]}Pred")
             celldata_all.GetCellData().AddArray(surfParam_vtk)
 
-            surfParam_vtk = numpy_support.numpy_to_vtk(prediction_surf[0, :, 1:])
+            surfParam_vtk = numpy_support.numpy_to_vtk(prediction_surf[0, :, 1:].cpu().numpy())
             surfParam_vtk.SetName(f"{surface_variable_names[1]}Pred")
             celldata_all.GetCellData().AddArray(surfParam_vtk)
 
-            write_to_vtp(celldata_all, vtp_pred_save_path)
+            # write_to_vtp(celldata_all, vtp_pred_save_path)
 
         if prediction_vol is not None:
-            volParam_vtk = numpy_support.numpy_to_vtk(prediction_vol[:, 0:3])
+            volParam_vtk = numpy_support.numpy_to_vtk(prediction_vol[:, 0:3].cpu().numpy())
             volParam_vtk.SetName(f"{volume_variable_names[0]}Pred")
             polydata_vol.GetPointData().AddArray(volParam_vtk)
 
-            volParam_vtk = numpy_support.numpy_to_vtk(prediction_vol[:, 3:4])
+            volParam_vtk = numpy_support.numpy_to_vtk(prediction_vol[:, 3:4].cpu().numpy())
             volParam_vtk.SetName(f"{volume_variable_names[1]}Pred")
             polydata_vol.GetPointData().AddArray(volParam_vtk)
 
-            volParam_vtk = numpy_support.numpy_to_vtk(prediction_vol[:, 4:5])
+            volParam_vtk = numpy_support.numpy_to_vtk(prediction_vol[:, 4:5].cpu().numpy())
             volParam_vtk.SetName(f"{volume_variable_names[2]}Pred")
             polydata_vol.GetPointData().AddArray(volParam_vtk)
 
-            write_to_vtu(polydata_vol, vtu_pred_save_path)
+            # write_to_vtu(polydata_vol, vtu_pred_save_path)
 
     l2_surface_all = np.asarray(l2_surface_all)  # num_files, 4
     l2_volume_all = np.asarray(l2_volume_all)  # num_files, 4
diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py
index 954114ae46..eb6331331e 100644
--- a/examples/cfd/external_aerodynamics/domino/src/train.py
+++ b/examples/cfd/external_aerodynamics/domino/src/train.py
@@ -276,6 +276,10 @@ def main(cfg: DictConfig) -> None:
     # Get scaling factors - precompute them if this fails!
     ######################################################
     vol_factors, surf_factors = load_scaling_factors(cfg)
+    
+    vol_factors = np.asarray([[ 2.9064691e+00, 1.3743978e+00,1.2992665e+00, 1.0714761e+00, 3.2597079e-03], [-2.9988267e+00, -1.3753892e+00, -1.2892706e+00, -1.1400493e+00, 1.0002602e-11]])
+    surf_factors = np.asarray([[ 1.8464564, 0.09996139, 0.07988136, 0.05437989], [-2.0476909, -0.10289095, -0.07811281, -0.05411612]])
+    vol_factors_tensor = torch.from_numpy(vol_factors).to(dist.device)
 
     ######################################################
     # Configure the model
diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py
index d72c0193f3..5902b9d857 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe.py
@@ -481,7 +481,7 @@ def process_surface(
                 queries=surface_coordinates,
                 k=self.config.num_surface_neighbors,
             )
-
+            # print(f"Full surface coordinates shape: {full_surface_coordinates.shape}")
             # Pull out the neighbor elements.
             # Note that `neighbor_indices` is the index into the original,
             # full sized tensors (full_surface_coordinates, etc).
@@ -495,10 +495,8 @@ def process_surface(
 
         # Better to normalize everything after the kNN and sampling
         if self.config.normalize_coordinates:
-            # surf_grid = normalize(surf_grid, s_max, s_min)
             surface_coordinates = normalize(surface_coordinates, s_max, s_min)
             surface_neighbors = normalize(surface_neighbors, s_max, s_min)
-            # Make sure to normalize the center of mass for the normals_com_surface calc
             center_of_mass = normalize(center_of_mass, s_max, s_min)
 
         pos_normals_com_surface = surface_coordinates - center_of_mass
@@ -595,13 +593,12 @@ def process_volume(
         if self.config.normalize_coordinates:
             volume_coordinates = normalize(volume_coordinates, c_max, c_min)
             grid = normalize(volume_grid, c_max, c_min)
-            # This is used later in the SDF, apply the same scaling to the mesh
-            # coordinates:
             normed_vertices = normalize(stl_vertices, c_max, c_min)
             center_of_mass = normalize(center_of_mass, c_max, c_min)
         else:
             grid = volume_grid
             normed_vertices = stl_vertices
+            center_of_mass = center_of_mass
 
         ########################################################################
         # Apply scaling to the targets, if desired:
@@ -744,7 +741,8 @@ def process_data(self, data_dict):
             use_sign_winding_number=True,
         )
         return_dict["sdf_surf_grid"] = sdf_surf_grid
-
+        return_dict["surf_grid"] = surf_grid
+        
         # Store this only if normalization is active:
         if self.config.normalize_coordinates:
             return_dict["surface_min_max"] = torch.stack([s_min, s_max])
@@ -810,6 +808,9 @@ def process_data(self, data_dict):
 
             return_dict.update(surface_dict)
 
+        for key, value in return_dict.items():
+            print(f"Key: {key}, Max: {torch.amax(value)}, Min: {torch.amin(value)}")
+        exit()
         return return_dict
 
     def scale_model_targets(
@@ -932,7 +933,7 @@ def __iter__(self):
 
 
 def compute_scaling_factors(
-    cfg: DictConfig, input_path: str, target_keys: list[str], use_cache=None
+    cfg: DictConfig, input_path: str, target_keys: list[str], use_cache=None, max_samples=20,
 ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
     """
     Using the dataset at the path, compute the mean, std, min, and max of the target keys.
@@ -956,6 +957,7 @@ def compute_scaling_factors(
     mean, std, min_val, max_val = compute_mean_std_min_max(
         dataset,
         field_keys=target_keys,
+        max_samples=max_samples,
     )
 
     return mean, std, min_val, max_val
diff --git a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
index faa62f6ed6..c4bd4b8590 100644
--- a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
+++ b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
@@ -1177,9 +1177,11 @@ def compute_mean_std_min_max(
             # Update min/max
             batch_min = field_data.amin(dim=(0))
             batch_max = field_data.amax(dim=(0))
+            
             min_val[field_key] = torch.minimum(min_val[field_key], batch_min)
-            max_val[field_key] = torch.maximum(max_val[field_key], batch_max)
 
+            max_val[field_key] = torch.maximum(max_val[field_key], batch_max)   
+                     
             # Update running mean and M2 (Welford's algorithm)
             delta = batch_mean - mean[field_key]
             N[field_key] += batch_n  # batch_n should also be torch.int64

From 961d4ba8d83d66af079c95c556848c2f445be650 Mon Sep 17 00:00:00 2001
From: Rishikesh Ranade <dr.rranade@gmail.com>
Date: Wed, 8 Oct 2025 11:37:38 -0700
Subject: [PATCH 70/98] remove print command

---
 physicsnemo/datapipes/cae/domino_datapipe.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py
index 5902b9d857..1a8e7e6164 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe.py
@@ -808,9 +808,6 @@ def process_data(self, data_dict):
 
             return_dict.update(surface_dict)
 
-        for key, value in return_dict.items():
-            print(f"Key: {key}, Max: {torch.amax(value)}, Min: {torch.amin(value)}")
-        exit()
         return return_dict
 
     def scale_model_targets(

From 14be02f71f5dd7ea0f05cbf19986e63d6a1fa895 Mon Sep 17 00:00:00 2001
From: Rishikesh Ranade <dr.rranade@gmail.com>
Date: Wed, 8 Oct 2025 12:24:31 -0700
Subject: [PATCH 71/98] fixing issues in test

---
 examples/cfd/external_aerodynamics/domino/src/test.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/src/test.py b/examples/cfd/external_aerodynamics/domino/src/test.py
index e5423207a6..cbdd6ca4ba 100644
--- a/examples/cfd/external_aerodynamics/domino/src/test.py
+++ b/examples/cfd/external_aerodynamics/domino/src/test.py
@@ -430,7 +430,8 @@ def main(cfg: DictConfig):
             :, 1:
         ]  # Assuming triangular elements
         mesh_indices_flattened = stl_faces.flatten()
-        length_scale = np.amax(np.amax(stl_vertices, 0) - np.amin(stl_vertices, 0))
+        length_scale = np.array(np.amax(np.amax(stl_vertices, 0) - np.amin(stl_vertices, 0)), dtype=np.float32)
+        length_scale = torch.from_numpy(length_scale).to(torch.float32).to(dist.device)
         stl_sizes = mesh_stl.compute_cell_sizes(length=False, area=True, volume=False)
         stl_sizes = np.array(stl_sizes.cell_data["Area"], dtype=np.float32)
         stl_centers = np.array(mesh_stl.cell_centers().points, dtype=np.float32)
@@ -552,6 +553,7 @@ def main(cfg: DictConfig):
             surface_coordinates = torch.from_numpy(surface_coordinates).to(torch.float32).to(dist.device)
             surface_normals = torch.from_numpy(surface_normals).to(torch.float32).to(dist.device)
             surface_sizes = torch.from_numpy(surface_sizes).to(torch.float32).to(dist.device)
+            surface_fields = torch.from_numpy(surface_fields).to(torch.float32).to(dist.device)
 
             if cfg.model.num_neighbors_surface > 1:
 
@@ -651,7 +653,7 @@ def main(cfg: DictConfig):
                 use_sign_winding_number=True,
             )
             sdf_nodes = sdf_nodes.reshape(-1, 1)
-            vol_grid_max_min = np.asarray([c_min, c_max])
+            vol_grid_max_min = torch.stack([c_min, c_max])
 
             if cfg.data.normalize_coordinates:
                 sdf_node_closest_point = normalize(sdf_node_closest_point, c_max, c_min)
@@ -844,7 +846,7 @@ def main(cfg: DictConfig):
             surfParam_vtk.SetName(f"{surface_variable_names[1]}Pred")
             celldata_all.GetCellData().AddArray(surfParam_vtk)
 
-            # write_to_vtp(celldata_all, vtp_pred_save_path)
+            write_to_vtp(celldata_all, vtp_pred_save_path)
 
         if prediction_vol is not None:
             volParam_vtk = numpy_support.numpy_to_vtk(prediction_vol[:, 0:3].cpu().numpy())
@@ -859,7 +861,7 @@ def main(cfg: DictConfig):
             volParam_vtk.SetName(f"{volume_variable_names[2]}Pred")
             polydata_vol.GetPointData().AddArray(volParam_vtk)
 
-            # write_to_vtu(polydata_vol, vtu_pred_save_path)
+            write_to_vtu(polydata_vol, vtu_pred_save_path)
 
     l2_surface_all = np.asarray(l2_surface_all)  # num_files, 4
     l2_volume_all = np.asarray(l2_volume_all)  # num_files, 4

From eb62dce25d3a79c2ad6323f139d0f6256d0e61af Mon Sep 17 00:00:00 2001
From: Rishikesh Ranade <dr.rranade@gmail.com>
Date: Wed, 8 Oct 2025 12:52:41 -0700
Subject: [PATCH 72/98] fixing errors in test.py

---
 .../external_aerodynamics/domino/src/test.py  | 42 +++++++++----------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/src/test.py b/examples/cfd/external_aerodynamics/domino/src/test.py
index cbdd6ca4ba..93fd16c25c 100644
--- a/examples/cfd/external_aerodynamics/domino/src/test.py
+++ b/examples/cfd/external_aerodynamics/domino/src/test.py
@@ -748,41 +748,41 @@ def main(cfg: DictConfig):
         )
 
         if prediction_surf is not None:
-            surface_sizes = np.expand_dims(surface_sizes, -1)
+            surface_sizes = torch.unsqueeze(surface_sizes, -1)
 
-            pres_x_pred = np.sum(
+            pres_x_pred = torch.sum(
                 prediction_surf[0, :, 0] * surface_normals[:, 0] * surface_sizes[:, 0]
             )
-            shear_x_pred = np.sum(prediction_surf[0, :, 1] * surface_sizes[:, 0])
+            shear_x_pred = torch.sum(prediction_surf[0, :, 1] * surface_sizes[:, 0])
 
-            pres_x_true = np.sum(
+            pres_x_true = torch.sum(
                 surface_fields[:, 0] * surface_normals[:, 0] * surface_sizes[:, 0]
             )
-            shear_x_true = np.sum(surface_fields[:, 1] * surface_sizes[:, 0])
+            shear_x_true = torch.sum(surface_fields[:, 1] * surface_sizes[:, 0])
 
-            force_x_pred = np.sum(
+            force_x_pred = torch.sum(
                 prediction_surf[0, :, 0] * surface_normals[:, 0] * surface_sizes[:, 0]
                 - prediction_surf[0, :, 1] * surface_sizes[:, 0]
             )
-            force_x_true = np.sum(
+            force_x_true = torch.sum(
                 surface_fields[:, 0] * surface_normals[:, 0] * surface_sizes[:, 0]
                 - surface_fields[:, 1] * surface_sizes[:, 0]
             )
 
-            force_y_pred = np.sum(
+            force_y_pred = torch.sum(
                 prediction_surf[0, :, 0] * surface_normals[:, 1] * surface_sizes[:, 0]
                 - prediction_surf[0, :, 2] * surface_sizes[:, 0]
             )
-            force_y_true = np.sum(
+            force_y_true = torch.sum(
                 surface_fields[:, 0] * surface_normals[:, 1] * surface_sizes[:, 0]
                 - surface_fields[:, 2] * surface_sizes[:, 0]
             )
 
-            force_z_pred = np.sum(
+            force_z_pred = torch.sum(
                 prediction_surf[0, :, 0] * surface_normals[:, 2] * surface_sizes[:, 0]
                 - prediction_surf[0, :, 3] * surface_sizes[:, 0]
             )
-            force_z_true = np.sum(
+            force_z_true = torch.sum(
                 surface_fields[:, 0] * surface_normals[:, 2] * surface_sizes[:, 0]
                 - surface_fields[:, 3] * surface_sizes[:, 0]
             )
@@ -801,14 +801,14 @@ def main(cfg: DictConfig):
                 ]
             )
 
-            l2_gt = np.mean(np.square(surface_fields), (0))
-            l2_error = np.mean(np.square(prediction_surf[0] - surface_fields), (0))
-            l2_surface_all.append(np.sqrt(l2_error / l2_gt))
+            l2_gt = torch.mean(torch.square(surface_fields), (0))
+            l2_error = torch.mean(torch.square(prediction_surf[0] - surface_fields), (0))
+            l2_surface_all.append(torch.sqrt(l2_error / l2_gt))
 
             print(
                 "Surface L-2 norm:",
                 dirname,
-                np.sqrt(l2_error) / np.sqrt(l2_gt),
+                torch.sqrt(l2_error) / torch.sqrt(l2_gt),
             )
 
         if prediction_vol is not None:
@@ -816,8 +816,8 @@ def main(cfg: DictConfig):
             prediction_vol = prediction_vol[0]
             c_min = vol_grid_max_min[0]
             c_max = vol_grid_max_min[1]
-            volume_coordinates = unnormalize_np(volume_coordinates, c_max, c_min)
-            ids_in_bbox = np.where(
+            volume_coordinates = unnormalize(volume_coordinates, c_max, c_min)
+            ids_in_bbox = torch.where(
                 (volume_coordinates[:, 0] < c_min[0])
                 | (volume_coordinates[:, 0] > c_max[0])
                 | (volume_coordinates[:, 1] < c_min[1])
@@ -827,14 +827,14 @@ def main(cfg: DictConfig):
             )
             target_vol[ids_in_bbox] = 0.0
             prediction_vol[ids_in_bbox] = 0.0
-            l2_gt = np.mean(np.square(target_vol), (0))
-            l2_error = np.mean(np.square(prediction_vol - target_vol), (0))
+            l2_gt = torch.mean(torch.square(target_vol), (0))
+            l2_error = torch.mean(torch.square(prediction_vol - target_vol), (0))
             print(
                 "Volume L-2 norm:",
                 dirname,
-                np.sqrt(l2_error) / np.sqrt(l2_gt),
+                torch.sqrt(l2_error) / torch.sqrt(l2_gt),
             )
-            l2_volume_all.append(np.sqrt(l2_error) / np.sqrt(l2_gt))
+            l2_volume_all.append(torch.sqrt(l2_error) / torch.sqrt(l2_gt))
 
         # import pdb; pdb.set_trace()
         if prediction_surf is not None:

From bac53655a2a37c413cfbb10497700b2bdac4abcd Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 8 Oct 2025 13:54:50 -0700
Subject: [PATCH 73/98] Update volumetric sub sampling so that it is more
 robust when not reading volume data.

---
 .../datapipes/cae/drivaer_ml_dataset.py       | 55 +++++++++++--------
 1 file changed, 31 insertions(+), 24 deletions(-)

diff --git a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
index c4bd4b8590..6d46a40879 100644
--- a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
+++ b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
@@ -80,6 +80,8 @@ def __init__(
 
         self.volume_sampling_size = None
 
+        self.is_volumetric = any(["volume" in key for key in self.keys_to_read])
+
     @abstractmethod
     def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]:
         """
@@ -252,14 +254,15 @@ def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]:
             raise ValueError(f"Keys {keys_missing} not found in file {filename}")
 
         # Make sure to select the slice outside of the loop.
-        if self.volume_sampling_size is not None:
-            volume_slice = self.select_random_sections_from_slice(
-                0,
-                in_data["volume_mesh_centers"].shape[0],
-                self.volume_sampling_size,
-            )
-        else:
-            volume_slice = slice(0, in_data["volume_mesh_centers"].shape[0])
+        if self.is_volumetric:
+            if self.volume_sampling_size is not None:
+                volume_slice = self.select_random_sections_from_slice(
+                    0,
+                    in_data["volume_mesh_centers"].shape[0],
+                    self.volume_sampling_size,
+                )
+            else:
+                volume_slice = slice(0, in_data["volume_mesh_centers"].shape[0])
 
         # This is a slower basic way to do this, to be improved:
         data = {}
@@ -311,14 +314,15 @@ def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]:
             raise ValueError(f"Keys {missing_keys} not found in file {filename}")
 
         # Make sure to select the slice outside of the loop.
-        if self.volume_sampling_size is not None:
-            volume_slice = self.select_random_sections_from_slice(
-                0,
-                group["volume_mesh_centers"].shape[0],
-                self.volume_sampling_size,
-            )
-        else:
-            volume_slice = slice(0, group["volume_mesh_centers"].shape[0])
+        if self.is_volumetric:
+            if self.volume_sampling_size is not None:
+                volume_slice = self.select_random_sections_from_slice(
+                    0,
+                    group["volume_mesh_centers"].shape[0],
+                    self.volume_sampling_size,
+                )
+            else:
+                volume_slice = slice(0, group["volume_mesh_centers"].shape[0])
 
         # This is a slower basic way to do this, to be improved:
         data = {}
@@ -587,14 +591,17 @@ def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]:
 
             # Make sure to select the slice outside of the loop.
             # We need
-            if self.volume_sampling_size is not None:
-                volume_slice = self.select_random_sections_from_slice(
-                    0,
-                    read_futures["volume_mesh_centers"].shape[0],
-                    self.volume_sampling_size,
-                )
-            else:
-                volume_slice = slice(0, read_futures["volume_mesh_centers"].shape[0])
+            if self.is_volumetric:
+                if self.volume_sampling_size is not None:
+                    volume_slice = self.select_random_sections_from_slice(
+                        0,
+                        read_futures["volume_mesh_centers"].shape[0],
+                        self.volume_sampling_size,
+                    )
+                else:
+                    volume_slice = slice(
+                        0, read_futures["volume_mesh_centers"].shape[0]
+                    )
 
             # Trigger an async read of each data item:
             # (Each item will be a numpy ndarray after this:)

From 2ceefc75381004e9319841065465716a7f176aec Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 8 Oct 2025 13:59:21 -0700
Subject: [PATCH 74/98] Make sure differentiable loss tensors are detached
 before transfer to CPU

---
 examples/cfd/external_aerodynamics/domino/src/train.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py
index eb6331331e..0f08b95cfa 100644
--- a/examples/cfd/external_aerodynamics/domino/src/train.py
+++ b/examples/cfd/external_aerodynamics/domino/src/train.py
@@ -204,7 +204,7 @@ def train_epoch(
                 optimizer.zero_grad()
 
             # Gather data and report
-            running_loss += loss.item()
+            running_loss += loss.detach().item()
             elapsed_time = time.perf_counter() - start_time
             io_time = io_end_time - io_start_time
             start_time = time.perf_counter()
@@ -223,7 +223,9 @@ def train_epoch(
             )
             loss_string += (
                 "  "
-                + f"\t".join([f"{l.item():<10.3e}" for l in loss_dict.values()])
+                + f"\t".join(
+                    [f"{l.detach().item():<10.3e}" for l in loss_dict.values()]
+                )
                 + "\n"
             )
 
@@ -237,7 +239,7 @@ def train_epoch(
     last_loss = running_loss / (i_batch + 1)  # loss per batch
     if dist.rank == 0:
         logger.info(
-            f" Device {device},  batch: {i_batch + 1}, loss norm: {loss.item():.5f}"
+            f" Device {device},  batch: {i_batch + 1}, loss norm: {loss.detach().item():.5f}"
         )
         tb_x = epoch_index * len(dataloader) + i_batch + 1
         tb_writer.add_scalar("Loss/train", last_loss, tb_x)

From d955d87797a58f2e6a2c3a9e35caff9bb2fb5684 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 8 Oct 2025 13:59:53 -0700
Subject: [PATCH 75/98] remove printouts.

---
 .../external_aerodynamics/domino/src/benchmark_dataloader.py   | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
index e3f24a5fff..339363195a 100644
--- a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
+++ b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
@@ -97,9 +97,6 @@ def benchmark_io_epoch(
     gpu_start_info = nvmlDeviceGetMemoryInfo(gpu_handle)
     start_time = time.perf_counter()
     for i_batch, sample_batched in enumerate(dataloader):
-        # for key in sample_batched.keys():
-        #     print(f"Key {key} shape: {sample_batched[key].shape} with mean {sample_batched[key].mean()} and std {sample_batched[key].std()} ")
-
         # Gather data and report
         elapsed_time = time.perf_counter() - start_time
         start_time = time.perf_counter()

From d05e6531a28ea617e4af166aeb6ed393ffe13018 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Thu, 9 Oct 2025 12:10:47 -0700
Subject: [PATCH 76/98] Increase data reading size, for sub-sampling.

---
 physicsnemo/datapipes/cae/domino_datapipe.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py
index 1a8e7e6164..93e7d15b93 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe.py
@@ -742,7 +742,7 @@ def process_data(self, data_dict):
         )
         return_dict["sdf_surf_grid"] = sdf_surf_grid
         return_dict["surf_grid"] = surf_grid
-        
+
         # Store this only if normalization is active:
         if self.config.normalize_coordinates:
             return_dict["surface_min_max"] = torch.stack([s_min, s_max])
@@ -867,7 +867,7 @@ def set_dataset(self, dataset: Iterable) -> None:
 
         if self.config.volume_sample_from_disk:
             # We deliberately double the data to read compared to the sampling size:
-            self.dataset.set_volume_sampling_size(2 * self.config.volume_points_sample)
+            self.dataset.set_volume_sampling_size(10 * self.config.volume_points_sample)
 
     def __len__(self):
         if self.dataset is not None:
@@ -930,7 +930,11 @@ def __iter__(self):
 
 
 def compute_scaling_factors(
-    cfg: DictConfig, input_path: str, target_keys: list[str], use_cache=None, max_samples=20,
+    cfg: DictConfig,
+    input_path: str,
+    target_keys: list[str],
+    use_cache=None,
+    max_samples=20,
 ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
     """
     Using the dataset at the path, compute the mean, std, min, and max of the target keys.

From 06ca0850071f18f81f52630588126e327168c543 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Fri, 10 Oct 2025 06:46:39 -0700
Subject: [PATCH 77/98] Add more tests to the datapipe for domino

---
 physicsnemo/datapipes/cae/domino_datapipe.py |   2 -
 test/datapipes/test_domino_datapipe.py       | 340 +++++++++++++++----
 2 files changed, 281 insertions(+), 61 deletions(-)

diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py
index 93e7d15b93..867a14a38d 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe.py
@@ -728,8 +728,6 @@ def process_data(self, data_dict):
         else:
             normed_vertices = data_dict["stl_coordinates"]
 
-        return_dict["surf_grid"] = surf_grid
-
         # For SDF calculations, make sure the mesh_indices_flattened is an integer array:
         mesh_indices_flattened = data_dict["stl_faces"].to(torch.int32)
 
diff --git a/test/datapipes/test_domino_datapipe.py b/test/datapipes/test_domino_datapipe.py
index bb463792ff..4b8b3df0bb 100644
--- a/test/datapipes/test_domino_datapipe.py
+++ b/test/datapipes/test_domino_datapipe.py
@@ -27,7 +27,7 @@
 from pytest_utils import import_or_fail
 from scipy.spatial import ConvexHull
 
-from physicsnemo.datapipes.cae.domino_datapipe2 import (
+from physicsnemo.datapipes.cae.domino_datapipe import (
     CachedDoMINODataset,
     DoMINODataConfig,
     DoMINODataPipe,
@@ -427,14 +427,31 @@ def test_domino_datapipe_coordinate_normalization(
     sample = dataset[0]
     validate_sample_structure(sample, model_type, gpu_output=True)
 
-    v_coords = sample["volume_mesh_centers"]
-    s_coords = sample["surface_mesh_centers"]
+    # Check all the volume coordinates:
+    for volume_key in ["volume_mesh_centers"]:
+        coords = sample[volume_key]
+        check_tensor_normalization(
+            coords, normalize_coordinates, sample_in_bbox, is_surface=False
+        )
+
+    # Check all the surface coordinates:
+    for surface_key in ["surface_mesh_centers", "surface_mesh_neighbors"]:
+        coords = sample[surface_key]
+        if surface_key == "surface_mesh_neighbors":
+            coords = coords.reshape((1, -1, 3))
+        check_tensor_normalization(
+            coords, normalize_coordinates, sample_in_bbox, is_surface=True
+        )
+
+
+def check_tensor_normalization(
+    tensor, normalize_coordinates, sample_in_bbox, is_surface
+):
+    """Check if a tensor is normalized properly."""
 
     # Batch size is 1 here, but in principle this could be a loop:
-    v_min = torch.min(v_coords[0], dim=0).values
-    v_max = torch.max(v_coords[0], dim=0).values
-    s_min = torch.min(s_coords[0], dim=0).values
-    s_max = torch.max(s_coords[0], dim=0).values
+    t_min = torch.min(tensor[0], dim=0).values
+    t_max = torch.max(tensor[0], dim=0).values
 
     # If normalization is enabled, coordinates should be in [-2, 2] range
     if normalize_coordinates:
@@ -443,12 +460,12 @@ def test_domino_datapipe_coordinate_normalization(
             # that were already inside the box should be present.
 
             # That means that all values should be between -1 and 1
-            assert v_min[0] >= -1
-            assert v_min[1] >= -1
-            assert v_min[2] >= -1
-            assert v_max[0] <= 1
-            assert v_max[1] <= 1
-            assert v_max[2] <= 1
+            assert t_min[0] >= -1
+            assert t_min[1] >= -1
+            assert t_min[2] >= -1
+            assert t_max[0] <= 1
+            assert t_max[1] <= 1
+            assert t_max[2] <= 1
 
         else:
             # When normalizing the coordinates, the values of the bbox
@@ -463,56 +480,248 @@ def test_domino_datapipe_coordinate_normalization(
             # So, field_range = (2 - -1) = 3
             # new_val = 2 * (5 - -1)/ 3 - 1 = 3
 
-            vol_x_rescale = 1 / (VOL_BBOX_XMAX - VOL_BBOX_XMIN)
-            vol_y_rescale = 1 / (VOL_BBOX_YMAX - VOL_BBOX_YMIN)
-            vol_z_rescale = 1 / (VOL_BBOX_ZMAX - VOL_BBOX_ZMIN)
-
-            assert v_min[0] >= 2 * (DATA_XMIN - VOL_BBOX_XMIN) * vol_x_rescale - 1
-            assert v_min[1] >= 2 * (DATA_YMIN - VOL_BBOX_YMIN) * vol_y_rescale - 1
-            assert v_min[2] >= 2 * (DATA_ZMIN - VOL_BBOX_ZMIN) * vol_z_rescale - 1
-            assert v_max[0] <= 2 * (DATA_XMAX - VOL_BBOX_XMIN) * vol_x_rescale - 1
-            assert v_max[1] <= 2 * (DATA_YMAX - VOL_BBOX_YMIN) * vol_y_rescale - 1
-            assert v_max[2] <= 2 * (DATA_ZMAX - VOL_BBOX_ZMIN) * vol_z_rescale - 1
-
-            surf_x_rescale = 1 / (SURF_BBOX_XMAX - SURF_BBOX_XMIN)
-            surf_y_rescale = 1 / (SURF_BBOX_YMAX - SURF_BBOX_YMIN)
-            surf_z_rescale = 1 / (SURF_BBOX_ZMAX - SURF_BBOX_ZMIN)
-
-            assert s_min[0] >= 2 * (DATA_XMIN - SURF_BBOX_XMIN) * surf_x_rescale - 1
-            assert s_min[1] >= 2 * (DATA_YMIN - SURF_BBOX_YMIN) * surf_y_rescale - 1
-            assert s_min[2] >= 2 * (DATA_ZMIN - SURF_BBOX_ZMIN) * surf_z_rescale - 1
-            assert s_max[0] <= 2 * (DATA_XMAX - SURF_BBOX_XMIN) * surf_x_rescale - 1
-            assert s_max[1] <= 2 * (DATA_YMAX - SURF_BBOX_YMIN) * surf_y_rescale - 1
-            assert s_max[2] <= 2 * (DATA_ZMAX - SURF_BBOX_ZMIN) * surf_z_rescale - 1
+            if is_surface:
+                x_rescale = 1 / (SURF_BBOX_XMAX - SURF_BBOX_XMIN)
+                y_rescale = 1 / (SURF_BBOX_YMAX - SURF_BBOX_YMIN)
+                z_rescale = 1 / (SURF_BBOX_ZMAX - SURF_BBOX_ZMIN)
+                target_min_x = 2 * (DATA_XMIN - SURF_BBOX_XMIN) * x_rescale - 1
+                target_min_y = 2 * (DATA_YMIN - SURF_BBOX_YMIN) * y_rescale - 1
+                target_min_z = 2 * (DATA_ZMIN - SURF_BBOX_ZMIN) * z_rescale - 1
+                target_max_x = 2 * (DATA_XMAX - SURF_BBOX_XMIN) * x_rescale - 1
+                target_max_y = 2 * (DATA_YMAX - SURF_BBOX_YMIN) * y_rescale - 1
+                target_max_z = 2 * (DATA_ZMAX - SURF_BBOX_ZMIN) * z_rescale - 1
+            else:
+                x_rescale = 1 / (VOL_BBOX_XMAX - VOL_BBOX_XMIN)
+                y_rescale = 1 / (VOL_BBOX_YMAX - VOL_BBOX_YMIN)
+                z_rescale = 1 / (VOL_BBOX_ZMAX - VOL_BBOX_ZMIN)
+                target_min_x = 2 * (DATA_XMIN - VOL_BBOX_XMIN) * x_rescale - 1
+                target_min_y = 2 * (DATA_YMIN - VOL_BBOX_YMIN) * y_rescale - 1
+                target_min_z = 2 * (DATA_ZMIN - VOL_BBOX_ZMIN) * z_rescale - 1
+                target_max_x = 2 * (DATA_XMAX - VOL_BBOX_XMIN) * x_rescale - 1
+                target_max_y = 2 * (DATA_YMAX - VOL_BBOX_YMIN) * y_rescale - 1
+                target_max_z = 2 * (DATA_ZMAX - VOL_BBOX_ZMIN) * z_rescale - 1
+
+            assert t_min[0] >= target_min_x
+            assert t_min[1] >= target_min_y
+            assert t_min[2] >= target_min_z
+            assert t_max[0] <= target_max_x
+            assert t_max[1] <= target_max_y
+            assert t_max[2] <= target_max_z
 
     else:
         if sample_in_bbox:
             # We've sampled in the bbox but NOT normalized.
             # So, the values should exclusively be in the BBOX ranges:
-            assert v_min[0] >= VOL_BBOX_XMIN
-            assert v_min[1] >= VOL_BBOX_YMIN
-            assert v_min[2] >= VOL_BBOX_ZMIN
-            assert v_max[0] <= VOL_BBOX_XMAX
-            assert v_max[1] <= VOL_BBOX_YMAX
-            assert v_max[2] <= VOL_BBOX_ZMAX
-
-            assert s_min[0] >= SURF_BBOX_XMIN
-            assert s_min[1] >= SURF_BBOX_YMIN
-            assert s_min[2] >= SURF_BBOX_ZMIN
-            assert s_max[0] <= SURF_BBOX_XMAX
-            assert s_max[1] <= SURF_BBOX_YMAX
-            assert s_max[2] <= SURF_BBOX_ZMAX
+
+            if is_surface:
+                assert t_min[0] >= SURF_BBOX_XMIN
+                assert t_min[1] >= SURF_BBOX_YMIN
+                assert t_min[2] >= SURF_BBOX_ZMIN
+                assert t_max[0] <= SURF_BBOX_XMAX
+                assert t_max[1] <= SURF_BBOX_YMAX
+                assert t_max[2] <= SURF_BBOX_ZMAX
+            else:
+                assert t_min[0] >= VOL_BBOX_XMIN
+                assert t_min[1] >= VOL_BBOX_YMIN
+                assert t_min[2] >= VOL_BBOX_ZMIN
+                assert t_max[0] <= VOL_BBOX_XMAX
+                assert t_max[1] <= VOL_BBOX_YMAX
+                assert t_max[2] <= VOL_BBOX_ZMAX
 
         else:
             # Not sampling, and also
             # Not normalizing, values should be in data range only:
-            assert v_min[0] >= DATA_XMIN and v_max[0] <= DATA_XMAX
-            assert v_min[1] >= DATA_YMIN and v_max[1] <= DATA_YMAX
-            assert v_min[2] >= DATA_ZMIN and v_max[2] <= DATA_ZMAX
-            assert s_min[0] >= DATA_XMIN and s_max[0] <= DATA_XMAX
-            assert s_min[1] >= DATA_YMIN and s_max[1] <= DATA_YMAX
-            # Surface points always should be > 0
-            assert s_min[2] >= 0 and s_max[2] <= DATA_ZMAX
+            assert t_min[0] >= DATA_XMIN and t_max[0] <= DATA_XMAX
+            assert t_min[1] >= DATA_YMIN and t_max[1] <= DATA_YMAX
+
+            if is_surface:
+                # Surface points always should be > 0
+                assert t_min[2] >= 0 and t_max[2] <= DATA_ZMAX
+            else:
+                assert t_min[2] >= DATA_ZMIN and t_max[2] <= DATA_ZMAX
+
+    return True
+
+
+@pytest.mark.parametrize("model_type", ["surface"])
+@pytest.mark.parametrize("normalize_coordinates", [True, False])
+@pytest.mark.parametrize("sample_in_bbox", [True, False])
+def test_domino_datapipe_surface_normalization(
+    zarr_dataset, pytestconfig, model_type, normalize_coordinates, sample_in_bbox
+):
+    """Test normalization functionality.
+
+    This test is meant to make sure all the peripheral outputs are
+    normalized properly. FOcus on surface here.
+
+    We could do them all in one test but it gets unweildy, and if there
+    are failures it helps nail down exactly where.
+    """
+    cuda = torch.cuda.is_available()
+
+    dataset = create_basic_dataset(
+        zarr_dataset,
+        model_type,
+        gpu_preprocessing=cuda,
+        gpu_output=cuda,
+        normalize_coordinates=normalize_coordinates,
+        sampling=True,
+        sample_in_bbox=sample_in_bbox,
+    )
+
+    # Here's a list of values to check, and the behavior we expect:
+
+    # surf_grid - normalized by s_min, s_max
+    sample = dataset[0]
+    surf_grid = sample["surf_grid"]
+
+    # If normalizing, surf_grid should be between -1 and 1.
+    # Otherwise, should be between s_min and s_max
+    if not normalize_coordinates:
+        target_min = torch.tensor([SURF_BBOX_XMIN, SURF_BBOX_YMIN, SURF_BBOX_ZMIN])
+        target_max = torch.tensor([SURF_BBOX_XMAX, SURF_BBOX_YMAX, SURF_BBOX_ZMAX])
+    else:
+        target_min = torch.tensor([-1.0, -1.0, -1.0])
+        target_max = torch.tensor([1.0, 1.0, 1.0])
+
+    target_min = target_min.to(surf_grid.device)
+    target_max = target_max.to(surf_grid.device)
+
+    # Flatten all the grid coords:
+    surf_grid = surf_grid.reshape((-1, 3))
+
+    assert torch.all(surf_grid >= target_min)
+    assert torch.all(surf_grid <= target_max)
+
+    # sdf_surf_grid - should have max values less than || s_max - s_min||
+
+    max_norm_allowed = torch.norm(target_max - target_min)
+
+    sdf_surf_grid = sample["sdf_surf_grid"]
+    assert torch.all(sdf_surf_grid <= max_norm_allowed)
+    # (Negative values are ok but we don't really check that.)
+
+    # surface_min_max should only be in the dict if normaliztion is on:
+    if normalize_coordinates:
+        assert "surface_min_max" in sample
+        s_mm = sample["surface_min_max"]
+        assert s_mm.shape == (1, 2, 3)
+
+        assert torch.allclose(
+            s_mm[0, 0],
+            torch.tensor([SURF_BBOX_XMIN, SURF_BBOX_YMIN, SURF_BBOX_ZMIN]).to(
+                s_mm.device
+            ),
+        )
+        assert torch.allclose(
+            s_mm[0, 1],
+            torch.tensor([SURF_BBOX_XMAX, SURF_BBOX_YMAX, SURF_BBOX_ZMAX]).to(
+                s_mm.device
+            ),
+        )
+
+    else:
+        assert "surface_min_max" not in sample
+
+    # For the rest of the values, checks are straightforward:
+
+    assert torch.all(sample["surface_areas"] > 0)
+    assert torch.all(sample["surface_neighbors_areas"] > 0)
+
+    # No checks implemented on the following, yet:
+    # - pos_surface_center_of_mass
+
+
+@pytest.mark.parametrize("model_type", ["volume"])
+@pytest.mark.parametrize("normalize_coordinates", [True, False])
+@pytest.mark.parametrize("sample_in_bbox", [True, False])
+def test_domino_datapipe_volume_normalization(
+    zarr_dataset, pytestconfig, model_type, normalize_coordinates, sample_in_bbox
+):
+    """Test normalization functionality.
+
+    This test is meant to make sure all the peripheral outputs are
+    normalized properly. FOcus on volume here.
+
+    We could do them all in one test but it gets unweildy, and if there
+    are failures it helps nail down exactly where.
+    """
+    cuda = torch.cuda.is_available()
+
+    dataset = create_basic_dataset(
+        zarr_dataset,
+        model_type,
+        gpu_preprocessing=cuda,
+        gpu_output=cuda,
+        normalize_coordinates=normalize_coordinates,
+        sampling=True,
+        sample_in_bbox=sample_in_bbox,
+    )
+
+    # Here's a list of values to check, and the behavior we expect:
+
+    # grid - normalized by s_min, s_max
+    sample = dataset[0]
+    grid = sample["grid"]
+
+    # If normalizing, surf_grid should be between -1 and 1.
+    # Otherwise, should be between s_min and s_max
+    if not normalize_coordinates:
+        target_min = torch.tensor([VOL_BBOX_XMIN, VOL_BBOX_YMIN, VOL_BBOX_ZMIN])
+        target_max = torch.tensor([VOL_BBOX_XMAX, VOL_BBOX_YMAX, VOL_BBOX_ZMAX])
+    else:
+        target_min = torch.tensor([-1.0, -1.0, -1.0])
+        target_max = torch.tensor([1.0, 1.0, 1.0])
+
+    target_min = target_min.to(grid.device)
+    target_max = target_max.to(grid.device)
+
+    # Flatten all the grid coords:
+    grid = grid.reshape((-1, 3))
+
+    assert torch.all(grid >= target_min)
+    assert torch.all(grid <= target_max)
+
+    # sdf_grid - should have max values less than || s_max - s_min||
+
+    max_norm_allowed = torch.norm(target_max - target_min)
+
+    sdf_grid = sample["sdf_grid"]
+    assert torch.all(sdf_grid <= max_norm_allowed)
+    # (Negative values are ok but we don't really check that.)
+
+    # surface_min_max should only be in the dict if normaliztion is on:
+    if normalize_coordinates:
+        assert "volume_min_max" in sample
+        s_mm = sample["volume_min_max"]
+        assert s_mm.shape == (1, 2, 3)
+
+        assert torch.allclose(
+            s_mm[0, 0],
+            torch.tensor([VOL_BBOX_XMIN, VOL_BBOX_YMIN, VOL_BBOX_ZMIN]).to(s_mm.device),
+        )
+        assert torch.allclose(
+            s_mm[0, 1],
+            torch.tensor([VOL_BBOX_XMAX, VOL_BBOX_YMAX, VOL_BBOX_ZMAX]).to(s_mm.device),
+        )
+
+    else:
+        assert "volume_min_max" not in sample
+
+    sdf_nodes = sample["sdf_nodes"]
+    pos_volume_closest_norm = torch.norm(sample["pos_volume_closest"], dim=-1).reshape(
+        sdf_nodes.shape
+    )
+    assert torch.allclose(pos_volume_closest_norm, sdf_nodes)
+    # No checks implemented on the following, yet:
+    # - pos_volume_center_of_mass
+
+    # The center of mass should be inside the mesh.  So, the displacement
+    # from the center of mass should be exclusively larger than the sdf:
+    pos_volume_center_of_mass_norm = torch.norm(
+        sample["pos_volume_center_of_mass"], dim=-1
+    ).reshape(sdf_nodes.shape)
+    assert torch.all(pos_volume_center_of_mass_norm > sdf_nodes)
 
 
 @import_or_fail(["warp", "cupy", "cuml"])
@@ -575,18 +784,30 @@ def test_domino_datapipe_sampling(zarr_dataset, model_type, sampling, pytestconf
 
 
 @import_or_fail(["warp", "cupy", "cuml"])
-@pytest.mark.parametrize("model_type", ["volume"])
+@pytest.mark.parametrize("model_type", ["volume", "surface", "combined"])
 @pytest.mark.parametrize("scaling_type", [None, "min_max_scaling", "mean_std_scaling"])
 def test_domino_datapipe_scaling(zarr_dataset, model_type, scaling_type, pytestconfig):
     """Test field scaling functionality."""
     use_cuda = torch.cuda.is_available()
 
-    if scaling_type == "min_max_scaling":
-        volume_factors = [10.0, -10.0]  # [max, min]
-    elif scaling_type == "mean_std_scaling":
-        volume_factors = [0.0, 1.0]  # [mean, std]
+    if model_type in ["volume", "combined"]:
+        volume_factors = torch.tensor(
+            [
+                [10.0, -10.0, 10.0, 10.0, 10.0],
+                [10.0, -10.0, 10.0, 10.0, 10.0],
+            ]
+        )
     else:
         volume_factors = None
+    if model_type in ["surface", "combined"]:
+        surface_factors = torch.tensor(
+            [
+                [10.0, -10.0, 10.0, 10.0],
+                [10.0, -10.0, 10.0, 10.0],
+            ]
+        )
+    else:
+        surface_factors = None
 
     dataset = create_basic_dataset(
         zarr_dataset,
@@ -595,6 +816,7 @@ def test_domino_datapipe_scaling(zarr_dataset, model_type, scaling_type, pytestc
         gpu_output=use_cuda,
         scaling_type=scaling_type,
         volume_factors=volume_factors,
+        surface_factors=surface_factors,
     )
 
     sample = dataset[0]

From 8a91a18a4f2fe513f8564ae70a40c1564ce26b92 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Fri, 10 Oct 2025 06:54:08 -0700
Subject: [PATCH 78/98] Rename DrivaerMLDataset to CAE Dataset.

---
 .../cae/{drivaer_ml_dataset.py => cae_dataset.py}    | 12 ++++++------
 physicsnemo/datapipes/cae/domino_datapipe.py         | 10 +++++-----
 test/datapipes/test_domino_datapipe.py               |  4 ++--
 3 files changed, 13 insertions(+), 13 deletions(-)
 rename physicsnemo/datapipes/cae/{drivaer_ml_dataset.py => cae_dataset.py} (99%)

diff --git a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py b/physicsnemo/datapipes/cae/cae_dataset.py
similarity index 99%
rename from physicsnemo/datapipes/cae/drivaer_ml_dataset.py
rename to physicsnemo/datapipes/cae/cae_dataset.py
index 6d46a40879..4b3dd0bfde 100644
--- a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py
+++ b/physicsnemo/datapipes/cae/cae_dataset.py
@@ -734,7 +734,7 @@ def is_vtk_directory(file: pathlib.Path) -> bool:
     )
 
 
-class DrivaerMLDataset:
+class CAEDataset:
     """
     Dataset reader for DrivaerML and similar datasets.  In general, this
     dataset supports reading dictionary-like data, and returning a
@@ -1118,7 +1118,7 @@ def set_volume_sampling_size(self, volume_sampling_size: int):
 
 
 def compute_mean_std_min_max(
-    dataset: DrivaerMLDataset, field_keys: list[str], max_samples: int = 20
+    dataset: CAEDataset, field_keys: list[str], max_samples: int = 20
 ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
     """
     Compute the mean, standard deviation, minimum, and maximum for a specified field
@@ -1127,7 +1127,7 @@ def compute_mean_std_min_max(
     Uses a numerically stable online algorithm for mean and variance.
 
     Args:
-        dataset (DrivaerMLDataset): The dataset to process.
+        dataset (CAEDataset): The dataset to process.
         field_key (str): The key for the field to normalize.
 
     Returns:
@@ -1184,11 +1184,11 @@ def compute_mean_std_min_max(
             # Update min/max
             batch_min = field_data.amin(dim=(0))
             batch_max = field_data.amax(dim=(0))
-            
+
             min_val[field_key] = torch.minimum(min_val[field_key], batch_min)
 
-            max_val[field_key] = torch.maximum(max_val[field_key], batch_max)   
-                     
+            max_val[field_key] = torch.maximum(max_val[field_key], batch_max)
+
             # Update running mean and M2 (Welford's algorithm)
             delta = batch_mean - mean[field_key]
             N[field_key] += batch_n  # batch_n should also be torch.int64
diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py
index 867a14a38d..46f5848dc1 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe.py
@@ -37,8 +37,8 @@
 from torch.distributed.tensor.placement_types import Replicate
 from torch.utils.data import Dataset
 
-from physicsnemo.datapipes.cae.drivaer_ml_dataset import (
-    DrivaerMLDataset,
+from physicsnemo.datapipes.cae.cae_dataset import (
+    CAEDataset,
     compute_mean_std_min_max,
 )
 from physicsnemo.distributed import DistributedManager
@@ -229,7 +229,7 @@ def __init__(
         self.preproc_device = (
             dist.device if self.config.gpu_preprocessing else torch.device("cpu")
         )
-        # The drivaer_ml_dataset will automatically target this device
+        # The cae_dataset will automatically target this device
         # In an async transfer.
         self.output_device = (
             dist.device if self.config.gpu_output else torch.device("cpu")
@@ -946,7 +946,7 @@ def compute_scaling_factors(
 
     device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
 
-    dataset = DrivaerMLDataset(
+    dataset = CAEDataset(
         data_dir=input_path,
         keys_to_read=target_keys,
         keys_to_read_if_available={},
@@ -1182,7 +1182,7 @@ def create_domino_dataset(
             preload_depth = 1
             pin_memory = False
 
-        dataset = DrivaerMLDataset(
+        dataset = CAEDataset(
             data_dir=input_path,
             keys_to_read=keys_to_read,
             keys_to_read_if_available=keys_to_read_if_available,
diff --git a/test/datapipes/test_domino_datapipe.py b/test/datapipes/test_domino_datapipe.py
index 4b8b3df0bb..a4f7159c1f 100644
--- a/test/datapipes/test_domino_datapipe.py
+++ b/test/datapipes/test_domino_datapipe.py
@@ -27,12 +27,12 @@
 from pytest_utils import import_or_fail
 from scipy.spatial import ConvexHull
 
+from physicsnemo.datapipes.cae.cae_dataset import CAEDataset
 from physicsnemo.datapipes.cae.domino_datapipe import (
     CachedDoMINODataset,
     DoMINODataConfig,
     DoMINODataPipe,
 )
-from physicsnemo.datapipes.cae.drivaer_ml_dataset import DrivaerMLDataset
 
 Tensor = torch.Tensor
 
@@ -293,7 +293,7 @@ def create_basic_dataset(
         "global_params_reference": torch.tensor([1.225, 10.0]),
     }
 
-    dataset = DrivaerMLDataset(
+    dataset = CAEDataset(
         data_dir=input_path,
         keys_to_read=keys_to_read,
         keys_to_read_if_available=keys_to_read_if_available,

From e151de0016fb6d0bcdc53ec704feaa7d9350132f Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Fri, 10 Oct 2025 07:40:08 -0700
Subject: [PATCH 79/98] Add metrics to printouts and tbfile.

---
 .../src/{ => deprecated}/train_sharded.py     |   0
 .../external_aerodynamics/domino/src/train.py |  80 +++++++++++-
 .../external_aerodynamics/domino/src/utils.py | 122 ++++++++++++++++++
 3 files changed, 195 insertions(+), 7 deletions(-)
 rename examples/cfd/external_aerodynamics/domino/src/{ => deprecated}/train_sharded.py (100%)

diff --git a/examples/cfd/external_aerodynamics/domino/src/train_sharded.py b/examples/cfd/external_aerodynamics/domino/src/deprecated/train_sharded.py
similarity index 100%
rename from examples/cfd/external_aerodynamics/domino/src/train_sharded.py
rename to examples/cfd/external_aerodynamics/domino/src/deprecated/train_sharded.py
diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py
index 0f08b95cfa..eb8b8ba63d 100644
--- a/examples/cfd/external_aerodynamics/domino/src/train.py
+++ b/examples/cfd/external_aerodynamics/domino/src/train.py
@@ -31,6 +31,7 @@
 import os
 import re
 from typing import Literal, Any
+from tabulate import tabulate
 
 import apex
 import numpy as np
@@ -78,7 +79,7 @@
 
 
 from loss import compute_loss_dict
-from utils import get_num_vars, load_scaling_factors
+from utils import get_num_vars, load_scaling_factors, compute_l2, all_reduce_dict
 
 
 def validation_step(
@@ -86,6 +87,8 @@ def validation_step(
     model,
     device,
     logger,
+    tb_writer,
+    epoch_index,
     use_sdf_basis=False,
     use_surface_normals=False,
     integral_scaling_factor=1.0,
@@ -98,8 +101,11 @@ def validation_step(
     vol_factors: torch.Tensor | None = None,
     add_physics_loss=False,
 ):
+    dm = DistributedManager()
     running_vloss = 0.0
     with torch.no_grad():
+        metrics = None
+
         for i_batch, sample_batched in enumerate(dataloader):
             sampled_batched = dict_to_device(sample_batched, device)
 
@@ -127,8 +133,37 @@ def validation_step(
                 )
 
             running_vloss += loss.item()
+            local_metrics = compute_l2(
+                prediction_surf, prediction_vol, sampled_batched, dataloader
+            )
+            if metrics is None:
+                metrics = local_metrics
+            else:
+                metrics = {
+                    key: metrics[key] + local_metrics[key] for key in metrics.keys()
+                }
 
     avg_vloss = running_vloss / (i_batch + 1)
+    metrics = {key: metrics[key] / (i_batch + 1) for key in metrics.keys()}
+
+    metrics = all_reduce_dict(metrics, dm)
+
+    if dm.rank == 0:
+        logger.info(
+            f" Device {device},  batch: {i_batch + 1}, VAL loss norm: {loss.detach().item():.5f}"
+        )
+        tb_x = epoch_index
+        for key in metrics.keys():
+            tb_writer.add_scalar(f"L2 Metrics/val/{key}", metrics[key], tb_x)
+
+        metrics_table = tabulate(
+            [[k, v] for k, v in metrics.items()],
+            headers=["Metric", "Average Value"],
+            tablefmt="pretty",
+        )
+        logger.info(
+            f"\nEpoch {epoch_index} VALIDATION Average Metrics:\n{metrics_table}\n"
+        )
 
     return avg_vloss
 
@@ -155,7 +190,7 @@ def train_epoch(
     surf_factors: torch.Tensor | None = None,
     add_physics_loss=False,
 ):
-    dist = DistributedManager()
+    dm = DistributedManager()
 
     running_loss = 0.0
     last_loss = 0.0
@@ -165,6 +200,7 @@ def train_epoch(
     start_time = time.perf_counter()
     with Profiler():
         io_start_time = time.perf_counter()
+        metrics = None
         for i_batch, sampled_batched in enumerate(dataloader):
             io_end_time = time.perf_counter()
             if add_physics_loss:
@@ -195,6 +231,22 @@ def train_epoch(
                     add_physics_loss,
                 )
 
+                # Compute metrics:
+                if isinstance(prediction_vol, tuple):
+                    # This is if return_neighbors is on for volume:
+                    prediction_vol = prediction_vol[0]
+
+                local_metrics = compute_l2(
+                    prediction_surf, prediction_vol, sampled_batched, dataloader
+                )
+                if metrics is None:
+                    metrics = local_metrics
+                else:
+                    # Sum the running total:
+                    metrics = {
+                        key: metrics[key] + local_metrics[key] for key in metrics.keys()
+                    }
+
             loss = loss / loss_interval
             scaler.scale(loss).backward()
 
@@ -237,12 +289,25 @@ def train_epoch(
             io_start_time = time.perf_counter()
 
     last_loss = running_loss / (i_batch + 1)  # loss per batch
-    if dist.rank == 0:
+    # Normalize metrics:
+    metrics = {key: metrics[key] / (i_batch + 1) for key in metrics.keys()}
+    # reduce metrics across batch:
+    metrics = all_reduce_dict(metrics, dm)
+    if dm.rank == 0:
         logger.info(
             f" Device {device},  batch: {i_batch + 1}, loss norm: {loss.detach().item():.5f}"
         )
         tb_x = epoch_index * len(dataloader) + i_batch + 1
         tb_writer.add_scalar("Loss/train", last_loss, tb_x)
+        for key in metrics.keys():
+            tb_writer.add_scalar(f"L2 Metrics/train/{key}", metrics[key], epoch_index)
+
+        metrics_table = tabulate(
+            [[k, v] for k, v in metrics.items()],
+            headers=["Metric", "Average Value"],
+            tablefmt="pretty",
+        )
+        logger.info(f"\nEpoch {epoch_index} Average Metrics:\n{metrics_table}\n")
 
     return last_loss
 
@@ -278,10 +343,9 @@ def main(cfg: DictConfig) -> None:
     # Get scaling factors - precompute them if this fails!
     ######################################################
     vol_factors, surf_factors = load_scaling_factors(cfg)
-    
-    vol_factors = np.asarray([[ 2.9064691e+00, 1.3743978e+00,1.2992665e+00, 1.0714761e+00, 3.2597079e-03], [-2.9988267e+00, -1.3753892e+00, -1.2892706e+00, -1.1400493e+00, 1.0002602e-11]])
-    surf_factors = np.asarray([[ 1.8464564, 0.09996139, 0.07988136, 0.05437989], [-2.0476909, -0.10289095, -0.07811281, -0.05411612]])
-    vol_factors_tensor = torch.from_numpy(vol_factors).to(dist.device)
+
+    # vol_factors = np.asarray([[ 2.9064691e+00, 1.3743978e+00,1.2992665e+00, 1.0714761e+00, 3.2597079e-03], [-2.9988267e+00, -1.3753892e+00, -1.2892706e+00, -1.1400493e+00, 1.0002602e-11]])
+    # surf_factors = np.asarray([[ 1.8464564, 0.09996139, 0.07988136, 0.05437989], [-2.0476909, -0.10289095, -0.07811281, -0.05411612]])
 
     ######################################################
     # Configure the model
@@ -538,6 +602,8 @@ def main(cfg: DictConfig) -> None:
             model=model,
             device=dist.device,
             logger=logger,
+            tb_writer=writer,
+            epoch_index=epoch,
             use_sdf_basis=cfg.model.use_sdf_in_basis_func,
             use_surface_normals=cfg.model.use_surface_normals,
             integral_scaling_factor=initial_integral_factor,
diff --git a/examples/cfd/external_aerodynamics/domino/src/utils.py b/examples/cfd/external_aerodynamics/domino/src/utils.py
index 4cc04d0d3c..9b742677b2 100644
--- a/examples/cfd/external_aerodynamics/domino/src/utils.py
+++ b/examples/cfd/external_aerodynamics/domino/src/utils.py
@@ -20,6 +20,7 @@
 from typing import Dict, Optional, Any
 import numpy as np
 import torch
+import torch.distributed as dist
 import pickle
 from pathlib import Path
 from typing import Literal
@@ -344,3 +345,124 @@ def load_scaling_factors(
     surf_factors_tensor = surf_factors_tensor.to(dm.device, dtype=torch.float32)
 
     return vol_factors_tensor, surf_factors_tensor
+
+
+def compute_l2(
+    pred_surface: torch.Tensor | None,
+    pred_volume: torch.Tensor | None,
+    batch,
+    dataloader,
+) -> dict[str, torch.Tensor]:
+    """
+    Compute the L2 norm between prediction and target.
+
+    Requires the dataloader to unscale back to original values
+    """
+
+    l2_dict = {}
+
+    if pred_surface is not None:
+        _, target_surface = dataloader.unscale_model_outputs(
+            surface_fields=batch["surface_fields"]
+        )
+        _, pred_surface = dataloader.unscale_model_outputs(surface_fields=pred_surface)
+        l2_surface = metrics_fn_surface(pred_surface, target_surface)
+        l2_dict.update(l2_surface)
+    if pred_volume is not None:
+        target_volume, _ = dataloader.unscale_model_outputs(
+            volume_fields=batch["volume_fields"]
+        )
+        pred_volume, _ = dataloader.unscale_model_outputs(volume_fields=pred_volume)
+        l2_volume = metrics_fn_volume(pred_volume, target_volume)
+        l2_dict.update(l2_volume)
+
+    return l2_dict
+
+
+def metrics_fn_surface(
+    pred: torch.Tensor,
+    target: torch.Tensor,
+) -> dict[str, torch.Tensor]:
+    """
+    Computes L2 surface metrics between prediction and target.
+
+    Args:
+        pred: Predicted values (normalized).
+        target: Target values (normalized).
+
+    Returns:
+        Dictionary of L2 surface metrics for pressure and shear components.
+    """
+
+    l2_num = (pred - target) ** 2
+    l2_num = torch.sum(l2_num, dim=1)
+    l2_num = torch.sqrt(l2_num)
+
+    l2_denom = target**2
+    l2_denom = torch.sum(l2_denom, dim=1)
+    l2_denom = torch.sqrt(l2_denom)
+
+    l2 = l2_num / l2_denom
+
+    metrics = {
+        "l2_surf_pressure": torch.mean(l2[:, 0]),
+        "l2_shear_x": torch.mean(l2[:, 1]),
+        "l2_shear_y": torch.mean(l2[:, 2]),
+        "l2_shear_z": torch.mean(l2[:, 3]),
+    }
+
+    return metrics
+
+
+def metrics_fn_volume(
+    pred: torch.Tensor,
+    target: torch.Tensor,
+) -> dict[str, torch.Tensor]:
+    """
+    Computes L2 volume metrics between prediction and target.
+    """
+    l2_num = (pred - target) ** 2
+    l2_num = torch.sum(l2_num, dim=1)
+    l2_num = torch.sqrt(l2_num)
+
+    l2_denom = target**2
+    l2_denom = torch.sum(l2_denom, dim=1)
+    l2_denom = torch.sqrt(l2_denom)
+
+    l2 = l2_num / l2_denom
+
+    metrics = {
+        "l2_vol_pressure": torch.mean(l2[:, 0]),
+        "l2_velocity_x": torch.mean(l2[:, 1]),
+        "l2_velocity_y": torch.mean(l2[:, 2]),
+        "l2_velocity_z": torch.mean(l2[:, 3]),
+        "l2_nut": torch.mean(l2[:, 4]),
+    }
+
+    return metrics
+
+
+def all_reduce_dict(
+    metrics: dict[str, torch.Tensor], dm: DistributedManager
+) -> dict[str, torch.Tensor]:
+    """
+    Reduces a dictionary of metrics across all distributed processes.
+
+    Args:
+        metrics: Dictionary of metric names to torch.Tensor values.
+        dm: DistributedManager instance for distributed context.
+
+    Returns:
+        Dictionary of reduced metrics.
+    """
+    # TODO - update this to use domains and not the full world
+
+    if dm.world_size == 1:
+        return metrics
+
+    for key, value in metrics.items():
+        dist.all_reduce(value)
+        value = value / dm.world_size
+        metrics[key] = value
+
+    return metrics

From 6b2e8d9dea2a877b249b8dd6ff65d614057d94c3 Mon Sep 17 00:00:00 2001
From: Rishikesh Ranade <dr.rranade@gmail.com>
Date: Thu, 9 Oct 2025 18:10:41 -0700
Subject: [PATCH 80/98] cleaning up test and datapipe

---
 .../domino/src/conf/config.yaml               |  4 +-
 .../external_aerodynamics/domino/src/test.py  | 43 ++-------
 physicsnemo/datapipes/cae/domino_datapipe.py  | 96 +++++++++----------
 3 files changed, 56 insertions(+), 87 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
index 13c00823b8..b5a3ebefbc 100644
--- a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
+++ b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
@@ -82,10 +82,10 @@ data: # Input directory for training and validation data
   gpu_preprocessing: true
   gpu_output: true
   normalize_coordinates: true
-  sample_in_bbox: True
+  sample_in_bbox: true
   sampling: true
   scaling_factors: ${project_dir}/scaling_factors/scaling_factors.pkl
-  volume_sample_from_disk: true
+  volume_sample_from_disk: false
   max_samples_for_statistics: 200
 
 # ┌───────────────────────────────────────────┐
diff --git a/examples/cfd/external_aerodynamics/domino/src/test.py b/examples/cfd/external_aerodynamics/domino/src/test.py
index 93fd16c25c..dc6b8ad27e 100644
--- a/examples/cfd/external_aerodynamics/domino/src/test.py
+++ b/examples/cfd/external_aerodynamics/domino/src/test.py
@@ -445,17 +445,8 @@ def main(cfg: DictConfig):
         # Center of mass calculation
         center_of_mass = calculate_center_of_mass(stl_centers, stl_sizes)
 
-        if cfg.data.bounding_box_surface is None:
-            s_max = np.amax(stl_vertices, 0)
-            s_min = np.amin(stl_vertices, 0)
-        else:
-            bounding_box_dims_surf = []
-            bounding_box_dims_surf.append(np.asarray(cfg.data.bounding_box_surface.max))
-            bounding_box_dims_surf.append(np.asarray(cfg.data.bounding_box_surface.min))
-            s_max = np.float32(bounding_box_dims_surf[0])
-            s_min = np.float32(bounding_box_dims_surf[1])
-            s_max = torch.from_numpy(s_max).to(torch.float32).to(dist.device)
-            s_min = torch.from_numpy(s_min).to(torch.float32).to(dist.device)
+        s_max = torch.from_numpy(np.asarray(cfg.data.bounding_box_surface.max)).to(torch.float32).to(dist.device)
+        s_min = torch.from_numpy(np.asarray(cfg.data.bounding_box_surface.min)).to(torch.float32).to(dist.device)
 
         nx, ny, nz = cfg.model.interp_res
 
@@ -608,19 +599,8 @@ def main(cfg: DictConfig):
             volume_coordinates = torch.from_numpy(volume_coordinates).to(torch.float32).to(dist.device)
             volume_fields = torch.from_numpy(volume_fields).to(torch.float32).to(dist.device)
 
-            bounding_box_dims = []
-            bounding_box_dims.append(np.asarray(cfg.data.bounding_box.max))
-            bounding_box_dims.append(np.asarray(cfg.data.bounding_box.min))
-
-            if bounding_box_dims is None:
-                c_max = s_max + (s_max - s_min) / 2
-                c_min = s_min - (s_max - s_min) / 2
-                c_min[2] = s_min[2]
-            else:
-                c_max = np.float32(bounding_box_dims[0])
-                c_min = np.float32(bounding_box_dims[1])
-                c_max = torch.from_numpy(c_max).to(dist.device)
-                c_min = torch.from_numpy(c_min).to(dist.device)
+            c_max = torch.from_numpy(np.asarray(cfg.data.bounding_box.max)).to(torch.float32).to(dist.device)
+            c_min = torch.from_numpy(np.asarray(cfg.data.bounding_box.min)).to(torch.float32).to(dist.device)
 
             # Generate a grid of specified resolution to map the bounding box
             # The grid is used for capturing structured geometry features and SDF representation of geometry
@@ -655,11 +635,6 @@ def main(cfg: DictConfig):
             sdf_nodes = sdf_nodes.reshape(-1, 1)
             vol_grid_max_min = torch.stack([c_min, c_max])
 
-            if cfg.data.normalize_coordinates:
-                sdf_node_closest_point = normalize(sdf_node_closest_point, c_max, c_min)
-            else:
-                sdf_node_closest_point = sdf_node_closest_point
-
             pos_volume_closest = volume_coordinates - sdf_node_closest_point
             pos_volume_center_of_mass = volume_coordinates - center_of_mass_normalized
             
@@ -786,9 +761,9 @@ def main(cfg: DictConfig):
                 surface_fields[:, 0] * surface_normals[:, 2] * surface_sizes[:, 0]
                 - surface_fields[:, 3] * surface_sizes[:, 0]
             )
-            print("Drag=", dirname, force_x_pred, force_x_true)
-            print("Lift=", dirname, force_z_pred, force_z_true)
-            print("Side=", dirname, force_y_pred, force_y_true)
+            print("Drag=", dirname, force_x_pred.cpu().numpy(), force_x_true.cpu().numpy())
+            print("Lift=", dirname, force_z_pred.cpu().numpy(), force_z_true.cpu().numpy())
+            print("Side=", dirname, force_y_pred.cpu().numpy(), force_y_true.cpu().numpy())
             aero_forces_all.append(
                 [
                     dirname,
@@ -808,7 +783,7 @@ def main(cfg: DictConfig):
             print(
                 "Surface L-2 norm:",
                 dirname,
-                torch.sqrt(l2_error) / torch.sqrt(l2_gt),
+                np.sqrt(l2_error.cpu().numpy()) / np.sqrt(l2_gt.cpu().numpy()),
             )
 
         if prediction_vol is not None:
@@ -832,7 +807,7 @@ def main(cfg: DictConfig):
             print(
                 "Volume L-2 norm:",
                 dirname,
-                torch.sqrt(l2_error) / torch.sqrt(l2_gt),
+                np.sqrt(l2_error.cpu().numpy()) / np.sqrt(l2_gt.cpu().numpy()),
             )
             l2_volume_all.append(torch.sqrt(l2_error) / torch.sqrt(l2_gt))
 
diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py
index 46f5848dc1..7839ce4de3 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe.py
@@ -304,15 +304,12 @@ def __init__(
 
         self.dataset = None
 
-    def compute_stl_scaling_and_surface_grids(
-        self,
-        stl_vertices: torch.Tensor,
-    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    def compute_stl_scaling_and_surface_grids(self) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
         """
         Compute the min and max for the defining mesh.
 
         If the user supplies a bounding box, we use that.  Otherwise,
-        it's created dynamically from the min/max of the stl vertices.
+        it raises an error.
 
         The returned min/max and grid are used for surface data.
         """
@@ -324,23 +321,17 @@ def compute_stl_scaling_and_surface_grids(
             s_min = self.config.bounding_box_dims_surf[1]
             surf_grid = self.default_surface_grid
         else:
-            # Create the grid dynamically
-            s_min = torch.amin(stl_vertices, 0)
-            s_max = torch.amax(stl_vertices, 0)
-            surf_grid = create_grid(s_max, s_min, self.config.grid_resolution)
+            raise ValueError("Bounding box dimensions are not set in config")
 
         return s_min, s_max, surf_grid
 
-    def compute_volume_scaling_and_grids(
-        self, s_min: torch.Tensor, s_max: torch.Tensor
-    ):
+    def compute_volume_scaling_and_grids(self) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
         """
         Compute the min and max and grid for volume data.
 
         If the user supplies a bounding box, we use that.  Otherwise,
-        it's created dynamically from the surface min/max.
+        it raises an error.
 
-        This will be 2x longer in x and y and the same in z as the surface bounding box.
         """
 
         # Determine the volume min / max locations
@@ -348,13 +339,8 @@ def compute_volume_scaling_and_grids(
             c_max = self.config.bounding_box_dims[0]
             c_min = self.config.bounding_box_dims[1]
             volume_grid = self.default_volume_grid
-
         else:
-            # Create the grid based on the surface grid
-            c_max = s_max + (s_max - s_min) / 2
-            c_min = s_min - (s_max - s_min) / 2
-            c_min[2] = s_min[2]
-            volume_grid = create_grid(c_max, c_min, self.config.grid_resolution)
+            raise ValueError("Bounding box dimensions are not set in config")
 
         return c_min, c_max, volume_grid
 
@@ -377,9 +363,7 @@ def downsample_geometry(
                 stl_vertices, geometry_points
             )
             if geometry_coordinates_sampled.shape[0] < geometry_points:
-                geometry_coordinates_sampled = pad(
-                    geometry_coordinates_sampled, geometry_points, pad_value=-100.0
-                )
+                raise ValueError("Sampled points is more than points in the surface mesh")
             geom_centers = geometry_coordinates_sampled
         else:
             geom_centers = stl_vertices
@@ -432,6 +416,8 @@ def process_surface(
             surface_sizes = surface_sizes[ids_in_bbox]
             if surface_fields is not None:
                 surface_fields = surface_fields[ids_in_bbox]
+            else:
+                raise ValueError("Surface fields are not present")
 
         ########################################################################
         # Perform Down sampling of the surface fields.
@@ -464,6 +450,8 @@ def process_surface(
             # Select out the sampled points for non-neighbor arrays:
             if surface_fields is not None:
                 surface_fields = surface_fields[idx_surface]
+            else:
+                raise ValueError("Surface fields are not present")
 
             # Subsample the normals and sizes:
             surface_normals = surface_normals[idx_surface]
@@ -560,6 +548,8 @@ def process_volume(
             volume_coordinates = volume_coordinates[ids_in_bbox]
             if volume_fields is not None:
                 volume_fields = volume_fields[ids_in_bbox]
+            else:
+                raise ValueError("Volume fields are not present")
 
         ########################################################################
         # Apply sampling to the volume coordinates and fields
@@ -584,6 +574,8 @@ def process_volume(
             # Apply the same sampling to the targets, too:
             if volume_fields is not None:
                 volume_fields = volume_fields[idx_volume]
+            else:
+                raise ValueError("Volume fields are not present")
 
             volume_coordinates = volume_coordinates_sampled
 
@@ -679,9 +671,7 @@ def process_data(self, data_dict):
 
         stl_coordinates = data_dict["stl_coordinates"]
 
-        s_min, s_max, surf_grid = self.compute_stl_scaling_and_surface_grids(
-            stl_coordinates
-        )
+        s_min, s_max, surf_grid = self.compute_stl_scaling_and_surface_grids()
 
         if isinstance(stl_coordinates, ShardTensor):
             mesh = stl_coordinates._spec.mesh
@@ -739,8 +729,7 @@ def process_data(self, data_dict):
             use_sign_winding_number=True,
         )
         return_dict["sdf_surf_grid"] = sdf_surf_grid
-        return_dict["surf_grid"] = surf_grid
-
+        
         # Store this only if normalization is active:
         if self.config.normalize_coordinates:
             return_dict["surface_min_max"] = torch.stack([s_min, s_max])
@@ -761,30 +750,11 @@ def process_data(self, data_dict):
         # Determine the volumetric bounds of the data:
         ########################################################################
         # Compute the min/max for volume an the unnomralized grid:
-        c_min, c_max, volume_grid = self.compute_volume_scaling_and_grids(s_min, s_max)
-
-        # For volume data, we store this only if normalizing coordinates:
-        if self.model_type == "volume" or self.model_type == "combined":
-            if self.config.normalize_coordinates:
-                return_dict["volume_min_max"] = torch.stack([c_min, c_max])
-
-        if self.model_type == "volume" or self.model_type == "combined":
-            volume_fields_raw = (
-                data_dict["volume_fields"] if "volume_fields" in data_dict else None
-            )
-            volume_dict = self.process_volume(
-                c_min,
-                c_max,
-                volume_coordinates=data_dict["volume_mesh_centers"],
-                volume_grid=volume_grid,
-                center_of_mass=center_of_mass,
-                stl_vertices=data_dict["stl_coordinates"],
-                stl_indices=mesh_indices_flattened,
-                volume_fields=volume_fields_raw,
-            )
-
-            return_dict.update(volume_dict)
+        c_min, c_max, volume_grid = self.compute_volume_scaling_and_grids()
 
+        ########################################################################
+        # Process the surface data
+        ########################################################################
         if self.model_type == "surface" or self.model_type == "combined":
             surface_fields_raw = (
                 data_dict["surface_fields"] if "surface_fields" in data_dict else None
@@ -806,6 +776,30 @@ def process_data(self, data_dict):
 
             return_dict.update(surface_dict)
 
+        ########################################################################
+        # Process the volume data
+        ########################################################################
+        # For volume data, we store this only if normalizing coordinates:
+        if self.model_type == "volume" or self.model_type == "combined":
+            return_dict["volume_min_max"] = torch.stack([c_min, c_max])
+
+        if self.model_type == "volume" or self.model_type == "combined":
+            volume_fields_raw = (
+                data_dict["volume_fields"] if "volume_fields" in data_dict else None
+            )
+            volume_dict = self.process_volume(
+                c_min,
+                c_max,
+                volume_coordinates=data_dict["volume_mesh_centers"],
+                volume_grid=volume_grid,
+                center_of_mass=center_of_mass,
+                stl_vertices=data_dict["stl_coordinates"],
+                stl_indices=mesh_indices_flattened,
+                volume_fields=volume_fields_raw,
+            )
+
+            return_dict.update(volume_dict)
+
         return return_dict
 
     def scale_model_targets(

From 0b721fc89bfbdc187fc28ee50e744e65548b8224 Mon Sep 17 00:00:00 2001
From: Rishikesh Ranade <dr.rranade@gmail.com>
Date: Tue, 14 Oct 2025 06:51:05 -0700
Subject: [PATCH 81/98] benchmarked code for accuracy, set configs, scaling
 factor calculation updated

---
 .../domino/src/conf/config.yaml               |  8 ++-
 .../external_aerodynamics/domino/src/test.py  | 53 ++++++++--------
 .../external_aerodynamics/domino/src/train.py |  5 +-
 physicsnemo/datapipes/cae/cae_dataset.py      | 60 ++++++++++++++-----
 physicsnemo/datapipes/cae/domino_datapipe.py  |  2 +-
 physicsnemo/models/domino/geometry_rep.py     | 19 ++++--
 physicsnemo/models/domino/model.py            | 14 ++++-
 7 files changed, 106 insertions(+), 55 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
index b5a3ebefbc..6b7fa5cb3a 100644
--- a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
+++ b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
@@ -85,8 +85,8 @@ data: # Input directory for training and validation data
   sample_in_bbox: true
   sampling: true
   scaling_factors: ${project_dir}/scaling_factors/scaling_factors.pkl
-  volume_sample_from_disk: false
-  max_samples_for_statistics: 200
+  volume_sample_from_disk: true
+  max_samples_for_statistics: 100
 
 # ┌───────────────────────────────────────────┐
 # │          Domain Parallelism Settings      │
@@ -118,7 +118,7 @@ model:
   use_surface_normals: true # Use surface normals and surface areas for surface computation?
   use_surface_area: true # Use only surface normals and not surface area
   integral_loss_scaling_factor: 100 # Scale integral loss by this factor
-  normalization: min_max_scaling # or mean_std_scaling
+  normalization: mean_std_scaling # or min_max_scaling
   encode_parameters: false # encode inlet velocity and air density in the model
   surf_loss_scaling: 5.0 # scale surface loss with this factor in combined mode
   vol_loss_scaling: 1.0 # scale volume loss with this factor in combined mode
@@ -144,6 +144,8 @@ model:
       processor_type: conv # conv or unet (conv is better; fno, fignet to be added)
       self_attention: false # can be used only with unet
       cross_attention: false # can be used only with unet
+      surface_sdf_scaling_factor: [0.01, 0.02, 0.04] # Scaling factor for SDF, smaller is more emphasis on surface
+      volume_sdf_scaling_factor: [0.04] # Scaling factor for SDF, smaller is more emphasis on surface
   nn_basis_functions: # Hyperparameters for basis function network
     base_layer: 512
     fourier_features: true
diff --git a/examples/cfd/external_aerodynamics/domino/src/test.py b/examples/cfd/external_aerodynamics/domino/src/test.py
index dc6b8ad27e..2fa0de34f7 100644
--- a/examples/cfd/external_aerodynamics/domino/src/test.py
+++ b/examples/cfd/external_aerodynamics/domino/src/test.py
@@ -33,6 +33,9 @@
 from hydra.utils import to_absolute_path
 from omegaconf import DictConfig, OmegaConf
 
+# This will set up the cupy-ecosystem and pytorch to share memory pools
+from physicsnemo.utils.memory import unified_gpu_memory
+
 import numpy as np
 import cupy as cp
 
@@ -57,7 +60,7 @@
 from physicsnemo.utils.domino.vtk_file_utils import *
 from physicsnemo.utils.sdf import signed_distance_field
 from physicsnemo.utils.neighbors import knn
-from utils import ScalingFactors
+from utils import ScalingFactors, load_scaling_factors
 
 # AIR_DENSITY = 1.205
 # STREAM_VELOCITY = 30.00
@@ -202,7 +205,10 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors):
                     running_tloss_vol += loss_fn(tpredictions_batch, target_batch)
                     prediction_vol[:, start_idx:end_idx] = tpredictions_batch
 
-            prediction_vol = unnormalize(prediction_vol, vol_factors[0], vol_factors[1])
+            if cfg.model.normalization == "min_max_scaling":
+                prediction_vol = unnormalize(prediction_vol, vol_factors[0], vol_factors[1])
+            elif cfg.model.normalization == "mean_std_scaling":
+                prediction_vol = unstandardize(prediction_vol, vol_factors[0], vol_factors[1])
             # print(np.amax(prediction_vol, axis=(0, 1)), np.amin(prediction_vol, axis=(0, 1)))
 
             prediction_vol[:, :, :3] = (
@@ -290,8 +296,12 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors):
                     running_tloss_surf += loss_fn(tpredictions_batch, target_batch)
                     prediction_surf[:, start_idx:end_idx] = tpredictions_batch
 
+            if cfg.model.normalization == "min_max_scaling":
+                prediction_surf = unnormalize(prediction_surf, surf_factors[0], surf_factors[1])
+            elif cfg.model.normalization == "mean_std_scaling":
+                prediction_surf = unstandardize(prediction_surf, surf_factors[0], surf_factors[1])
             prediction_surf = (
-                unnormalize(prediction_surf, surf_factors[0], surf_factors[1])
+                prediction_surf
                 * stream_velocity[0, 0] ** 2.0
                 * air_density[0, 0]
             )
@@ -348,21 +358,7 @@ def main(cfg: DictConfig):
     ######################################################
     pickle_path = os.path.join(cfg.data.scaling_factors)
 
-    try:
-        scaling_factors = ScalingFactors.load(pickle_path)
-    except FileNotFoundError:
-        raise FileNotFoundError(
-            f"Scaling factors not found at: {pickle_path}; please run compute_statistics.py to compute them."
-        )
-
-    # vol_factors = np.asarray([scaling_factors.max_val["volume_fields"], scaling_factors.min_val["volume_fields"]])
-    # surf_factors = np.asarray([scaling_factors.max_val["surface_fields"], scaling_factors.min_val["surface_fields"]])
-    
-    vol_factors = np.asarray([[ 2.9064691e+00, 1.3743978e+00,1.2992665e+00, 1.0714761e+00, 3.2597079e-03], [-2.9988267e+00, -1.3753892e+00, -1.2892706e+00, -1.1400493e+00, 1.0002602e-11]])
-    surf_factors = np.asarray([[ 1.8464564, 0.09996139, 0.07988136, 0.05437989], [-2.0476909, -0.10289095, -0.07811281, -0.05411612]])
-
-    vol_factors = torch.from_numpy(vol_factors).to(dist.device)
-    surf_factors = torch.from_numpy(surf_factors).to(dist.device)
+    vol_factors, surf_factors = load_scaling_factors(cfg)
     print("Vol factors:", vol_factors)
     print("Surf factors:", surf_factors)
 
@@ -457,14 +453,13 @@ def main(cfg: DictConfig):
 
         # SDF calculation on the grid using WARP
         time_start = time.time()
-        sdf_surf_grid = signed_distance_field(
+        sdf_surf_grid, _ = signed_distance_field(
             normed_stl_vertices_cp,
             mesh_indices_flattened,
             surf_grid_normed,
             use_sign_winding_number=True,
         )
-        sdf_surf_grid = sdf_surf_grid[0]
-        
+
         surf_grid_max_min = torch.stack([s_min, s_max])
         
         # Get global parameters and global parameters scaling from config.yaml
@@ -549,6 +544,8 @@ def main(cfg: DictConfig):
             if cfg.model.num_neighbors_surface > 1:
 
                 time_start = time.time()
+                # print(f"file: {dirname}, surface coordinates shape: {surface_coordinates.shape}")
+                # try:
                 ii, dd = knn(
                     points=surface_coordinates,
                     queries=surface_coordinates,
@@ -562,6 +559,13 @@ def main(cfg: DictConfig):
                 surface_neighbors_normals = surface_neighbors_normals[:, 1:]
                 surface_neighbors_sizes = surface_sizes[ii]
                 surface_neighbors_sizes = surface_neighbors_sizes[:, 1:]
+                # except:
+                #     print(f"file: {dirname}, memory error in knn")
+                #     print("setting surface neighbors to 0")
+                #     surface_neighbors = surface_coordinates
+                #     surface_neighbors_normals = surface_normals
+                #     surface_neighbors_sizes = surface_sizes
+                #     cfg.model.num_neighbors_surface = 1
             else:
                 surface_neighbors = surface_coordinates
                 surface_neighbors_normals = surface_normals
@@ -616,13 +620,12 @@ def main(cfg: DictConfig):
 
             # SDF calculation on the grid using WARP
             time_start = time.time()
-            sdf_grid = signed_distance_field(
+            sdf_grid, _ = signed_distance_field(
                 normed_stl_vertices_vol,
                 mesh_indices_flattened,
                 grid,
                 use_sign_winding_number=True,
             )
-            sdf_grid = sdf_grid[0]
             
             # SDF calculation
             time_start = time.time()
@@ -778,7 +781,7 @@ def main(cfg: DictConfig):
 
             l2_gt = torch.mean(torch.square(surface_fields), (0))
             l2_error = torch.mean(torch.square(prediction_surf[0] - surface_fields), (0))
-            l2_surface_all.append(torch.sqrt(l2_error / l2_gt))
+            l2_surface_all.append(np.sqrt(l2_error.cpu().numpy()) / np.sqrt(l2_gt.cpu().numpy()))
 
             print(
                 "Surface L-2 norm:",
@@ -809,7 +812,7 @@ def main(cfg: DictConfig):
                 dirname,
                 np.sqrt(l2_error.cpu().numpy()) / np.sqrt(l2_gt.cpu().numpy()),
             )
-            l2_volume_all.append(torch.sqrt(l2_error) / torch.sqrt(l2_gt))
+            l2_volume_all.append(np.sqrt(l2_error.cpu().numpy()) / np.sqrt(l2_gt.cpu().numpy()))
 
         # import pdb; pdb.set_trace()
         if prediction_surf is not None:
diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py
index eb8b8ba63d..37634b8715 100644
--- a/examples/cfd/external_aerodynamics/domino/src/train.py
+++ b/examples/cfd/external_aerodynamics/domino/src/train.py
@@ -343,9 +343,8 @@ def main(cfg: DictConfig) -> None:
     # Get scaling factors - precompute them if this fails!
     ######################################################
     vol_factors, surf_factors = load_scaling_factors(cfg)
-
-    # vol_factors = np.asarray([[ 2.9064691e+00, 1.3743978e+00,1.2992665e+00, 1.0714761e+00, 3.2597079e-03], [-2.9988267e+00, -1.3753892e+00, -1.2892706e+00, -1.1400493e+00, 1.0002602e-11]])
-    # surf_factors = np.asarray([[ 1.8464564, 0.09996139, 0.07988136, 0.05437989], [-2.0476909, -0.10289095, -0.07811281, -0.05411612]])
+    
+    vol_factors_tensor = vol_factors
 
     ######################################################
     # Configure the model
diff --git a/physicsnemo/datapipes/cae/cae_dataset.py b/physicsnemo/datapipes/cae/cae_dataset.py
index 4b3dd0bfde..a78157936a 100644
--- a/physicsnemo/datapipes/cae/cae_dataset.py
+++ b/physicsnemo/datapipes/cae/cae_dataset.py
@@ -1180,15 +1180,7 @@ def compute_mean_std_min_max(
             batch_mean = field_data.mean(axis=(0))
             batch_M2 = ((field_data - batch_mean) ** 2).sum(axis=(0))
             batch_n = field_data.shape[0]
-
-            # Update min/max
-            batch_min = field_data.amin(dim=(0))
-            batch_max = field_data.amax(dim=(0))
-
-            min_val[field_key] = torch.minimum(min_val[field_key], batch_min)
-
-            max_val[field_key] = torch.maximum(max_val[field_key], batch_max)
-
+                     
             # Update running mean and M2 (Welford's algorithm)
             delta = batch_mean - mean[field_key]
             N[field_key] += batch_n  # batch_n should also be torch.int64
@@ -1204,11 +1196,6 @@ def compute_mean_std_min_max(
         print(f"on iteration {i} of {max_samples}, time: {iteration_time:.2f} seconds")
         start = time.perf_counter()
 
-    global_end = time.perf_counter()
-    global_time = global_end - global_start
-
-    print(f"Total time: {global_time:.2f} seconds for {max_samples} samples")
-
     var = {}
     std = {}
     for field_key in field_keys:
@@ -1217,4 +1204,47 @@ def compute_mean_std_min_max(
         )  # Convert N to Python int for division
         std[field_key] = torch.sqrt(var[field_key])
 
-    return mean, std, min_val, max_val
+    start = time.perf_counter()
+    for i, data in enumerate(dataset):
+        if i >= max_samples:
+            break
+
+        for field_key in field_keys:
+            field_data = data[field_key]
+
+            batch_n = field_data.shape[0]
+
+            # # Update min/max
+            
+            mean_sample = mean[field_key]
+            std_sample = std[field_key]
+            # import pdb; pdb.set_trace()
+            mask = torch.ones_like(field_data, dtype=torch.bool)
+            for v in range(field_data.shape[-1]):
+                idx = (field_data[:, v] < mean_sample[v] - 12 * std_sample[v]) | (field_data[:, v] > mean_sample[v] + 12 * std_sample[v])
+                idx = torch.where(idx)
+                mask[idx] = False
+            
+            batch_min = []
+            batch_max = []
+            for v in range(field_data.shape[-1]):
+                batch_min.append(field_data[mask[:, v], v].min())
+                batch_max.append(field_data[mask[:, v], v].max())
+            
+            batch_min = torch.stack(batch_min)
+            batch_max = torch.stack(batch_max)
+
+            min_val[field_key] = torch.minimum(min_val[field_key], batch_min)
+            max_val[field_key] = torch.maximum(max_val[field_key], batch_max)
+                     
+        end = time.perf_counter()
+        iteration_time = end - start
+        print(f"on iteration {i} of {max_samples}, time: {iteration_time:.2f} seconds")
+        start = time.perf_counter()
+
+    global_end = time.perf_counter()
+    global_time = global_end - global_start
+
+    print(f"Total time: {global_time:.2f} seconds for {max_samples} samples")
+
+    return mean, std, min_val, max_val
\ No newline at end of file
diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py
index 7839ce4de3..99f086e024 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe.py
@@ -859,7 +859,7 @@ def set_dataset(self, dataset: Iterable) -> None:
 
         if self.config.volume_sample_from_disk:
             # We deliberately double the data to read compared to the sampling size:
-            self.dataset.set_volume_sampling_size(10 * self.config.volume_points_sample)
+            self.dataset.set_volume_sampling_size(100 * self.config.volume_points_sample)
 
     def __len__(self):
         if self.dataset is not None:
diff --git a/physicsnemo/models/domino/geometry_rep.py b/physicsnemo/models/domino/geometry_rep.py
index 0268e8f160..c2fbe9f606 100644
--- a/physicsnemo/models/domino/geometry_rep.py
+++ b/physicsnemo/models/domino/geometry_rep.py
@@ -28,7 +28,7 @@
 # from .encodings import fourier_encode
 
 
-def scale_sdf(sdf: torch.Tensor) -> torch.Tensor:
+def scale_sdf(sdf: torch.Tensor, scaling_factor: float = 0.04) -> torch.Tensor:
     """
     Scale a signed distance function (SDF) to emphasize surface regions.
 
@@ -42,7 +42,7 @@ def scale_sdf(sdf: torch.Tensor) -> torch.Tensor:
     Returns:
         Tensor with scaled SDF values in range [-1, 1]
     """
-    return sdf / (0.4 + torch.abs(sdf))
+    return sdf / (scaling_factor + torch.abs(sdf))
 
 
 class GeoConvOut(nn.Module):
@@ -263,6 +263,7 @@ def __init__(
         radii: Sequence[float],
         neighbors_in_radius,
         hops=1,
+        sdf_scaling_factor: Sequence[float] = [0.04],
         model_parameters=None,
         # activation_conv: nn.Module,
         # activation_processor: nn.Module,
@@ -281,6 +282,7 @@ def __init__(
         self.self_attention = geometry_rep.geo_processor.self_attention
         self.activation_conv = get_activation(geometry_rep.geo_conv.activation)
         self.activation_processor = geometry_rep.geo_processor.activation
+        self.sdf_scaling_factor = sdf_scaling_factor
 
         self.bq_warp = nn.ModuleList()
         self.geo_processors = nn.ModuleList()
@@ -389,7 +391,7 @@ def __init__(
         elif geometry_rep.geo_processor.processor_type == "conv":
             self.geo_processor_sdf = nn.Sequential(
                 GeoProcessor(
-                    input_filters=6,
+                    input_filters=5+len(self.sdf_scaling_factor),
                     output_filters=geometry_rep.geo_conv.base_neurons_out,
                     model_parameters=geometry_rep.geo_processor,
                 ),
@@ -465,15 +467,22 @@ def forward(
         if self.geo_encoding_type == "both" or self.geo_encoding_type == "sdf":
             # Expand SDF
             sdf = torch.unsqueeze(sdf, 1)
-            # Scaled sdf to emphasize near surface
-            scaled_sdf = scale_sdf(sdf)
             # Binary sdf
             binary_sdf = torch.where(sdf >= 0, 0.0, 1.0)
             # Gradients of SDF
             sdf_x, sdf_y, sdf_z = torch.gradient(sdf, dim=[2, 3, 4])
 
+            scaled_sdf = []
+            # Scaled sdf to emphasize near surface
+            for s in range(len(self.sdf_scaling_factor)):
+                s_sdf = scale_sdf(sdf, self.sdf_scaling_factor[s])
+                scaled_sdf.append(s_sdf)
+                
+            scaled_sdf = torch.cat(scaled_sdf, dim=1)
+
             # Process SDF and its computed features
             sdf = torch.cat((sdf, scaled_sdf, binary_sdf, sdf_x, sdf_y, sdf_z), 1)
+
             sdf_encoding = self.geo_processor_sdf(sdf)
             sdf_encoding = self.geo_processor_sdf_out(sdf_encoding)
 
diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py
index aea2e91ad4..20be346dcb 100644
--- a/physicsnemo/models/domino/model.py
+++ b/physicsnemo/models/domino/model.py
@@ -30,7 +30,7 @@
 from .encodings import (
     MultiGeometryEncoding,
 )
-from .geometry_rep import GeometryRep
+from .geometry_rep import GeometryRep, scale_sdf
 from .mlps import AggregationModel
 from .solutions import SolutionCalculatorSurface, SolutionCalculatorVolume
 
@@ -298,6 +298,7 @@ def __init__(
             radii=model_parameters.geometry_rep.geo_conv.volume_radii,
             neighbors_in_radius=model_parameters.geometry_rep.geo_conv.volume_neighbors_in_radius,
             hops=model_parameters.geometry_rep.geo_conv.volume_hops,
+            sdf_scaling_factor=model_parameters.geometry_rep.geo_processor.volume_sdf_scaling_factor,
             model_parameters=model_parameters,
         )
 
@@ -306,6 +307,7 @@ def __init__(
             radii=model_parameters.geometry_rep.geo_conv.surface_radii,
             neighbors_in_radius=model_parameters.geometry_rep.geo_conv.surface_neighbors_in_radius,
             hops=model_parameters.geometry_rep.geo_conv.surface_hops,
+            sdf_scaling_factor=model_parameters.geometry_rep.geo_processor.surface_sdf_scaling_factor,
             model_parameters=model_parameters,
         )
         
@@ -351,8 +353,9 @@ def __init__(
         position_encoder_base_neurons = model_parameters.position_encoder.base_neurons
         self.activation = get_activation(model_parameters.activation)
         self.use_sdf_in_basis_func = model_parameters.use_sdf_in_basis_func
+        self.sdf_scaling_factor = model_parameters.geometry_rep.geo_processor.volume_sdf_scaling_factor
         if self.output_features_vol is not None:
-            inp_pos_vol = 7 if model_parameters.use_sdf_in_basis_func else 3
+            inp_pos_vol = 7 + len(self.sdf_scaling_factor) if model_parameters.use_sdf_in_basis_func else 3
 
             self.fc_p_vol = FourierMLP(
                 input_features=inp_pos_vol,
@@ -519,13 +522,18 @@ def forward(self, data_dict):
 
             # SDF on volume mesh nodes
             sdf_nodes = data_dict["sdf_nodes"]
+            scaled_sdf_nodes = []
+            for i in range(len(self.sdf_scaling_factor)):
+                scaled_sdf_nodes.append(scale_sdf(sdf_nodes, self.sdf_scaling_factor[i]))
+            scaled_sdf_nodes = torch.cat(scaled_sdf_nodes, dim=-1)
+
             # Positional encoding based on closest point on surface to a volume node
             pos_volume_closest = data_dict["pos_volume_closest"]
             # Positional encoding based on center of mass of geometry to volume node
             pos_volume_center_of_mass = data_dict["pos_volume_center_of_mass"]
             if self.use_sdf_in_basis_func:
                 encoding_node_vol = torch.cat(
-                    (sdf_nodes, pos_volume_closest, pos_volume_center_of_mass), dim=-1
+                    (sdf_nodes, scaled_sdf_nodes, pos_volume_closest, pos_volume_center_of_mass), dim=-1
                 )
             else:
                 encoding_node_vol = pos_volume_center_of_mass

From 992c0872b6cd560efb9c3ec3a2a15b20e29e233c Mon Sep 17 00:00:00 2001
From: Rishikesh Ranade <dr.rranade@gmail.com>
Date: Tue, 14 Oct 2025 10:57:51 -0700
Subject: [PATCH 82/98] fixing merge issue in datapipe

---
 .../cfd/external_aerodynamics/domino/src/conf/config.yaml   | 6 +++---
 physicsnemo/datapipes/cae/domino_datapipe.py                | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
index 6b7fa5cb3a..b1b25515b4 100644
--- a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
+++ b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
@@ -71,8 +71,8 @@ variables:
 # │         Data Configs                      │
 # └───────────────────────────────────────────┘  
 data: # Input directory for training and validation data
-  input_dir: /lustre/rranade/modulus_dev/data/aws_data_all/
-  input_dir_val: /lustre/rranade/modulus_dev/data/aws_data_all_val/
+  input_dir: /user/data/aws_data_all/
+  input_dir_val: /user/data/aws_data_all_val/
   bounding_box: # Bounding box dimensions for computational domain
     min: [-3.5, -2.25, -0.32]
     max: [8.5, 2.25, 3.00]
@@ -209,7 +209,7 @@ eval: # Testing configurable parameters
   test_path: /user/testing_data # Dir for testing data in raw format (vtp, vtu ,stls)
   save_path: /user/predicted_data # Dir to save predicted results in raw format (vtp, vtu)
   checkpoint_name: DoMINO.0.455.pt # Name of checkpoint to select from saved checkpoints
-  scaling_param_path: /lustre/rranade/modulus_dev/corey_fork/physicsnemo/examples/cfd/external_aerodynamics/domino/outputs/DrivAerML_Dataset/
+  scaling_param_path: /user/scaling_params
   refine_stl: False # Automatically refine STL during inference
   #TODO -  This was hardcoded anyways, remove it.
   # stencil_size: 7 # Stencil size for evaluating surface and volume model
diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py
index 99f086e024..05243e99a5 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe.py
@@ -729,6 +729,7 @@ def process_data(self, data_dict):
             use_sign_winding_number=True,
         )
         return_dict["sdf_surf_grid"] = sdf_surf_grid
+        return_dict["surf_grid"] = surf_grid
         
         # Store this only if normalization is active:
         if self.config.normalize_coordinates:

From 34997ed7ab7aaf30158975d3bde5b1bfc9af95e7 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Tue, 14 Oct 2025 13:55:20 -0700
Subject: [PATCH 83/98] Update readme to include shuffling and performance
 notes.

---
 .../external_aerodynamics/domino/README.md    |  57 +++++-
 .../src/shuffle_volumetric_curator_output.py  | 176 ++++++++++++++++++
 2 files changed, 228 insertions(+), 5 deletions(-)
 create mode 100644 examples/cfd/external_aerodynamics/domino/src/shuffle_volumetric_curator_output.py

diff --git a/examples/cfd/external_aerodynamics/domino/README.md b/examples/cfd/external_aerodynamics/domino/README.md
index a786772071..a6cafd803d 100644
--- a/examples/cfd/external_aerodynamics/domino/README.md
+++ b/examples/cfd/external_aerodynamics/domino/README.md
@@ -127,7 +127,7 @@ knowledge of the dataset:
 
 To facilitate setting reasonable values of these, you can use the
 `compute_statistics.py` script.  This will load the core dataset as defined
-in your `config.yaml` file, loop over several events (20, by default), and
+in your `config.yaml` file, loop over several events (200, by default), and
 both print and store the surface/volume field statistics as well as the
 coordinate statistics.  
 
@@ -211,9 +211,6 @@ The `domain_size` represents the number of GPUs used for each batch - setting
 but with extra overhead.  `shard_grid` and `shard_points` will enable domain
 parallelism over the latent space and input/output points, respectively.
 
-Please see `src/train_sharded.py` for more details regarding the changes
-from the standard training script required for domain parallel DoMINO training.
-
 As one last note regarding domain-parallel training: in the phase of the DoMINO
 where the output solutions are calculated, the model can used two different
 techniques (numerically identical) to calculate the output.  Due to the
@@ -245,7 +242,13 @@ To mitigate this, by default in DoMINO we use the Rapids Memory Manager
 to disable this you can do so with an environment variable:
 
 ```bash
-export DOMINO_DISABLE_RMM=True
+export PHYSICSNEMO_DISABLE_RMM=True
+```
+
+Or remove this line from the training script:
+
+```python
+from physicsnemo.utils.memory import unified_gpu_memory
 ```
 
 > Note - why not make it configurable?  We have to set up the shared memory
@@ -253,6 +256,50 @@ export DOMINO_DISABLE_RMM=True
 > been read.  So, we enable by default and the opt-out path is via the
 > environment.
 
+#### Reduced Volume Reads
+
+The dataset size for volumetric data can be quite substantial - DrivAerML, for
+example, has mesh sizes of 160M points per example.  Even though the models
+do not process all 160M points, in order to down sample dynamically they all
+must be read from disk - which can exceed bandwidth and CPU decoding capacity
+on nodes with multiple GPUs.
+
+As a performance enhancement, DoMINO's data pipeline offers a mitigation: instead
+of reading an entire volumetric mesh, during preprocessing we _shuffle_ the
+volumetric inputs and outputs (in tandem) and subsequent reads choose random
+slices of the volumetric data.  By default, DoMINO will read about 100x more data
+than necessary for the sampling size.  This allows the pipeline to still apply
+cuts for data inside of the bounding box, and further random sampling to improve
+training stability.  To enable/disable this parameter, set
+`data.volume_sample_from_disk=True` (enable) or `False` (disable)
+
+> Note - if you volumetric data is not larger than a few million mesh points,
+> pre-shuffling and sampling from disk is likely not necessary for you.
+
+`physicsnemo-curator` supports shuffling the volumetric data during preprocessing.
+If, however, you've already preprocessed your data and just want to apply
+shuffling, use the script at `src/shuffle_volumetric_curator_output.py`
+
+The shuffling script will also apply sharding to the output files, which
+improves IO performance.  So, `zarr>=3.0` is required to use the outputs from
+curator.  `src/shuffle_volumetric_curator_output.py` is meant to be an example of how
+to apply shuffling, so modify and update as you need for your dataset.
+
+> If you have tensorstore installed (it's in `requirements.txt`), the data reader
+> will work equally well with Zarr 2 or Zarr 3 files.
+
+#### Overall Performance
+
+DoMINO is a computationally complex and challenging workload.  Over the course
+of several releases, we have chipped away at performance bottlenecks to speed
+up the training and inference time (with `inference_on_stl.py`).  Overall
+training performance has decreased from about 5 days to just over 4 hours, with
+eight H100 GPUs.  We hope these optimizations enable you to explore more
+parameters and surrogate models; if there is a performance issue you see,
+please open an issue on GitHub.
+
+![Results from DoMINO for RTWT SC demo](../../../../docs/img/domino_perf.png)
+
 ### Training with Physics Losses
 
 DoMINO supports enforcing of PDE residuals as soft constraints. This can be used
diff --git a/examples/cfd/external_aerodynamics/domino/src/shuffle_volumetric_curator_output.py b/examples/cfd/external_aerodynamics/domino/src/shuffle_volumetric_curator_output.py
new file mode 100644
index 0000000000..02678d9e61
--- /dev/null
+++ b/examples/cfd/external_aerodynamics/domino/src/shuffle_volumetric_curator_output.py
@@ -0,0 +1,176 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import multiprocessing as mp
+from functools import partial
+
+import numpy as np
+import shutil
+
+import zarr
+from numcodecs import Blosc
+
+"""
+This script reads each zarr file from a specified directory, and copies the
+data to the output directory.  For the keys "volume_fields" and "volume_mesh_centers",
+the script will apply a permutation (aka shuffle) of those fields in tandem.
+
+Since the datasets used are often very large, this script also applies
+sharding to the output files which is a Zarr3 feature.
+
+Therefore, zarr >= 3.0 is required.
+"""
+
+
+def check_file_completeness(input_file: str, output_file: str) -> bool:
+    """
+    Check if the output file exists and contains all required data from input file.
+    """
+    if not os.path.exists(output_file):
+        return False
+
+    in_file = zarr.open(input_file, mode="r")
+    try:
+        out_file = zarr.open(output_file, mode="r")
+    except zarr.errors.PathNotFoundError:
+        print(f"No output, returning False")
+        return False
+
+    # Check if all keys except 'filename' exist and have same shapes
+    for key in in_file.keys():
+        if key == "filename":
+            continue
+        if key not in out_file and key not in out_file.attrs:
+            print(f"Key {key} not in output, returning False")
+            return False
+        if isinstance(in_file[key], zarr.Array):
+            if key in out_file.attrs:
+                continue
+            if in_file[key].shape != out_file[key].shape:
+                print(f"Key {key} shape mismatch, returning False")
+                return False
+    return True
+
+
+def store_array(store, name: str, data: np.ndarray):
+    # By default, chunk size is 10k points:
+    chunk_size = (10_000,) + data.shape[1:]
+    # By default, shard size is 2 million points:
+    shard_size = (2_000_000,) + data.shape[1:]
+
+    zarr.create_array(
+        store=store,
+        name=name,
+        data=data,
+        chunks=chunk_size,
+        shards=shard_size,
+        compressors="auto",
+    )
+
+
+def copy_file_with_shuffled_volume_data(input_file: str, output_file: str):
+    """
+    Copy a file with shuffled volume data, using Zarr v3 sharding for efficient storage.
+    Only processes if the output file doesn't exist or is incomplete.
+    """
+    file_is_complete = check_file_completeness(input_file, output_file)
+    if file_is_complete:
+        print(f"Skipping {output_file} - already complete")
+        return True
+
+    print(f"Processing {input_file} -> {output_file}")
+
+    # return False
+
+    # if the output folder exists but isn't complete, purge it.
+    # It's probably an interrupted conversion.
+    if os.path.exists(output_file):
+        shutil.rmtree(output_file)
+
+    # return file_is_complete
+    volume_keys = ["volume_fields", "volume_mesh_centers"]
+
+    in_file = zarr.open(input_file, mode="r")
+
+    # Create store with sharding configuration
+    store = zarr.storage.LocalStore(output_file)
+    root = zarr.group(store=store)
+
+    # First copy all non-volume data
+    for key in in_file.keys():
+        if key not in volume_keys:
+            if key == "filename":
+                continue
+            in_data = in_file[key]
+            if in_data.shape != ():
+                # For array data, use the same chunks as input but with sharding
+                store_array(store, key, in_data[:])
+            else:
+                # Store scalar values as attributes
+                root.attrs[key] = in_data[()]
+
+    # Open and shuffle the volume data
+    volume_fields = in_file["volume_fields"][:]
+    volume_mesh_centers = in_file["volume_mesh_centers"][:]
+
+    # Generate a permutation
+    permutation = np.random.permutation(volume_fields.shape[0])
+
+    # Shuffle the volume data
+    shuffled_volume_fields = volume_fields[permutation]
+    shuffled_volume_mesh_centers = volume_mesh_centers[permutation]
+
+    store_array(store, "volume_fields", shuffled_volume_fields)
+    store_array(store, "volume_mesh_centers", shuffled_volume_mesh_centers)
+
+    print(f"Processed {output_file} - COMPLETE")
+    return True
+
+
+def process_file(file: str, top_dir: str, out_dir: str):
+    """
+    Process a single file, creating output directory if needed.
+    """
+    os.makedirs(out_dir, exist_ok=True)
+    input_path = os.path.join(top_dir, file)
+    output_path = os.path.join(out_dir, file)
+    return copy_file_with_shuffled_volume_data(input_path, output_path)
+
+
+def main():
+    top_dir = "/lustre/fsw/coreai_modulus_cae/coreya/datasets/domino/val/"
+    out_dir = "/lustre/fsw/coreai_modulus_cae/coreya/datasets/domino/val_shuffled2/"
+
+    # Get list of files to process
+    files = os.listdir(top_dir)
+    files = files[0:2]
+
+    # Create a partial function with fixed directories
+    process_func = partial(process_file, top_dir=top_dir, out_dir=out_dir)
+
+    # Use multiprocessing to process files in parallel
+    num_cores = max(1, 64)  # Leave one core free
+    print(f"Processing {len(files)} files using {num_cores} cores")
+
+    with mp.Pool(num_cores) as pool:
+        results = pool.map(process_func, files)
+        print(f"Results: {results}")
+        print(f"Total conversions: {sum(results)}")
+
+
+if __name__ == "__main__":
+    main()

From 97e4354011e6da407505204600a018a55ff5618c Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Tue, 14 Oct 2025 15:59:49 -0500
Subject: [PATCH 84/98] Add domino perf plot

---
 docs/img/domino_perf.png | Bin 0 -> 59746 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 docs/img/domino_perf.png

diff --git a/docs/img/domino_perf.png b/docs/img/domino_perf.png
new file mode 100644
index 0000000000000000000000000000000000000000..0038267354894237f05051f3ea65a456ea8a1b6e
GIT binary patch
literal 59746
zcmeFZcRZE-A3uH&N~A%ejJr~{GLlUcmAz#a$KKnKU8ID}%wr^E6|y;rlD$`UwqtV~
z+xK<4Klk_d|L^bb^?2Nm+fiND`#RVAHJ;;jg{UaWQBg2ZAP@+uhw`#&2n3lF0zvZP
zBpLi>SMe|%{wLycU&lq=-ps}QsS^^R_|(P0#@@xo@)?U8(#hG<-j1K=_AMSkE*1+H
z7YAn%US8Y(`!{&(oy>W4_bo)>ET<ggb)69i>ZgSNh_WTKED?zL_J^|48XieYqvY=|
z%;1j?Wq8%1Zz$#vQv?umu&8Cee*Kz}{WYTv+h8q+){=n@+sVPxuUYztuWCujl1f*y
z-6vXLnXZt)uf`vk7h4X?58<(uPPRqy3T8K&Oz{$U;VdbZvnMaX)&Cw+4D9dcss8(f
z_+@)S>c0<CzyCoH|M#K$4AI&DevWWE>G$8yekC%96aRhqz(~yU-_I<|XAwmIeXtBh
zoc!-+dC~uOiU0RSArSv>M<EdZi;IocIkc}GDFw|hmmJ+<vveV<-K4QZI(-NKzE7Gh
zkD}S0W<H83s&vIZ$x=#QI@(_?>&w-?iMl{^y+Vq`SXpfSg=p}6RlX6%CuHn80&zBg
z5N)0oKVH*5CHJj2cofTJ3ICuhBV6Kl``sx@udOb*<zah_;ug>KnAO_-_Qk$jbIn|>
zuk7j{b$l+YdNuj78&sC`6&MV+OYGZR7P3oP9<9Rr**RA@&E!3f<5_VUacpGloO|HH
zkloVkmE$n^UF*xU`*@KthkB~*9%V`0Pn8QF6Z5?Hx3_;@wjVU|Jw!HMaSpAr87c4j
zsEoEN8+A*3dqtS9EAiCz);%0{c`}%}w7{V1$ycks5rpshpC9>;VmSK0n8;PlrFboc
zOCH@#9Tu?a%`R`(bRTuqZg4HJ9^ff(onI}}(aL`^6ZQi)U}RtAw%q*nP`}hBe$;1w
z*=^}d<CEQu83Tt>=|HlTBys22R+~cYxys4<;Nep@lZ=CzP}%!c>eA!R?ZS;Wl#=EH
zgb|1j8N@`jGYG^ler$h!ajo}0nZ57&54t+vNWoWy#=+(-vD|iRip62>t7D7@-t|FL
zwrdl0Zj&$Ro^++(=av**_z<<!!i#YkpcY-aCA1TPwqR+_m>B%1m^gxyIV0Q<CV8}@
z&~m)LirK~%Vt&uIB?e-0jWD$}yR(UQpM}oxuL#a_30n2Ovi3XWDJR`;+4Hdi%kWFT
z+~Mh3?H$|S<wVNiN5~!6FC%<M{<UK@Uc<#564uD(=r0=U{d)F8o^#1_1=fC!8`Di%
zIck{`<{z~S4OhxX;P<NJYW^;`tsdp!HlcAFQMV<BGTd(eIOj_WTBmXL%Xe%XFazV)
z_mW1OTDW;EA_lf9dOaVzt&GI~bEz|1^%`CFdi&$b7yPEb&gy+O_OB{nS^53ddeoKF
z{dcMDWqrG?`7R#$v)q@Qm%k2FItQVTW_!GcaJ}lv6}G+O{uCn2uDz;mozmnL&a3_T
zdZT=F1}^W!@gvSWx6T)qjr$DqR4wu0%RU<BUJDoNK3q;6P@i+_l6^T~=*8_JaRR|&
z%SI?iXF?eStt|fdS8Xj13oY@%U7S=%7U>%`%2r96*icNoL&c?AbUgz7)n<s#L+q1A
z&iP**g!PGCtYm6}=Sd73-Y)gokJoiGEis&QI@KPI6H@z2#T@~|sU5_Tjk$%j49{&{
zbZeh`Hf;95QqHqIDY0-Ef+s~@YB$OtPs{zJ<fPqxBeQSP!VyGk#r{h5X8Q{p<_LMh
z)?C|E{UHDD+f=wD(_pc=^4ihiKA!yN+skxpnx8ZrDy$1@w*Bn0+EtI7W80tVW@3tq
z_L56{4)>%wQp5wptCosdh#MK*^;<E^e{S0hex5)n-JN_v>APt=RK)7AI}=B~YClk5
z5H!G=(3K%a&a7Q#H~MX)!Z{GX{QE0sy|quTs$9h!F@hyoh4?hW?|O#rz$fWK`WFf7
z1Lck;`27m!xhKVDZ8kqc>0^y7XS14xEwyG}ox66vMwmU#xGuo0U)Q{1<6<RG!m_YM
zXR53{gWA@?htQJSCO=R3;t%oTRpXh&ZJ)H*#g)rjb1rf}8H{9oyp&*FApA{_{CYn`
zs)hTww|mg)2)3ZA{WT5y`Ge#l@?KFnR$6goiQNw^Yc14DqRRr8C4H(nN$*&8U-yFG
z(Pz&|>aMH~<wur(YRzg^_l$nId9c}0wd&lpuEmbgne9NY@=40lU)#H^7H^*dPp^WD
zDMm2b$Y=M*-qu3Y)FBaqC7#f5u!Q8y7C(~kMAB<mNgp39GWQ;2BU@s(A>gV+`m@kJ
zsC1#5nc4RBO*he3s>NLwH6E$_GpxaX)NnzWa!d;oDrnuWQ?w6%-FCdzn;zBsLT!ki
zN-U^~THc%&b5OKAju|&7R7ndkaQs25N<uVtf~tu(>bwRDJL25odO-t{bSx*fS9k&P
z9_98#IeyG5-)5-jX`DBtqY>dp8a@sS-I=Nb;x6-@#=4rM3TGq_);_ne@BB&LlsH(c
ze}18p7%@#vXp!Wdte)j3;H)ILK8@%32a9`cUq|&sJbO}Rmpr+hr(1l!lw00ggnE~J
zO(0Ns=w*sfUk~EjF_wEqHN(btRxWQfdZ^f3&nI>4@?u6zB2hOir33e8?PyjMw)-`n
zTtpB<VT<qap*eos*SB{-J&LiqH5s<6oc^6H&%=0F!H3G@L=ouQA`>1-h{NLg3ymb*
z^>okj)@(%@c;osU#SNt|w9Tpqx(aq_+`3P7D@JuP=6>@jK^3h)cR^Okor%EF)K?6V
zK^aT5+__|9RwX%0vDd=xem`?v(BTkbPM_=r1Nu`_c7lX?R#UZEi_d18kR7JPP>@q|
z_qE&d?+TN53imEoazx!QWSG#p)BWein^7)Qn-XqYo#dvcVn+`Ca}j0l3%%8+HI30j
zk$tmO*nvWK))@h3Ei_dOud46`Z>HWPanV(xeC5msckD*yCDc*&xt0!5HxvVkU8la<
z4&T8a=~p;q?pIsgC`}kBv+Cn$!8SkseFHP@v(NpfC}AkCTir@z$y=%Svkv-rW2R-!
zLvMR!6dM-b{x(s_-VNKkozwf+PRL>ao7VzQe`Ghqs_)av!MXgWPFfabyVahXBc7V8
zir(!d%&@mYvFld)`5VG0Uu(qvdy_E7VY(IlLT5iRd0a)eqgwp^7(`36`J=-8c3&&5
z{Gjdx+X82FJ9bY2nW?(zFJf<<D0mklL$YeqKP+mDpX%}*7hNij)6ulQ?FFa%m6JJf
zy8~ZBx5VG)Rhtc-@4&|;Sa?<coL^9yn;iN0kcWv>NW<!u)|xKbQB{~HW|D#~Z=+Cq
z{JuzB!;`{();&>0G!;WS38MR6dSV0u5rJ;#IUyc7TIEj8q4|mC`E%$;3<9whS_S&_
zcUP-7^{=V>7OSkL{~V;2JaFiJkN}}ZwXY*s(!WW~Bz9Lu#&oqiU$5+&<nh4;{SM6W
zQDT~7_OVkPDP8$&^Sc*b3LhS{eJRDyCfc8gW>dY|a~RC%_TE~8nU}G+k;#jz-ue%Q
zhDZOdL%Z;ttj!JY)P&T1@y%9#`qK5ZV5ZktZ=vTqYf@0->sPW2_a(gdPV^TTB;Hi)
zjN{h3co&;jObn%sV*da#?<q0Z^!~?3`@D;&J2rz8V^~6|pkY&C`Qx<+vG1bzc?#Fe
zS!lYWA8feg(`)X~{o&#5GSxO$a>`3**|hR>23;ASuhsc`tyJ9n>^#>ll@5!6b8y$2
z^itl9mT-okP)kMO7N6oD7Gf>4k}vKomyKVQP=>AcU83I_C1%yj<}N{57TR8%))r}E
z@LhGvTwJ*=L9kxpBP3q`_GD4=i(EQq9VL4eL`F<%Itm$>$}D?sEV?E8>@7T9-DD`m
zZ2z`isb)g?29i+`Z_c*0d=_^fag-ZeB@_I547u5)_GsK2pX}8}Exy(HYFNM8WBpr<
zwjuovq_d0ytW<Iter5Es$41jlk{C|yu<|x?Mi-TdQ2zFl3h!_IGwZ%|u)9gZDC!pK
zp_HSZr8EI;0_BI-vD|uOWrxw~N+A^on%V}m`iA4@y4u+%B@)-UE-%<;$_h${<Ghpl
zFw4C!s!f|B>l(+sR%>oELoAP*4&oPjvVskiRweD}y~`Sk>_#govEER}ic5GAi0ir2
z@%I2$r+0H{<Br!BZLB}$Bv1!i|KUXY?B!si(uSrJ1br4V<DS66NAr~0B7J6Jw3nbp
zI|-&~`bf`M9~29VquQ5;%c`&!ZYVeuzY!<26*WhD*!~hrbs=ZN;#Xf4U+X@Hd+yJW
zqaJWj=bzT(^lDEMwtMi|?2)xwk(tPw{xtd#d(5HRpf}{^$7_{B*xQjFrP?)`0;2lj
znzcuJ*4jOKL$|Y1P@g!b(&DSSEP{5~vy%RVp|v?ozxfdr*!%HOl&p%>{$>_r9-vh8
zJ$@^DYI*lu&c$*$T@h5r{G|{>Q9<>5&D_|;<=n&W_2<wcjrb_R?vZisOG=RNuEZ@3
z_0*)GsB@WW!m5hMHF#wCH3hNy@hT{vns7mWF*VzLJ-dn@1_KHnHFP4bi_>Nei@07J
zZQPemEEFoQxTiBn{rl-3zd!{puPRWWiJ|NI(zRrOl(Fz`KBHDaV<6yVK+pd!f!cj#
zWT>q?26D(a?A95xOkTax)*5uQ|0d@6u%pj>An(utJwWnsm0A+{X@~uSRLwn!z4>eI
zdc|1W6ZU-tw77SyYmzHwOqBWgnsiV{WtR0o!3sY%dEkJ*jqs}~Xk|w|6&sZc^TR59
zhqq83*gkUuew_GZ3UBnbOkr@Ta<peEK7Csbt8NyVZ~-gWRiRQCGiH?L(yqFSAaq_q
zPBJ2Ni{4rEJ0fU}0{tPR9yDES^&(0l3m+!D^!FEz6)-!0-q=+|v!{1_YCjZ_V>fw(
z?W$co%-`%KKmh_K^5QNr5c#;5#OWnH=a5_&ouAS1<a&)4-ffsIfmOnylO+!KVc_0A
zzFl;+e3jPQeBo>VzGS!XoK#Yot6hJQ;Q|JFzN$^P8We5>A1)DylH}1XYN+Y2%=Z1h
zHd$X^VBh-wwi|$hDn8lfUi!NZsa{lq7mVodEq0`?{@!dC?MQBMscn86+EV>tc>rHB
zf0*O_Cf0S9*DDjBkLIi1O6kF4w#TvdX{)|S)`gn$wz|mRg_;#39>#^L)?=RmPuAGZ
z)~nuTHAOm1-h~T?aWS|>=e(syt5<SyhE7eaj?3f-#6#|`bHL_oL?xN}{hIH};BRez
zBcQ$p@Q$ke>VR@vI#p(TJ_-4m)>~MiCpYBhx1nd6vHQ3lXXF$AG@iG1=X+Z!F81>6
zXR;FjAE<is^@P@7Z<y<76~`R%n`8DCbKf_za2EH<l$B}ZsM%>1j-!2#7K(_gX9N<3
z7#K4yDBb<1yxL4CIFVJ%c~(Jaf2bvLXAfIQ64H*a9Q^!r?e_1Fn4>+;MFq=aAFFMP
z^26tI$?hXd?%e7H`oivF&eup9j+y#PIfhwAth;ix3n}Ae--)e#BdH4wl;{5$W9zgt
z8BC7e3}*IuRW8w1j!VI}@^`q<6Q{XUZyl|5m{FSp(Des!b-g@EVlJ)Z2s%%mas|L|
z^%8Ubt%!=5SUo<m*-`iLHU~`D8_8=5{K+y42a|0+{n$eQt3M8F33oY)^&GY>t59I|
zEe+>m`krhqWB#_W9>d(Mbt=w(6ENM0;twuLklFBmB*swOiGtP_EdcWcwVtl26&iI>
zZ?MI67;0aPZMW}`JboU8{{g5vu<>4;bBFkVcWkucZJ&c3g(1o#g+j>2l8X|(g+}At
zS)0v}Ucd8X&`*!;g^8||zo)15bRFy8RcIX0JL-PSOO^O8b6^`l6cvv<B#j^gbXYQe
zG_y62+4taf51(F{ohv<ZtP%?Tl8e486TVbeH}KuO;JR<@M}_$Fj=D8rV@{~mh0XI}
zW6n<=#4COI@R;mZx#JYyQgD&zpUIAd?m5ZcrJ-W}JbZrH*lX*Wqfk-g2O>no83M-_
zagXRM)tSBHG)>D?2a>zE#lF)#8UEw)0*GEX$3ziBXQ=zW;7&%7#3Uu7h(LMLqg!k&
z=PixZGapnVX(O6)5wA+|Z-vHARwV^o7;|E14ZnCvW$2Q<F4cjhG57fx{SD?|3uNtv
z2x0@;XXIVCcVydQ963|tZpR2`wqq6ynT|69qD!hmsQgpQ@ev;Ls^wEj0)}$vCi?k#
zK}32acb7tnm~g$Z`*jX$TF%dt-b^)-vhQLyF|kkvR2y6^*UD8h9?%E838C;~+nGLo
z&hD2naCKx)K&skt>gOvgYORhuu!Up(;@e}Lg<=svJkX1ujkrv7Hs<bTY#;WLW$O58
zGE~b?3bMWM@NJpQB*5U&mK9KN$Vo!G$n0HxF7uhn56*R)f7erDE!;Xx-ITt|K|RBr
zi+>_8X0B1!Y;k~&feu^p>|;Z8S-8kzcJ`cYjNKMlTz&FQ%})jQUQ*QI2waF2{UhFp
z+@e=qk3O)TjzMe@KczS!3GkUdERNWQ1FH$ovpi5#L}<rIo3!xWP5stn(SaICSDTEQ
z*oBJ-Rb825pLy{M)W_~0h;7pV-bFpFNk}>$&SOw{f>}NP$ycuB(+EV<YXUXIPJ>WI
zlO9W!Oq$K@nvYnSef3r0yMlbW6Bn+PJBm3>V7(M#=DJ_T|Jz2XuMZz9dbBe|;z^6>
zGL{xNs@%PBQ;D*w6_Vqq&9nzut?6*7tv}Swxh6kXF`~0I+s&U;w}T{m9`OIJZ~GZu
zf>xG1bm_J2vN1fCn*OAj>qLX-a>T{<AoTN&h!}Qov8%l~>VwGMo3eqzU&#Cg%IB!;
ztiGugKD~P+C2L{-QpIpQ$bl$=p5M&WL%FfpZ;iZ2AR;*%@rs!GXhu+ek-B%s3z@B~
zB7nl|?w_=rbrM_kLh8Ic&G4%bbF`QuRC0Vzi<pJt>^}b56P9+bRkofsJwyKqkMJ_$
zJMTtZ2MnHur}?gF#ouaveoBs{?dXUe<tx{`t$rs`awip`P%>~Yp7YBNKWRI3vF}^v
zaGrY7`8v`6oM%U*<NeEcaFQW?!*VsL)iYnmG*Vf@i^8kN#3@d)t3NEPketYy*a4o0
zX+R{#bI!LTxA2Wz6iJxahCF^BC=byB9P(?Ka^B-~VwNK=UW2RNVKR(fOJC|5Ev`N;
zUllt>^%dV_I(o=X;Y5hY$f}d<z)Z1}%pL=wz|Wl_vUY?T%X}OcR;h&3SX+$DJ@a)>
zxIso-{Hm%V&La+Iwf_diI8BSY7IGtHS%neR*B3`jO3|R7wK%trb;?ziGE_OHuvMPV
zREX#6|D?$|aD2~oMAT*87&wvwY>aB2&X-qMuc@~}0sHLcDV}D%bK1=Qkk+cOpC$_$
z=UyC(=ZUP%G{?NDtVT(C^g7SN7v5ab3Qgh+s<(vDyH>i4EcWJ1733Zr0ucJnla)f0
zLI{RMkv<Y>*RgsWc6}9lPVUOPoKmk6Z%y<$U*?eN4}%$d>JZmMeT%+(3t55qoWkHe
zIREMT@x-hB`&ode>BHIvznDz@zJbe8!Gy@8=j50?uMaw36TOF@D(O*rQ~7*eSo-k#
zmglQrl(DQ66r4)$kO$%x_q;xP^hVvQu{>y0#GIC^N$TbQ;nmjHa_u}$LN(k^r`Ne5
zst<2%m{f*KrhELsP<#x#=&MY_p~1G&k%uB$sE%CPir0|5iHl^{{1lBvr!&d!6{w=G
zdZYC{CSMjXzuTLF??0~hP3vZ-3^nQgmu$ND_IG>LDc;*dL%+?EMP2Qhn==6*##%t?
zZgUX*?Ou$=jBCz|I6>;gkO1mO3P_V9SYCQ4%3kY|%msEpX04QA<7avuk}AxzjZg5K
z<EZPvkLO7YL~@tKLfmza5g!wVXJd5UYV^Gq{B0Z{G%?rZhhYf%nf|ld>LQQTVE$}t
z#c1^t>?SpfwVz_(Ap^^R*8`FSp)rq)QMX}RN2-8q-7j*A->2xWcT0Up>9KB4GC)!9
zlWXdgSezSh_7a#lfZADM);Ki7i~#n*paCB>fY<{X+FiP#E|5H!>Mk&iKMbDLWc?Y^
z_SSlHk=x2^d+8Jt*1<m}X@mN`Tzbd18vK|w(7|taj=H2Q^JaU;th0&w^QwIXq!Qx}
z3g41dBnnub-@JX~t@%y<ZG(xp68ejL-5ZblqK!c*QB%5FR6m`uNmn}N*oWGO8G0q>
zlAQSjxd&KkB)=u}ZAO)<V&!Llj)@#|l&7syuj;V*jR4!_zM@;{l5^Tga(mDOu>V~;
z74>?2X9)EqTSOMir-T8l{%Dpmvlo<r(J4;tf+ZibxOU9p9<HU^q~rivDeiX%buJ;a
zDw{irJ;4Rn%KCE$ETGqRl)Q-WGrbS8mmf`foipY&EvH#|K9SpD@nViam76}c&&d#B
zx-wGXD1PI&UozRlKL-7hEUQ$DvoAYMXl*0x0Lwg$TQ?^$of$;Ygua*bt!-&-Qe=4F
zovM~iV6>=|tfjCiUMpW7M%}2WThP&!KEnIOQJs4unY{_T;uE)pFGqL>u?}Vim2Q!%
zq*5hzjDdx<d(E+2x_`{>1lO)^3BML3*#=V1F<t<r__h6Meb5m6!xb}L>j{lQ#}B&O
zDL_MCtM_vd+y6wQOQm8z`1zmmLQm;<KEoS+a)HJaI{kUNJ@BlT&0O}$TfW!$3QSy4
z?rb||;L$I?RUj=y^?sK1c+Oz|92Q6EZrJ(d6)mFa?cpn0sdaRe2{+ca=4$fkLz?;B
zovHBAT8_J^sUeb^KTdx%HGkll-agz+6M`FSZ`x(oC1DxEN8v@-jnlr}v-tSK8$W6e
zWP>|>SWlL+yMHPO66FdXETUtwDHA{*s2pzoyirsVO{EPOp~XI$eKBh_1Ng_E&qv@-
zuNGYEMomo`6}FR}kVGIB{Qj1@>m}2g^Aiz5Y>Uh`uY!Hu9Ogx?o?^VxmXF+glt=W^
z<ERPGoVavlxYKo7ux%^l*3*E?Ha%W)L(*{uZn~}KHIMM|W}IGunq3wjz3i4u=DiMb
zvQnCz9_Gt;6mA#^jW`4*dMEYd<whxqG#PqKl3u>+VC-Q0kOXs0F3g3F3bhTuj;D6#
z86DRvbE9z-mOGK(T&dM|qr;^m7awJIpR@BF<I!RFgooHFa~lVK-eXbcXLIyx^N3qF
z&-BiT3vFLJH)lc>$(gH=dAPyhk(|Bhc0CQPyNr1xdw+@=@zPFNEBpC-z8mRXyYg{M
zofS$%&b&KmEEX1P^{c>ZlqA~M4w3o_3d<lzfUTw36jWbePo^{SoQauOEin>q`u!N0
zcH#U1(JPAsB8)>6!-iE25Pw$*LhgsNzMi=btj71u2lmVxA50l31qw!so$T;^UhHg>
z5o6iJnPA|9A9b%ff5VUpg|k+%kMP)5r>965N~HZQ*M^|xX1);KiFc@`f9TV^crp{U
zDHD8#s(nR^MLy7h;{3}q#-ENbkLrG;qqZ5YYtXZiY9Ej&Fr0mk>(d^+@2wz!)fJaW
zU(O9stekc+y3mF(qA)eZ9ZoH4+r_<Zp+@6LJZ%K_$FU1(Y8s20>Bib19`TtbS;YE^
zvG#2^=@svZ{4C~)7x!|D9e6hi>D$E<iNAVkfR4+M-@Lsw;_#!hDHp(?iqn3th+9O)
zb8f0|aiBBvetfkas2uh^+RZxsZGI$a{6(y3kGq_h?o$dLxhHZTa6hm0qlggl_y5h)
z&ku0}^me%?6zy7fHhHs#5>H5e-(((#9X0aaKZ!9;-zfykKcOm|=Ydx*MhV`EiFxc6
z$eh-}-E7>nxEanM$f2$q2hg6iej?E<KiB<1-q7$D0ycmaZi;M8<NN)(uuf?)QuV(&
zfG*$Uw&vR(#g&CeM`TUkF-D6a@A|yES*wO_R<LVIu961?FdDAOAiK6kU4SVe^Xk1G
zwNxFpp7(ewzJ1Yazo2TR$dra*t52BoBJPd|aJqqIlDpGU<tf-z%yDU3q7b>ga^jt~
z<fzRI&_4L;ZF$*f-j2x~849Fff32Wo;!c+tA`oil2yOti7kvBzEh8DarD<(1Wvr_j
zMSFTN`gsZ1-T31jv$??=i#t|QYME%U1W#<?8)jR{;wQzDS@*{4E7lJ<G-q(vJ6oS1
zzfIqvBIdj%OB6EG5?9rI_sB^;!TYjEh6wMrApP}?!>B|x+-Ax^uf8sFOpJt#QvS7{
zrdXkgQex%J4gTELl4{C=7iS_GMfb<YUl|Tbe6bLX@y-g31;LHX*SAMeXs8I;!i7;)
zrp0)KO(qChF98dAddv=h=|^NQ`lrzfPYTW|c~Ozp_W#@WC)@b&aW^^<*7ICmg2Ol=
zySMJQ>a6{E(xhp122G-pO>ze~OKgVx)9ZGfW|~RuB>}BEE}LuZA*X|2zuSpn625sw
z<K7L>D}j{ffcye-WEo{)nOT$dM6>!=D@~z55!FVK!kux+-{bP$8fuo30&JI*(Wj5+
z#9bH{?yXw;rrvxaTw^-nOLfl(os`F!EK#fmvJz#PCIvs!k^hc|1hzwBKd<G^LSJqi
z^cLoIl#zf*^e{e$0d<Q9_r{5i6y8`wUAP(}BMy%jdPR&7jX+B?ijGJBeHyuZIgS_+
z@%k^sBNo`1<(*WAx-Nvh(<@ERXiCtZ2K~n^GT9FJNe+*1UCY>B9yY%;h>J^oWuE#6
zSnH<V>$_52UiC(ahfvs^N#eh3J2yqL+8J{&Eq-wv^PC%sQkIC%Sl+?}Ptlj0s|m)9
zRAx`dpnX5zN%>f?9^FKL``HUBW(l!HKeU;OB#3~j<O;<iQmiO(=(3-e6>=~}*&M7g
z+vbkRtb0tqlN*sBNV>5$`)PF(d1n+j^jH|pRf5eb8uk=sN|rS2YQy$;3@|?4t&Pt?
z-0lbAO>`=hU;kqOW;&TTPWP~tkf-W4qkMzB`PRuzelX{~{RvxjjJ7#UeQFC>{VJ(f
zj~a65E4A-9K<V3tx~1piJR5ILR_F_4<?T2;Zx1xEl&n10^0kRPd_Qyaqi~0r5AcvB
zN&e=_Z^k_pb19cwk2(>1KvlfAEdAJMWF}Y4WfF7n{;uOQho6~bXfOOqa_%r%Nx)-<
zd1)UmlEG70H%>q{7MBv`IS!{|8al0yCIYE{jwr`e#S2)fwYplXD3c7@*Wk2*iY^aL
z`cmiWYm4!(_Kg^Hi2EFPXz$f42imUpOKVG+0C{;Et=y<2-J}zk(`*saf8jf(aQykE
zqQ0~s?*96=_Rx){2gN}^*D+S-Z)%FM9<5-Geae)Z_(;TDBwc`Wc0ZUqfiyst(JAq5
z`lkHZRrqlk(ZYGOsXK2?6Y}W!f_Lt>PkYaH7hKz#y@vJBW5W!_+vF;d4-@a$@{NRL
zVLE~OumK_Y<QVY0t_?`nrM~b-<8LS;y;w^{+?M9|8a-$b1|=5nW^Qtvnmts+Ls#;^
z)~#@A2S~UEocoU<b2MY?L9Z#@@%XjLdGUmmIbMX+QG*Zz5M4`<s~t%wip@LDoJp~o
zJyMlpq+P#Fa7hRPnRhpP2UwE(`dY;a%-&mF6Uzks@r>|vg#Ku-OZt>ApcpcH>>#_;
z*$}g1FTp=CU!A+|)9GH#VfH6`a)9jn-nKzCsvOhuT97UO=i-wG#bdnHYu~TfW&y|<
zqfcPJ@-|@srF#?9+zx?jqmplMKtdO}V*Bq?owQ12GX%Q)Eeaz!CF6Y$08$(~$p@%)
zE0tkJCEcd!_(WpTZm22>*vDiH0_ssV9T$|v&hfvIFGL$!<8s%jgKqTrWIuE+F=Scn
znvT)wPM!KXU}(KC|LR)Uug>9Vp!UNn|8}I%{dSps=j3lcy=MplWc&&<q1`yfF&@^D
zGdHxNJb)t#khb6*{$~gh6rT+Hjb!)3nTYSIox>U3FGbwtS`yToa@Cc^1$*2ZWT?ey
zDDQBXWZRF|I>t>rBLD5~t7TuZ>i@;0@izeV5!d8n%{yG@72m6xnTcUFx8KGQrEkWe
zihvm6`I|dTX-Ya%t<VzQOY<4wZi4am+wN%^-Y;g&=tBGff+){p{ZF0$0^omvC9dy)
zgU1L02Uo@f^OB55*%-?D^K6Y-j@2zLeFrI$Qv@kvO|UEVz7qd6r}~Myz(Da^0`FIt
zzr8L&b8M#Tetbtph4!4K_X5gw6bJfHEAsq|?7Wvw{Ke)Ir{w6BKCu?Io{BM|zhk5G
z5m~ZqXmpNhp8X_^ow0IpO|i&MfuY!#u?AZWuM0*FdnT>|Qgil1-X3VezhhpUk{ixL
zbihid%`$H~=Uht0faa8B9OPFZM-gCw7!t)>Y`cGO;t3g}CjBYkaJk?^R6D7*fon`v
z0ntjFA&OnbdEO=cTSQr*;l&q1-<TI?;*8v+_hmQH9W*7cT*msX^Fq%hW20+ks4scZ
zO!t}hCNdWKV?mA&@*V?Xp(m!h8(-O+X5ic=7@N0Mle!u1UZ8O?fbY)LWIXM}Kt)Ri
z=BCC~t}DrUf@cefJW0>XEQrcrH^LTxN4U&?Danpza61ewp=@8h{$Ox3l&-Z0pAqop
zC`EVD;#teXo5?h-6ldagb5@mk5gD6t5@jJA1E&yDhHPi$`wCt_7DKcN3@AJZKib_Q
z>(C&RAv$v`RX}D0LSOt-hf?6s-0G=>%DXk*H5<On*r3fyc1y4wt?Z++v6$EbX~>bD
z$!a1$@`|BZ6Tw=8e!lT=a4GyBnV=Ja*N>L9(PVAsds)yz^#hotXm)kJ=>E<L->IK(
zM-`O29J|)LzIC?7^9Q!LEp-1l+~UoRUrBMm(%SopaSrDmoiZc~C5x$1<KamA5K7f`
z$3pKw;Iw|Rj-W}mbzZ*O161fr&zIhHO+RT8QHF`--khkJ=;ON<e)9aY_X`tC>n_+O
z9tu1jxUAo`4)@(<oJsesvvyv5x<-QGTaVxSu_y|)?2@;{w1zp+RV$U8>Sq-TUm9Le
zr5Eb^jwiDR>j<`B-{^v5jStGl`;GuxhEy=w1z@RH#)Lju7JPeSC`9#WQT%MqLJ1l6
zKwL$jN4M;&GAfzobN2+AEOl#l(`w(^C|5Igw!r#wjr1}&oJhUqEK=o3G8s&}TOF3O
z!rqp)VT-#foXv$)mPlQ{$3%AVjd`h{NvIkKAdE`N%1a=-0JkXEgC&sjEvIVmHB47I
zp3ai$$w=w}3q||(QQSiHOpLW^1y77-m-Pqy`*zpg)G^!_YRX(%@3j={l=U3f;xTvL
zjmf0VP0G(DTI}Z>s!YhxGmUx45g}^aOC)NJ1*;5(3>iXu&5it{RE|0Vjy=q0Hglyu
z@pQH8j_vniY(7lQE*BnO3Kq2wV!G`8z(r}Z-GA>nhl&fqF(UOLY!@Vl<*@i7Hkaq=
z$y3z5p2NNpJ(Ys<xw>1Zx2_UhZdSK+<}TR=V@^CNcalaW<8-`*u$ApyzqQ2UQshXQ
zL{^@3Y%TPRdR`k^sFH5{f%Q&zPM0X7IJt0`&vlb~iZ|=SKJDPi!zI+XE(d3J<AQe<
z-e`WVPtOj-yt<-fk&lLMNyK;{0gd)MJBp6I^EZ_~IHern6OIq3j+H3H$L02`c(yia
z_sR@PTwiwY8jO?d*>#QEHZ%0Or?meNxzqen{bxq0ru6xk^4E>Pu#K*7?wMF7i)1Z~
zWQ;Go=g`TT>&3aW8ImBTO(=jJ_pAQ$eAh`);1yO4lvXLw#z{brr`%c^;$K^<rzWoj
zA?%9yF=SF=s8Z6Y^6#!bMHhp4#UHF^Y@eYSc!jk9^0ark00L~JWzi>`iEg!M{R(Eu
zW|iPWa;`ypZ|e?D6`6OW{B;!F-RhL7mnZ<goj<s~L_Y%)CS@0buc5I_6bC){&CVbw
z(nuY*)v+3Xpvd`Dz&H3R-12vcr7`%yo}wQQpSSipEPR*^c0h6zeua_XZ5nL6$f?EE
znWc>W27WFRCJeR24vPwy={{q-_Q<K3t_+si>NVgaBs!RC3J6&{)vyMoidRHif)IGc
zWEbQN5)h;*qPcYWmMSS|IcV-Km-G<7q2oO>9R!Z6pNA>U1%F*oq4XEBp-?us4uFy5
zmfO<c6<hNXyoy$Peb=@L*vI@KI$nC~`xBEACyTnDIe-S~+`zdG^MCs12r6CZuE(l3
zLr0OW@Xs``rO}sz%1grfDEeg8bCA(V0KoV(Osz^bGG}j=x=ZAOtz)opp=u-j4G%@0
z5k}%{IFKu>ouC5ahbYTx)iNKE7F5h8d;=nzBu94c5+nJc!TII81kHVIy^&c?U%47R
zpi`J-xanc!Ww13Ce!bs|d&@aGYV%>9>&py$vmX)3cb@{66iYB<O+Xo;Fa^tG6|qbZ
z<*8_Hy*o>H?pXJ8-2&SoS0Nq3PeY0fsP|nzvy4yMrF-WrVvEdLtS=T@`FPDlM2K@_
zq{r8D<ffZ_P8cdUF9E&0yEt?A%Og{bD7&^N13p$aKATlq%~qm_rv;IG3*vcB>L|(F
zj$kvd+&o3Cu;RsL&eQklX%oKaebQi$L#6>hahI<yQMA&}4JB(c>^FaezTGCJy-=Cw
z^HX5optvI@C)4J9OjUQQ<8to6-Mh4w{a>4RJl{?UUVGAb^GT^1Uw5cwjc*Ba^Na2|
zDgMTo-3VsO2bE)m36q7bXJed2XWyIX2i-5)uIlvEE8eczj5n0?{#ngckk?J=g(PM#
zmK*=lOwr?R18ks^{`p()qDeOh?y>I2B1Ve`Io|4b;ta=KxHn@Je9IR0@1r@9W%GwL
z+??!8%}J`d8p!zhnkSi$Xx*8#$iSVx@K?3~ClVa;q6FamzvKUhv7F*2ip0<KtM9Ro
zUX@#Z43EKY3L3VE`L_R=)zu!coc#=*l_wC+`Nq<-f2O`&LlDcRPBcCZW{jP0``?At
zenXp=4+46~80ECg2`Rsm(Yuq$sV&j$QE)1?ZZKxr0`uJuX`K_0Te=WABEoDZg$Oat
zT5Il?v=1cs=m``1B&xfe&`F*1Ko-&*upo#{kC}sj(5mb>dwdIlaJoFs4_T%GPZ{CG
z#v3mCpC8{Z8hkFI`+wf>KI3r*N99nd?eO<8JZO}I4Ivk7XItY-9VYJq!JK?LKqF75
zD@=Iy62bJf453i;{UyB;iJwLt{pEjeW%qZgbl_6Fd5Yrzwea*=>~L9OUygdC&(0+I
zz_(p^W@X^pYe+#Yvg3;Z?S_n*BBZVmx<A6fGD9#fFnLV)XSdLB>D*dc6#{dO&mF7Y
zUr0Z;lj%$0z+vhF;b{wW#>yP{OO`?Cd&@a|0ukZy-yA^128f{6R15;Yx<r8X6s9{k
z+4t32TbmdQ4jMP2NbrI*g@Fg*iZ~sDW%VVs7trK{WA{a~0dPLePfBg*9pfR4&FATe
z5%NAKq#{_sa{P4^h0X6zlOfO5%1@x>H5>s;F%qCzd9qV8`}VJE;U{_?Y^JB_Lb@q(
zSpeEG#Re2IIOLfDETY>|#6_(Fods3!U`_giu%$<^+&t;W@)0QB3L2HRpiYI8eLWia
zErHY$gKc3JgP+rE!QI^VqZ*vgOP}k{6cc>Lkp9lsEc`ty3-j+{^Kh_n{vh_3D#%x-
z>dMsl^2|=Gfgk$NR~PuT^K>R-D*&~8AEQS6(%V8vD*yrsFaT3&-7tZs8U+il79?~^
z=|~V`ej<aH#%hM`DrPS3@6MrZA$^{^9Eyx1zP<hlGGBp&(7*ShL@=qvU!jmc0K%SU
zM>SjJAJN6^j3od|U%`yp55BJGeAnFFUFg?Ga8ScXIW)4*Ru@7cxntIPX&=mHw$s1f
zt+*$PxE6uMN(aRD(PjP)v7W{Ld@NIy_r4p!Pwwnp4Ud$b=xoC@n)|lLnz8mQNA#no
zqaIUlwQ5kVqQ?iDqpLr|7_nxu`oKL_REmR-ysZ!SH|qR)@n8V|9ZJt{18y2Sv3tl*
zz*U-pIU#)%42lJnKL9%r5apW+9%Q)9?~*2Q!<lceEi56nUL_vn;Pwe-b%S<syndpn
zdjZf`9h&VPK<o!1iy4IMmil4mJ{piWpz0$+c!sO+|AD?%xN3i;Duc%v*a}_n3IvvY
zJpl6~^|$_imvoS4)|OCeTu1D6u-<6kmnIj^WLNaj#~qABRg~}e+wa!b9&Qi6$BG<o
z_2h#G-3{zcRea;N!=+!L5J{-7)hiR7M+Rie#y)be49G4Lm-UY(ARy9*Cw&m#{9$Uw
zv4=8*Kwj%B`6_roDdWYH|2u9YW#|bfTLy=*i}V`@O-_w;J^xcN%S-_ET5&nDeGuaY
ztd!h^WD6Xbt^iX8!FG66LdjbVya|Tx-r$}uSc;{i1oAwz<sK2@%H0!EXVt;<J3{9J
z^2OAXBLb{+@B-hQX9@D}E<qd*kZ2GE$$!$yw}SocI)K?<5vAh_5xkf#!E`g(B~lzD
z6hclB8e4}#tOG81TM$alF?$hEK`l#q3QMzuD)qUTKWeU2|K8p0g%(3^$*mvE)NOfd
z>VYnQuj40cHCFApR>>z)bWdD4=KdeWEe+vZ`Q8!zw}X#kIA-iR5(Had9$=2)3kl*3
zS3>Hw0b7tJ>X+dTZ`kNX#_XjZK<3<%lm0zi<z7h3qd!P`55GJdH{T{5?c2E}vTzfw
z6H!1gD^?$YN#1OC{brIwKnt;Mk!eWF3DpO|1Jh#Bj~sK{UuoNMb~{0K1tF(G$ujv!
zl84~U*Ehg)E8ygQPB%pvT=>GRS6VK4y?SnOtfnfrDW|U5G~KunlI_yC@3E^SiN933
z(5YX}C^RRnH+UnoQl<Q5?a!olVl(KiI4-B6Psk8fV|NgB_B1UAL>YnBC6y8wdMqxH
zaL1OxaR}%VF%N?p$#$_#1#D-UOlb-oB0{`L@Lp)it*75#OyF`0Dx1MX9hr;Oe$WK2
zd4tU&f;*nDr;C)p$U}#hBe*%1wukM@w2i#`0Ipa!ZOHWl(P^{4y<8ysH(ZseZt<~&
zy3CRuT~bkAVTm!kiCkclB3RhVQ;XCNJPAcDf@A9J;({YsVD29>?}Ly_VOR()(pu$D
zn%bQ|r?s!<LOiIVTO^0nVi>Q|i@N32K>Tb2S5+Cbn9%A8pnMqMvnj~Y?cc|U5Kb_J
z;?xHXy@6JcMy?jOm166X8a?CT`Yq#cew0-0Tfwc2?juhLR^r^ooEs+Ki%&=f(%muN
z9m>VZpDw9X3$0+Dp-lh#nf+D{>x7I7(PHLhAjNT}V0Eatk(BGyHfz~=)<=W?1YdbZ
z7-t|FhAEZ`Uy4%!2zj%yaSP#HJt{=Ro<^7zIJNrA?2Y35nVOP>o%y1RH)<tm#Dt+H
z8Z>bT-U{Cq--OghT8LepaTu50^Fno|O1u5G%&KsF2R(5Wu28?C8@O{3ucO^L=ck4K
zz2C+y<pqdYehD(nGxM3gsw(hrr!(~UN5=1}kj2Ga(7yBTzquH{(!(sw@m3SmD!kkq
zvu74=KY+ACDkcboRIHFQd3+0^x*WTJhnBcaweubchND9h$l)?YLu1lk%F$n6_Buie
z8-@iKNDVVzBJ(rN#849<cH@Z8YyiH{YYbzo=gGj>3tFl(2v|%6oU43Q#3xJ3>SFX^
zH6ETr<>CdkEG0(Bh<1YyjiHXjRrvr|8~X4tN*_t7nXMAr#93JV=SC=@PmSAVlDA7R
zCBLrbk9d1B;_*t=s)RP`5zR@}1amucAGXiut?v|FZ6O$rXFh)b4sjbC^roC7vh<f>
zK&VxfMQ(_h;(5cP4ASm*wa;h{R9*|wv*aM5TJY428v%b8`Sxg~CshL`n4D^sI1ntZ
z`e0$>A(;l9{3|e^I$*Qo8##uEDECRD(6I=}KsjG=ZoOT%Py4>pJ!AxoS9hSS3|wnC
z7SB-o@+3HbrBd1dErFT-eG#{(65F`5J~g_L0(EQXOCuAO_;vzI&QVT+$B7T3ZwujO
z>u_sr%OTxTV!sg51)a<%+{Zk#s>XmRj#GR${Kr=1qpwfQu44;#S^2P4Y=M8Z4$(=Z
z)%<xo@1eusC%U&^yX4N1BTXaFiIhJtRLeTC>y_r^zRYvxEoAgFjf=tzAP_l)0OMA?
zjG<7u!GP4?Q5+vAWW7FzIBjhvJ>OvtsSPmMRqP+oM07SVD;oDUDB)J`g2^ZU!CMAG
ze%*)XIy$v=ToZ7Sqh=J?p^2%!)lFzI9HKY~9I)u#e8#BkXMk^28`_4hA>i&Q_sOIP
z&_^v~_UCHz#y#u?y@SN3)sN&QrYpk2)Q_bR8lyQgakj`@jUF(E4hrrleIN`5{H=L%
zVOJIcuIj#3u}0jwdCm$au>%d?GAzPf@umO4B}A9`L-apnUFvU+Le!bh9?E?5y+Xu7
zZ)sLwHenUSeXLI+b@Z_|6$)N$VrovL7KPco*FEnQng~$OgIj1qdj0fUtcYizU@b%A
z^%>mZ701Xe-%ztc&R^+>2S6N-*h(r^=0VtgbmbWABFq<vt39aRi-jp44(%|ah)^e!
z=TyH=@kvEo`{bP?iEufuepa^l>UqY42pA-|T5p?~eZy&6lmvrNd+R>7(QX2%Oo~t4
zsFRR64T@y^U69`Mk$qBC97+udMy&@X2zkl}$al7nu4#74Sbks>adC)eZi<{a%b*JV
z5r+HSyf=cS&tdG}N}LmhI9Y@Qmmkm07oS5E-ue5BQw8sIiq?eq;~?=|x6y;k{XNK;
zh|Zc0!F+;2r%c-q2-!J4$qkX6G_$8ujp6Q8jepW?H*>*1$pn4mzXuEbIfQg+0J9a(
z$BE#C!21+F&mp-Kym%eklGDOd)k>Ju(<?H4sNd3omXxethK)4s@xPuO_F5p~<R8fT
zm<mH?f>cSEWUyMh#z_<dS$zaD_M7rpw2-}_ehVQ@*I~h>Qw&`S((CenL%BIHllv#V
z^)%8a&!DQGRPu&mLi7}@BwNV#cduRfBv~9QA3*f?;RLM)#$F`Nwj1FaVy+g4Fyu6>
z_6RGb{ok!bJRyLph=W)j0|rVTk1|LIS2i6*-B&_U$G~GGq-8aN&(apWw_$tl!D!uI
zP`hVUW;6NyWxZJr&?P+(#dCIOm+psdK-2#m(fZ$crBMGM0^SouihU3Y0?G~toUfie
zDO<Cwopz5@52iM%j-Wjq2Iy@C-AJ)XnD9^N@n;fJl+hBS55k#jfy8iXab4_f8Uq!C
z6S$578(zOW5GdY{g4H12XB=w6WkD;Ad!_6KHqgCuXysknuC<s`|GNOc1E^uoUTZpW
zRMC9%0Ff;){wfe1ugjN&#-E#j-TX6n2#3JDX7I5Q%##dmLy*rai?kVxxpg8q?0@M2
z@Ni<CyrG=3^ymjnXO7*mA6p`J2A#7EdK|rs2XFI0Kq%<d1L1~S5*+tou1kYmK%kg{
zMp;!gT}qhE<f&e}2OLpi3m3LK^UVx7#R_Ww)8;(zr59=*!ZQ8%U95;6UuJEs-D~as
z9+(`@M#^%`e%3gF?L5JrYPVJnGin88@?tQwVhu!dah1hqU1W(sHh=`A=;VtFd60T4
zifmytqA-SAufNo0=ow5CHuZ}{zbyiK3#d!2*!>8A$PavP&8C|QBlC5Of5OO{Qk?5_
zjJ_yH$b?zV0?BreT=i4ijb;^*!y2*60jGIB15Y#rx`fT{KYpJ&A|+&+p#hV(GxwW+
z53Nhw${rx<*#NcOHgM34c-|0_yeTZ~OZ3e@9djC2W(r+EJ}_n>V^<JTB4kqCCyCBp
zzh@U(NF*#xcJllu!c1o*?cSVLW}yqwdFdf}==N%1W^0sS#{uo-D>U<Zkcs)ZvcCNS
zgX=OBg<_ae>4Y$sgu34`g4|h~EI^-eYCkO%5_bFFEyvPJPVC3!0nJQ4aBcvXpdJ$t
z1sI^b;{%w}i<E7DmwZ{DYOLy3IEu8P?43)IWA^T~8Yr+OjFY~-bo**o;=l0J?>ej#
z(XjA_xArq38O6AY%%=wGkB4W3iEyl>2tm}EH;f7Xss#h^>_hu=|1RLe9X=@p%Nc-6
z;mJ^;>YN?}o%97JcSRm!%K=&PZNWPN{vHwTzYs)zcg3K3Y!4XuR$!sxmtMB7zTJXV
zxv=$^w??2vmj#GwH^tf*>2yTsBXLIGgLT~%^o6DX($h<jI=(_N*8u>%G##bvHdAvi
z=gT&1D?+ArP&Dt`s)4>lEA8IPE8>{#heLvkA?Z@;06+D$8sEt=S&~O?8y)KQs^}|X
zVn#F0n?bwN)-SOjs8{Xr<q(QU!DJ#sNtyLPdyQ|A)m(e>9q&DtxP|l$OH^|20o3|M
zT|>IR_aWsY4OXgWDwuGHV-b%k09sl_rcE~axzJ=dl1ueU8N}Z(^2TWn?h8r*8pIDT
zWTBb1k9IfbhT)lB-j*Qc7q~SJPA;&JseP1cBl!P;!c3)QQ~7qd0$<Hnf(099XtF3<
zI8+=6hLxq*rAy+dlF{dzZFLioXtEt6)B-Dv;ra|VqJ}S80W#78yhMo+g$XuW06X!q
zn%`uk!-nmu+EPH;ptD$0?|cTRn5XkXILze@LUyu)z4ckNHjPF>^+p$r2M~h7kyNdB
zYI@9T^(zocX64G_NW$pjKpafG5RUkB@Bl9fY#PR&N&$3>^fI83w_q6_KP#RDo9S23
z8OFteI9{VM9FwY+?7OS?^^K=MAN@+OhzXivHkvt?fwgHWX(!ATlX6$Hv_tB@Qf&`J
za6d4%P&-2QbC85>=B`Ii#_(;RgP@s#j4ot7URwh|wT+aRFk22-rY0Xs0L<IX1PH)m
zwr!;l495VvU53#hhhbev6ne#GZ%@Bg0A~oG03@)j&W#vCgUd7yyKVvpP*c{sptxP6
zg^?s4EL6*bU#@wK-={M$1t3w`lCAoA2CniIKKv$?iS~A0%*m=WjxJ?QfNqVz4vm`J
zFC0L|8O%1PuveKL?5wYN8hR}YCf;>SA8wQ<;s8g})CPQy6~lx&=1)RiiMz<5@vDXm
zH44(R6=9^e*Y@AA`xQ=cLV)U$)D4CcnnMh247}<e6-xWl>Ylbe7(is3HJji6FtogD
za8*gSImqKr$jyMVIUtT;I$a?}99^Ucj4*?H4SW>^Vj31X*J^laU&ZsAhgIdz#MGS)
zX%^Yt57#FQP)G`tuNajPGX;`UpBdANxR0!gmc9*q2_4%xQn{_9YY##l%3P1Zr2mec
z{{ht$PPf}!KK?4qi#ubqJvm9W{_<ScPiBTvBFj8Tjw#yNq|SHDvFek(La#t&L$2Ko
zy)4u|<}pPTH#DXCpc%%Ep25_aYZ>62dE(pe<~>U6Mt_6CaM|4pIFs$~r+G~&9<i&W
zr3QsP&O1~ye(2Ap+Qdov6v(A_Pw7c#X}(3iJWV&dICuRfXzF)WG$K{wLn)-JBZ0&d
zrc7==E^eByYo{c-D%d#Smy~)+1ko@-N-yR)lz1=m<H5`a;BGFLb_5=Icz~BBADF8;
zCRT#zz3)k=t^1(=^5yRbUAFs7m<RFLPjYT|^rsS!qdQn@Hf3j<rW)hSfHWh~)a}E;
zc};&!Mq}flw^FsVkFUxNp`7Q@%{IG%%;|?}4nK`JtRZcm8^~D31u#SHcBB`XEP75D
zRSMu4^C8e-PY1HpOgJ)URBy2g?EFKK&gPLJ^}kkx?<&lQ-`*jwjhUPvW!wlTVgfCM
zamhc;N$wq+ha-#{li82My9*p8mBA{b&yRD^*l5bg3jikU4yf|DVin~J;S4$EF9W!{
z_(Y;Tr>F<B#kA}ojiz9m&V><6({gIL48(j%n_Zo>xcFwz#e=5zRsUUYfQxll7>Eqx
zET(-_$BK-3)$GK6nKcq>t1cwJYe4A{jtHZ_A%D=82W&@-c}E7cr%yb3CqynD^EFGU
z_(pihVolOhrK(##*$0_}b?G86PH>%1?5b)wHSe2yL?>|_ziiXunTLiXk;d7QujgPA
zCbUusdDb&t=r^zUn6f`L=>xGN7WM1G3y=Sehlm2;YNaR*9|XxJ4x!Z{k;W_!72ovG
zYs)t|W!swX^UwNS-~+FYWo<lR>QgTS*0=%Lw_k5Gz;B2;(($&_aB3NExmH4>Yj+pG
zN5<G{N2KcJo`VKci}T>qU|pceO(Y5>KmUno--yF(NNSfnSUBPWcNpO?mHnIA(WK$N
zAARlA)tw{-rNZyGeK};UsQx(mMFYslKi)?y;Q?o1Q1f+p-f_jDf4UBVy%}&i`ni1i
zBG>1USy1B|#t%fhk9NZ=_Iw-a$CYOw(Y4Yb*5CAZijN)sdnt%}=s^)IL+m<hkn?9e
z5vCAr^mcLV>NjRhqVKC#x-3jgJO}qDyK1^@0Z*JJVfazM#d7?6%+(g|NmSy_{nJMt
z;1hnIR^cVantLD});YlCB01&BJdL!x^Y<!#8U)+#v%7y@UDM}xD3q*+Q7B88E^Wfi
zouVq(DW*<e=cUV0P%jG6JmMJn+m1xOJ9Z^7@rv<$tdGM7tUnsE7-FJ254X&Rrnfx)
zg&ET~qrVaRT@rDbkC-f_qinK0vLPP=23$XUlb24pREqE931Z!`GSfDvHH@8kvTN*c
zS)TquUAV*0?e*x3Z}9*3G{C5{1i>Jwc{$_n5|OeO+cSej$jAC5<43boIo+qPj@g_!
zu8o0`LReMH+w+!b`}E4ryVYE6|CXePPk%QHa9JCwCwYT2%2Le2k!9V*VMBFu=^
zdqeJTaqxnzd3&JDtc_t90Ca=v^;{fwpz&+c@g)JfkwG~5`$EGSkw)z4(GHI>^7U<>
ze?iADWy0yNtkmwWOyCIh8@j5v8CQ!N#{_#gv(HZ2X&(W2B|!Zl%&Rnau1*5p2(M^|
zKL834a<VSR4%jN5#oF#cvu6W4uWlF^x)Re~pxGyItdC<Gl1KSm=2b)?5|@CUXkFWf
z07SAOq4BPMk8RE^viWzj<b;AZxM8w<Dy$g#NK*T8cyEdBO4ThGCpM1SfVVLW0Uw=`
z6Ze9wK|GEA>LkHbMo)YKTD_9Y0BFud&CV`BGIKn<{Xr$|o*T?F>+p43&*F%NfRzM#
zhke!rMrNqJz`3>r$e;+YqG@>{$hK}P6?0_v$}qDuyZ1-;-!Gj7L(p{`QSgF*JgCYm
zdqDPE5+=$<!jt)suQ`2LVOX~m=pvRSr?L^J#|~1@!Bo8BnI3~df%!DBEST!DS<W;_
zmf;>~VrCe9lnr8$fxeE5o?UN>@E!tipI}Mtu7W?!YlLr+*$L91S>^F>>q1xmv%I<2
zpWIRy5<vO>`;1tw5wjr9-lUTE1@4KUp1ALK$uYaeU*YJZ2_dVH3%&Fd`m{<AC_9i8
zY}OgT5k8G8gg2Pz70wZq)?w{?GbPci1a*7D&lA`lt{6V6pW)2*;D@I4ItE3DiHnFp
zzsGUEYqbMPL;HIxr>-mDfnjLw=-h?~+J@ot^&iG6%c+59noATQjQLLt7L@;?(1VxE
ze4K#^tZ6<I!fO!T{7(srU^=o#u_IV`g0DEeO0@L__IwyTPRI<oa5w35q<3j9!JANC
z$8hO(3ubEho{71;2My||NjS{mcoAk1C)@<I4LviVYpJL5v+g9!<$obG8Rxa{zou~%
zmlt@oLiNhpL7!w{(xYt{259NG@e~P^FyUUmdO(y6Adb{`yvp6)y=!%Ed-*cg%(U)j
zW9g4bAn2g5Y$tF&ona&whAf-sdwk>u<II(jsuZ(q{10@w0R$Ei-tjU+b7cqQ?osb|
z+uYHmZ7u2Rk7TIm`5J7|Wy(y4hD5Po*7`r}y=hpD@B2Qwl1QRKM5s_S2`NMql1!<j
zxunU^JW^>8O{S8DkBm(e&GRS~n$Vzm6h)d#1C9H<SLOfPZ}zeGezRZff5-9t9zL|z
zde-yY_jO;_d7jsKRp$nU{Ty>wl%5_rW2w<shA`I`C-|tSEk-+kS6>l&&W#~#PQmhN
z00n43l^*|6O1Y>MT(S%84}{8TG3d|g5oZ5-OLkfx)p~d9ZY0^MsN5RG>a@Qv+D^B8
zjqN}@9&jUF&4_OGBu{f<sKgrTrYVkRSe&H!j<a~1XyF?O57L0<Fs|@P)(88YHdiPH
zfn}4ZOK0(kq?rclP%b9s8$@S~b=`>E&KTld`XI4eUP~%gFIM7@v1C)sMA1p=ES5SY
zK<82Ap?cRBtsD{}&m`-h6P@nJ(|Z>*rbqVrF*&yq_X>i*vj3rvjPZoxssxy)U##=0
z0YQaN?W7ynyQ~H-HER{C?D_Yt`gGx$^P05woFpldY1IvM0$606RHqXe$+G)Z#rtDS
z@P>T;TDrrw3d3g<f1w;qLr7^ga+h^Hl{6_<{&%^Iz@Xy}yMfl0Q>^Gy_X<5@FdKW1
zcHn=dNMLJaWhje>s<=}*98ElDQPQ{c#Estzd2Et!;AO6$$|4i!cg$sUj8Xz(pR+2L
zhRJ&vgj364P4ARDNn1%KUbi6^dJy5VO{>RtusWv_{|f>G2Cs;CtA_a2hb56taIUhR
zGfR9$&al~jaUvcAwJ%PsPyd`#KrR#4K`HTpTgYaLyDT^U7ruB6rNeK@*y(-ki?5U{
zCzc95zx_TJoo--j7ZFenOgaM~q+K}kAaAaU?i}Mu845?jH~;g$4cZ`wBTja18QXQT
zs!2s)f+RYS|ICM5>^sZ}gA8+_swGUErYLHh!}iXX?v@O*ZA=syRZ~<}AQd-0Lh%0d
zOu7xU%}*DwySoGd8DohS-rM(${s0KIKAb-`!C^;kYYI4_Dvbqdm1gr(K~V9z+eS!O
zNsuH(oHK%a|8OCbbUl{n)2>r=bI%D!Y02fT>P0@SHzP*$eu9e1m%F!t92^gTQIxa*
z8cwMpgp(9bP9tn{yL>lsn;-RvNI!!3HIgC#8P)>yGmIuiVfGNiW=?-`Q2=rlve%-&
zzbVf(k7%yP-qJGhIPRW19Nf5%>(R)px%k`_HHYvYE1YkQ0C!tP2~tHz_hxGNgcj_#
zra?&CmrIUky!GluOnss7<Z^Vk2fMCiK2zpM)rlMVO#JEz&j|!4JURbu<kuJ?HF#|N
z@xSwge9zW}9(}U=b$l-frj^;fu>0tC=Y_>l^2&|->mjb#9>|~YT^xt(?v``##v-W0
z<g&RN!$V6T6a{!-C=O0+Pj6=3^{2fq%g!Xb`_*fqn&={*qt4O=N^K9&g}3o->|6#$
z*$j~f#i~Bt0*b4K3rfk{o0Yx~s-z=0eH}%MF0bu8((86r@%3WLH-h{z8pR|jcSqw-
z01HknU(DL}Rx-dbSV%jgg0Q)QpkJE^D5e=GaMs#rx+&e_N@M}SU<o6i{{T3jIzWaP
za0YR%^7h-+6NnP&XVMwSQND^0Mn{i$rfsyXIH;jnhm~fAWHXb1Z=?I8)f2;--h97z
z;B)LeB23U+%DYXr%-XaA9Hzcr?!0BeJIY5U0aZ1vyYTq_uW@wWrQ)-UCI~@nhNAmn
zsRHV_&0mu0bgn!hT>>n1(>R_Rkz$$u{#jn0awlDZ?l4h+{+b~)%fyo;gX)2IWow{A
zxMB*u!PtBDbB4v7Lx2c_rUzVSlL_RO!zK%M@-Y!B#5G*oclkq{S>>BCB$UJp%F5YM
z$2{xT)^Sa~vTXs24t95j865dB7lULjQ_~>6b8m9UNe$?^-JFR_su>+rmmu;g9@}5T
zmrKm&CeUl#2Nu{j>l#Wcot^EtFIErNSMuHd4sD5DU10kVF)*N4#U&ORr939Ea`7gw
z4AWZ`GRFa>-pG|^D^@hV$tSoug~@w%4DMO6uydQsC6W7Oei(98Xqkn&ChKb<zM?4<
z=V>@p#eWPb4<7Bos=d<EhiKolT21>rNeQ#3LF_jI=<cqSHY#Pi@)!I`AMsAfegpH9
z>^ema0S4tuIxinv-zwv*%pzT4SF{NPPVfz6zD~mQbGyK;8V3sH+w+E5hBX%zbwqcz
zBF@8_;ii~HtAtjMiXKt%^-rJ%HbPQ#J8KG`-r@2}<xGaqor*VGbO%^so0hwoLYHeK
z2Y)GQJRplNZx#$q^DvPHTW&1<I`}8~U^OFhd+#F+s{mlwcc$i8t1(PyJN2d7qvs*(
zdvu>-!z*M!ayoU`wq5SbVKN>`C#Ptl?rl7f<@uXIT>=1QKN|nG1}u_iSi~Qtj4Xrw
z>pm=^8{<8HbiqcEq&UN^3|wQc{|RBM213l^M}7s=TW<TJV*=Xn2nUijiK@>a5zXXe
zFHwkm$~L0Noxj}0TZCIhRbX$lm#6!VcD_e)`8SbeW;UTs<n%i|55DBW<t|ZP#klF5
zdnV-$2Emu56k4lQRyquh+P0NP@~!m&0Doeh8jP~71;fcIq2Q>nLvQ{bcN``6xEkeE
z@F(Wu>(-UNs*oXhYBS|p##62mz1f**d${Oi#&{`0vTr|mrNJn;vrD}CMmW@oaoT57
zmV6Ts{1He&uTAc!$Ktztt-C*2VNEQx5G*^qj`u#FWoVV=eZIk@t3&!%bc!~KCUmN0
z*<Mn6Q}(;w!1svuTsB21@xX>P%m<tQtyIusy_=fr^3gy=hi!DL0yYlAqc59tRY@4q
ztn<kmbKYJb8vt+ksnTWxy2H~@b_Hv&+@5g1TK%8oWAFyXU8k)9seRD4NMY6Kmi{(i
z!`fKRqdl;?9Fm$|pl-MRPR-wb37l(wM~IxLY*4ZM9=!BqwH0`i$RS6de4S^v4J*((
zTUXAb@#oPkrxkUDM)g-y`lyt3D}q<1TQ!QZXYJ$INH+1eUjhW&RaXNA8RI(9M2?C%
z%z<0HV(~^w(m@ScX!^aVp#djPMk@#5Oz;XL%`xoRL>k&9_1JuU1k{v;Xla<n`3(@W
zN-<_)W~AdA01>Dr%xYWnY=2cT=KEM8KC<V4RIm$<W@Q=`Tb3><5Z&ohEBMX^``b_x
zJ$iieUsH&DP1<g&P*YkRQo!zg9&g;zy?Y2Wl#%)v3L%?yOE15D!gpE9h5IAU=+SOP
zOFrG1x87}_aNO&{M`yf=0(f_`YF-c;rMH~u8MK&#a47!l=zS|b(tRnkl|3u!+)WPC
z9q5U@I!ntW;a5Q^zW&_*ENNkF;M9F~7XtWt=;6PLT$s9BIe^St?YIz!tFd>Up{o5&
z<A;2L+g^ij@eenfyX?2u2^2mokGer;$;}D#s0-dxa8en=xlay}q}*2dhpZ)f_-Em_
z-|3_k;H%(-W=WmjqwaLcb@s1toAP^z3ufim-#T6aEjS+eZirnq_vR%&Te~QJUPJ^3
z`%Nv+wpf(t-3HYc4}_4;u_R@g^Qi6fsk_Ycj&Cpg6b}tfpxAw6(sQf!%Dl;Mv@bbl
zq7f$J7JmWVnLM)3$nB5Ea(cGynxg$eUZ{mMl1Oj(?vWm7;?CczYk*bVS35d2)T|Hs
z?8H#sAGR4$&!2x%5yE&<%r9}rvdais8vfxsv8K`txdcW}sgAu-Tk_$bNrRg~j~KQ+
z^;gA}GU<Y~dp3>C7#35&`n6!3*kS}+IKPbxXC-0Y=`6~j0Q;7x|DJ(B0nJBNK#)Ry
z{H6#1u_ijxLG=&q8Z>}FRdPV+iwKHk2HkUrVb#Ox=pMSy`x-4_OXDq?Gj<uLeSqvl
zKkhfs%QW!17x<D_mw{$EPv8n-WCeNKDC4+}eS=Yc&x38J?*ZpT>`#GEx{Q>)KrnqW
zCFULhE4g`wl{lTacAqHZO1O$m>z+P2@@tiSCf4@S69&=mYJiEb+lz~_olCtO$om?N
zLc6Z&G>E`O4EWf+&_&FNQ4SNJhSuds>c8`j%9WX!nY38H+xSj4)h7#ljYzXA0VKdw
zHjffD1eD_INB{1%Xf-7UjM=SuuCo~;^XUv#gvosy#4}e<W9sW^Jh)wD2X)|Ou;;?(
z4Og;3OYFf(l2G<aNR=F&>0*`xJ6hQ27x&`Mrvp1_K;39ia=hrs<gd(j&-DdCj7^y~
zFGu@Zns9)#)H$x+w;#Rw?hkqQ$F-MUxa3SU8xX{MO15^Vi=Zg2D?b7WW03dtwRD;&
zY>Zr|M}wX2ViT^Z8=FhbxQAxWZBwVns31!%t2pp2D*Rs37li|#MxzupBFMRL&_S$Q
zwc<POiB&$UI50}HZ|{8KmLB0j2I!XxlswQ-`PuT=-^6Wi-BN+okun9m!P?)R{sv;g
zWPF>ii(9j`%=o`OW+9SzF*UuoQ6AdnR8|HRLm%WRE3ze#JgwNHJ-%0$h<&g?g$wPS
z?zYzGX<z5q_yCJCn;T|cW~N`?UKc(fTLhrTV~oUb?)MrOo){5X-U~R8UQNJa;4d8G
zdwuvi_r^LR#<(Xrrk)KZCzdEjW}x@T>fJ#10&>`%(zC>F?)-Qx&;jADP=jdhqsZBG
zo*$MxtRixPhGUe4NUqF~5`&|o8-I-WaO=;+Jcg6WMt@Os88{HO8tzxdLj(iW>Z#EC
zgH^5Ly3``gA;(i;I?r0Z7?8iu0VDownHxX4^LN3%X}s}6*juk8aZ$ziuQEd5PDqr_
z;68qF%P3JV0>0uTx?>U1CF2CCtAFlP9i<efeMYF!3z{>}+|zmE7eZx+5z;jfh(xCP
z5xeXsB~S_{_$qxpXcU?9q&-PMrJIm#4_)Hpg^Vny7A(6hRperb#<FGRjllh`eS)+d
z$VFE+EtjQvMCdzDCN6UTt%+0f^aZhCEZr{Pr-QCoXznOqXN$mK@ZX<3y=Aua?vNPK
z>QatzP!0~CVY>bPHhU9fG1@P@L^wtNgpHR0-Rn=<i**>(IeM8}mu3we%Lu13cNo$J
zVq|<m42D&&oxvFP|Bf&SPf}qB>bju7e?m@!%W)%oYCd%a9$eKpBEbn@*Y5g;QMJko
zx*B^0I#IE|_SWF}b?-Mj<o*cd{;O#GI)H)u;6HrimI+acw+0@r>Wp}xP+Bwk>gao5
zl)b{yafqhEHiuk0_~h?D3K$AIVXpZ0um=BJ%#i0o!Zi2Ce}2)_|NQ@TnO^laN+~=>
z=K7+C!q@>PMqbP<e9AI%fF`X~89aBnHfCL2R*H6n9a&%Et+gokG=hb7nj^8*#C5sm
zZmD1Q9BZKAaFCAheA7s7@+Si{V12Xe0Ge&SYpNmaq0`MS&1u}6il7bbztZ2kh#Lpp
zr=c`^XiIc*-kw6zXp^Hb_nzE=AbI>%%nj)x!f73}rw)3oH_yoD1o{5enZS%s;5Tg*
zZ-VNi|0h`ZzD3+XyOThncI2zh<xdo5Qf6-;SmK`N_s_O|KU6or1h}sd0%;$%-a8$M
z(pKUvl$5eE@NLL`yH^B!JJGOZeyt=yx<q+b@+>vcqD^Z30ejORPH6%Hq7vvuXiA5s
zY#&qFJWu21#Ys-#<vVgJJQ>%0b6=|~{+dV$)XL65f16JPphSUV3aAfOFTBAHRWMgP
z!48Z!72&A?5QY#leLc}OqU$JWHNP^?mn2lVX(bP~H6iB2!WGgU5V{s(WQ=8djH`1r
zP42xuTuOhf7ElgSKx#cpvYvk?y5x|4%yy>>Cz3PN*ZpG!9?Pi$GLX;juMs=&i-Z~~
z4HVQu1-QE<-mb;=%c(foS`?tWTU_B6m%U_=D1zX2)NxUo9K#nLTgkm>6WmF}_9yey
zEM@X(^N{@3LoFh*-3g95)Q)vj$rB@1G~9tJv!{m3%#c*9>yJnGKz5!%(Y^dONmBLv
z2YhFHVG=l!WB{;|ao>d<r(k1k0K7x{g`LkLK^TWTB;~K+kGby?!E#)0(+T1tnLS#o
z6^j^S{#Kbb98wDF8_A?7lpHSJV!p&lmp~`~)49B5kLbmUw<}hbqsl0M#`QON$#C))
z#uhCF4Ia>CrIyc@{0?jnJ-O8R1(9n#0`LCmzg>k-(IQLmDng+U;}OE$lrfh05C9n*
z*f(KnMv}8=kLDQgr`wq^W7M)a7-z0=h6RpJdt@bzPbHnG;rI#oefx3#@11Z}viZ4I
ziMbOdsVTT?S;)fLTJeV;{UC(fV!#FW?@H{-xb%qR+w4<{lpYwrB>;A94&?cQWFWEd
z>D6z`*cF$p_&C2$>>z+U$zfXa@zRO_q`_5aBcV}~liMAsm3p~ziTGag=8H&stdmoT
zm->3CYZ*QF{n$w(ikiqzP)Ih(KfueH-(9~q4TW0P!5<K2XC3Ue+?{*<ah2W_M8T{%
zvH7-vwzuSXli51x)n>W@qNM;l%C?K*`GUS#6JM!`sP+G;Js$$nl!oOf^<Ux;*bbuH
z3=Vg$_^F=1MrWisu}eHP#+^Q~E15XeW}N`H<({Wj5>4tCq<W@AWJALldxmCd;B!{G
z%>f68u`RQd4N56WnF1Tbp^C0vvqRSu5yP&`9MBx$zoN*2v<9f$lkG5-Y^fba%dF1<
zm;I-;E%WKj4{)N+)6+W}nL+y8Y^48J_R9SNGUNOe1(z)qRo-Abh>ane%syCR@#gOb
zlEpb;@$0N`4J;+DhuVAOGL)snLuoz&^sALDBBjWZ<7YLyfK^yGMVLK+t9Qo3ABjs|
zT#r#+<DRr$f?>viH*vF4!%JLj<{jSAgh4*7;|FO*^ZkIQ?>SR@bNn$OG|HNp=@x^e
zIdX!RL6ol|v^+tYRxY3CiGjraXJG!tg_*<3lk=K`OU?QBbvR&s+?ZA`sLVW+d$<E<
zU(zf}!t=q|K*^wsF{fr+2s7u!o9DAq9@<6R-X-fF2f|${Tcd#xtO<8Y$D-DyEV(3!
zo%m^I!oxGAJC&FgD+T$WH@M-e@{5z}(#;4pnjOo!w^W|STI=GPqZx134T(yt?Rl_>
zPZ<#1-a#&((j0!~UrMKMU%W9}a+Om_Pjo|I{BPHw)%ABq8qPKd#j^z^9na~#r^sN)
z$K0(XcTALYHtN<n+?=Ph-Kqf}Se<crs?K{sMe_x2Q+|`C)ZQc+6@?QtYj&))L9(X_
zcC=9Vwe=ndE>?bys*~aKdqg024l5@WRet4x^((3bCl59#31oIuI{l3d)@5i_WqJZp
z)(4@yPNffLVm>}F4Er%aG%4>Ur;E<y%8`~FdvQ8xoMEeT`ix$9IHK^l@sm?bT<t3l
zN8tM3B4<?gpNOBAmMX)(@ZBk1eNn%)x9eWZ5HPYXkNYL`Gm?7+wW~${g6_DN5yO#m
z1#&~3I!4mLDP2^~baVKEO;mMA-S;ikS{AvT6J1@u!#!J{qF~R21F2Z|y(6D0(9Xw^
z6RRr`eaP7<Y-p*d!S>fPho6GEgBL$(RvpX=c_c_#ed?m#k;n|{J{J!^b{(zW$<V>o
z2YS@b+JvOMdKNNe-xHgjC3XKkJmv}l<?j6yvHbd$Hp_ydOpA+;$UhwThGBc#suJQ2
z*Pf<!LwcL2cdwFVxU3JWRe1oHw2MTpj9-s%R#zC~y7l)<^^OR*0wl{}gyJl2<8Sf6
z(C1QXpAxhCR{#{RRZmV|e<I(&#TmaiD2hvwN?uLT;KzBS6Z+Bl7_p;j7(&n8k|
zamb)|wq^EXi0uovB6K-XS+wQm#E;Y-Xm^%RJjIWFCSHM=*&}3W59^Lm+MzGK<BuyS
zKhZIABOQkxCtS>oz)>24i(@qQpky9&zSx*<VX@ozqh<Qf@@E$0=mNH|G+4>?7J)^E
ztt1^siU<Ex_-k~ZU&ZA5LN+2vtAtOrLL|G)Gdzabwc^aQTb6wv$8W0{YlmoCQxXlh
z*Zp$cefQVJ;jh5ocTb;#kz2Y3j@Mq2Z}WzWv7D0JyycXz1Q7D?Ppg+uIMJ(M7fS0j
z^W#H9Xq6gNMDXaqp3{zWpHlI>jb4pMN&9ms-WTO=J62pJ;;V7uz+djy&W!}<Hpjj@
z@A8_#WF+VI{Sq3fzpQ#+SR+)5n>=B+$1q(#SAXnMh|nL>mVh$F3^jS&=^v0SawIl8
zM0};E$R1uM)Up55&2bg<ryAds3L?F@g?&hvKU}-x82INxsaG6r@d}W?FHp${T-CBW
zRdeNoV4)~9RabJsj6CFgt-Gh8Bw%<4yG5<Md8JgOdxgPkR+q-H80{pEozw_v7-6+8
z@T@e9zrVfunqR-dEeCmbra460`91JzIQ#KOVWN;$noyxxjqe>V*Gz!wl1G~;H*L^E
zY4qo?IZ_7F)v$Pd0O|Mmq_~WIF=L+%i^YfQuHvp|Mw=5r-$sqYEu%&G`l7=#cOF&T
zp(ao=ttJggk*|wG8GSF#s{K?IH>A*ev8orJg^O7_**-m}43cpuNU2?x^EAHlt}stq
zBGO$YxT;OD!AW<Nwb*=>ouoo?iCDr06U%i&$I`w>8F%T1+r%^0LiojMdMmEi*Rtsv
zm{j9#=}Me)QRS()4917cP~fk~?yqP&a=>_o(h2EDxbP&Fz=zyc%XM2SgT{CCI2(Us
z`IQ`LK(LbR9!QdpXN{i{sMz(zB=omB-~vp3pfCrG@71)QMf%AteeA<P8BUX}S9PXk
z4y9LS$6j9EYIn-|%(?sO-wz9%kVN}V!Z^f%7KHU6J#>8tET3%ij$3bXZo5)^4n2};
zOUp<a8OII<CeUsVZT`wt967{9{prYY^olOUV2kq)U8IqTWZV!AWndTX6;T@L{1|+p
zpMHfmZ^)9y8fMZ^)jtNIPwzx+4tU~zCQ|ip@7<$DyascgyN5&)sz&Kq57_F`L9arn
zy4dYoW<)e5w(M%$^?RvI%J#}2Ac*Fk<xVkm1zd{N_D8i^^UgVHrx@jm7_`3e)KcAj
z7fZf$`2FOYbs&?=Y=2>ef2`Zm^6PQr^Ll7ucW<zv$jg0saq7g#Rtjb6zv5>Y#T8;$
zB0IaZ6;(yQ<6RCh?ULUWX4+6AO#UUaZb&LfOdAvSU;p6TT^DbG7J~%5?S+tx$XNrz
zU~6SqhCWIV1f-e>S|xkd;;i+Fnsq66j%)U~Rw9GV-W6+jKsEYfTOGUnV$x#7!2J<$
zmQEhzzs*8_a=h7!4y@A1_mc0S{~4rZo`~vV(^W0n7aaBdKvUCb#W^0=yv>-9eb>~%
zQMs|}4f_z9W58OlZxDU+?UtZ+MIV8(h`ZOf@4_&x-`gZxeNrbnud=m`v{VjewLTKa
z>vVnQG&LaB!qxZR-$%v#%XzDk7_P!5l=rGu{uA|h4{7yinG-3U)7RHZ3O6RrIM<Tf
z**Q+svFR-gy+XP<^>lZ_l-?R5@#~)e{T~!CkV<>(=Z?tdCTrF}fmm|f^*jHcN}$&p
zf_3O99Z%z>PK>PKeta=9YUN_OwNUJs;#5g@M)E+QlqMvSra;<~ShmO+c6D|Sz2eRR
zZc>|Y{&4p1&j(@wRQ1mWl5s3;vw#N2AlPoeA+|bAC1pl;-_W~NEgkz#sY6!6zFWoU
zDo#%`FeqaHSF_CyI-WK>GJ}T9%tNzRyHCi2aQfxGDf9g~rmFRN_qJ^*lwK}nyRrRZ
zg_yRz&&<Qm{nFI}8W9g^IfQEI#Dnh?p}=~RPw~EnH|@UwRbiNUeA0@Mqfcillpk)L
zPOQzoGLTRva9h62u^$OHqqFeTp=Y}r_e4qiw1x}VMLZM>IkieyaS$nJmf4*vxnZLp
zvU6ND26nIVm4CK<`e01mk*4k7O7fhM+9}mVHpp6I#tSKBMa*1X!oR;V@OxmxP1(Tr
z_%(&%y}hz6urx{0)8i*;;yLJrd$Z%)?77xG=g@66hKPRQKjte)6th{?O1mm%+e3Jw
zTpowc3Yu&T>3!kZ%TREUHGF2gM*V(iMjLu}iC!sdbklU}6`A3?JexcDdJaeGi`KVc
zGE;ESe~)fdqdd5`w%$8^N}+4NTeIyumZRzjF(Ok61DWWN-O~L%;PUGRf!3c#XjlR>
z86uyfX34E?0}L;zzwt|qvd<pZ0T!dkD2ME$*IA|yzB0@H5n>_~D1iqi%)FUEv4U3Y
zjlr@3yhPXaNWvk>X*=;!y~af)G~AkB_!x)C9Dos)Q&*@LA&b+N3unANyH)Y;kmV~>
z2Vg<3XsmA+;EobX-AeDMF-^ua)Q|Exz3UkbWuU=uq1efDS_heoW{C?~W>!4Kf9X2z
zVwRc{)$H|9=4-#x*&Sn(t#TaYnn-hN%B2VEQ|@+xz3|Cc$(_zGGgPB?j?O|5?rkrW
zeK{qYh1PzOQ4+93nxwsKd(F<8?}1NWYY6M=$x0r9PWIEuo=UF!r0U3?L1<g-nui8s
z0=#Cof^-=_YRosSFseLS^i0{Sr#jkz@W0EyVV=>u*we{oBh4s$Go0?Y?`m~S&2e86
z+7#*!$)?t5P~GoKT2o~cw9+9DzmE<m-uO#uz3I<_rz4$*yM|ha27^YseCw&DSmKw=
z<EcigJ1R`h#_LM5cPM71CdsPIp6x6eIcf*QPXCQL^-PZ$ay;)yBo3%?b0yJZ5XTfg
zvsZPG_cM3S%c`7rla8z0(plB2^0^!9c4hYC(x<2Y+nGNqzCqkN!sKRLl6uHawK(le
z?Zy+75>GTyouW-<aE!X<yToLa*T2QMz4GRmqI$NyS@j!+upPO?AO0KGk3G7XGuH9w
zdipQo7^>pB%0S=6v}Z*HF>vkw_Oe><)8$Go`*x8Swur&;uEC{@xot;FpDLFKPUpXj
z@yB$P3{&Aq+S^iJ$b2zDrbrci-`dVtwNoAXW?oY1S;Z*UHrO@b)Nj#|J(ZEGcRa(W
z=A#1pY?2GkXPZ>L)wY#fH4HIV^H)pLNv52Pk=jJP7tNuwW@04@lDovjPvx!^yT1K(
zKK=G{xcG6`2pACGOmR%~bb13=DRVY2^^T)=l<g=yLPKsl(Q!Ah(cgT%bkV!#96lU7
ztA#A-4oV>-lUU~B&9PQ3R#WV%R^_!%a=vnRi^TMLl^$+#eNuAuk!UncaFUjF)nc9K
z5HJ&AK0CjKxrHUIVcSqKd9>jK!_FwxOS)&4cpDdVF`br7H*Vm|SQqD`oui_A{28ya
zCu>VyCukcRyYpgY7m#_!{qzkfNk@G>Ei3mpj}oy+Ozes1C4ZCd<Ia~T@|=fXDl6|<
zCs{8p?nS9dR$4XCL6jQT{bH0gx{;nP-c}K<e30jr-cHM}GFNmOHkYGUIW2wTQseo)
zJfl~EQO~{Hxft9#j0O(}a7Ht(G<tkh@~HlhalDg))ZY$?CMoW{{PoX=HGH1SSxv-5
z^!z*${bm0JrEg@!CdL0K(ykkIxs0>DC*S;z%CQm+vF%bC@Ff-lr)FsVo?djnXG$lH
zFL%Qm&f|C1PF1kqy4?HogI@TXI}#!J8*jT3?}y#lb4rM>lnESfA_2<x-nBJrKb&;A
ztR3f)oOso!;;aL=CYSnX&^3^?eP~OvEsNgyneh9lLGS+EKv7;r$|fTNHWsyGuaINO
zt#C2qeIDlMoA`!#)Tkp+Oi}Wvq%f<qLoR|U)sn&CUfR6&N5ckgn{uY|{po<ZzS_Bp
z+x%^tow<NE0Pl&bchAHkoz$^_4bQtofzg}|$Tf&<2Fa01&3rgtFZo5OA|HO_oLuRY
z;wl4tTNCk@`AmT+RqPMm7zXf$@m=G&Y#h1d(^Db?{g$Lzj>Nfn6og5prz*LL6w1B?
z(J|sJ6czazRvW8woY691sQH6b`p!I?9T4(w)NL7M8WNGLUOtR(ZJT$7JA%j_T#`%G
zY5ZJ}wrv&j9d_qx?L%=wI;X;Q(R)@9)q_D#YG&XZ^GScV?Z+4Dx;n*(o1f5}a5v#o
z15p)R(K+P=@czlBK#1kY&r;7t8;Apcf@~lx2PU;Wi&5S-_aVD!0>1L(l6c@tWjzB_
z2@m@p0lE~tzzfZiRgZV|pKG{Fd#pR;>zuy)hDQa_m$%rR%4x}Quvs~_Mn**6aP2Ps
z!9fx|EhgiC-V7V4H22z1`gD-y6T6kYPBQ17PlTO$_Xf2hVX>Uh<<T4*h!yIea<Ou=
z+CtmnkV#d9kKm3-saWKpiGWesPML0p`ec2b18P(4Ogp8o1%k&`XRQo8RF{O5?gnYd
ziaTD6q)lg_0Vi_X-E9N~#_b9|&l(f-7pIRYiXRYWitO4AI4U4c{up~$rIpB5%>CoG
z&b+!M5AdjY3~1Ldg>yT_AyS6BHfr~<<ddnuWA}9g9!BnnR3uG$XpOfbTCgixG|TD4
z^Z_W`E*q|3e>AS{8xq&^H{etiq>g3FBGqt2)k>awC}Nb9dJoPG4FqikO)tzx+fmdY
zH6iqCcZ9L6{6@0x-O+!URTTrOm1*oYErqAGBASjhcNzWn{Lf2W=m}>fOiw7Fx^g>2
zjsD4O(8<E|mGFh{`SQ}2NKn4*Ck-cc^g7qsVh<l=bRIy6Z=mBB4T|8F<D=6UnLWuz
zcOb|57spYCmq?1>0u!~N?|`)hi4=>t<@o|*s-$CqS=n5_9aG`+8^ol|v=rMXg0PnT
z84UX)4t0I8_#*D;+y5s5CBU0PKTn|Ws1RC{_kiuN%3r~`rADC~+O=`6l?;FJe|xg3
z8!eOtJdUd<+$Z)~P^(8b6+k=1hv}Bjv>i1CpzqVlMk!_;$mx7iH1|o$r`lyLD1M(&
zub5(3X9Bt3@JkJV;g4I!ubjx=!JiGtZz{Jzp%d73N@j7+9+}qjn%n%mL~XLr{5uAS
zajWAQe57q_)!*BUbLhL@tF<+hVRbyi=9Asv61S1x5Z-@|yyJX}+$*%TbnN={T=nm3
zw0^HzOyf6V`?Tus@t@nD<jr5(YLDFO5-)~#;osA764oO?5c@@Q)l`>mshawwRhjq4
z0P1S*p@|IeKf}#z8UH|cOJMG><C<SN?@i{E=*sYK`AS)G5W(-qlC{LiXUWFvwlBy3
zqOOd^ZdyF)|5%sH=vu&zb5C367!>a9n7X%;dBs+N--p>hrOn_QHlOBxf_b5J#-ATw
zBHfakkKE#Mv|;X?Qh`v<os)XK+_88WtIUl%sE$vK7*ibLpf(V$f9Y?rSZ3}ES#s~H
znO83SKX|Hhucc7_FJ~7=WCECLq;}9QG-FdXC@z+%YCg5M#Sl;6G_Gj=$5IA_%ff$L
zdTAN9ms|P(b7VB)=`#t}l?(skzk<eZF}gsXpy`nN^s_C@^c{aO*?I2A&y?PchlXyR
z(8I;`Z=(3?fs{;#7~_bJF#wjhfIxOWew8Rc(t?Y>?Myl_;<P;(5HVzf&$Q(7q(MfJ
zuB)Yn(hF}Zzjd(;E9n@(6vYVdaWwk<qz)oQ;oL{?Y?MSA*ex+=qJ0>IT_2AbO2s&j
zR}n?o@;K681{ac$BBzD#Cu2=!LCYAT&n&gSC0h%kC{wul?!mxM;yX~dL|QhlZd$V+
z$ayll(bc_HZZw6KaOt(Cg?EtWS;W1}mXs&h<y%@^OL&9Tp>Xd-lAH>X$oNtVgXf(M
z;#Kg@x<H&%J!0qOpRj{aTqJmXjj;CGA*u#QhoH-K<y#IgtP#3M7!@6K-4U!?3w4`K
zLh8azh*&8i<6jM5DING3ZV(SGdVMUb^wPrh_?=^>98-qVfsHg0Aph{utG=^%4;!QG
zh1bi|Etc_D!QRvU@c~GhP0CY3%NzM$bYA$44tD%TK*+N)D9Q8z1(en9$0(L>oCD|P
zb^w0wFAa?!PYoodua&|*v3|-G#5>u@7_9HY2C4Ran%Tq>iWn~8J8R+2v`j3#dGQXy
zKQcEU+4=6imcd;D8Z2T>rH%;P2f3Kt!4-k2=t}>u4_;3*)8pp|M?~z6aHqOTFQA`R
zO><tCgv(fl%Xsn8mvkhXlT%r%VFN5}!n0+=-+J-=8A4u!u8EeF#q-Jv=wXETt_OC+
z<;^&f2EJaDjOEPD-tV&WaD<TAoG`QhO4BgUYa&&{2e6ea)_{TUT~5hHV3OU(&pGhM
zO$CF@AS2q~fMrGI+G?UYUGTlaSLIpEo1lEt(V2+z`Zt3vNke|xlxERpjvg#O(vP(A
zKyQMq1u&MmpJUf4^0$-|Zn*tpzT1^B&UF5V#P~~b$l6phcKS%o|E1fM2Sr}$IZ~<l
zpN?%2OCtOL05#%FG{YJIgWI3MiN`<N@z?&}3L$$l6EpAXp|m-VT*h-6pn%5b9~Hx7
z9&>WvJR=Lhm4$LlUr;qBfoLF~9pe9Y{Cqo1;qf7onYfNXk$pK|kLJ76A>p%D#b{YU
zw77k=i_uEQ%w&y)D~njZi2E2ZLL%7-`xGB!;iMCp%%{V>dAeG7!)zH4qrJ$CE<ZK@
z_c^Ge<ozcAyxmyG#h@pO^SJWS!svCGQf`WW1nG=%A!aT|^L-Vb?AHK)X=7Ek@Ug@g
z&0mG&Q{~uyKQBsRc)?*|v-u1bmJ;E5VS6d<sTV5?MrTQF7Pfa`ce(}OJ06Q}_=~#c
zG8r)l(u$|4#{qz3FVrBPYv!A8flD`#+Z3NOe2Zw$sU8++p--#bW!5OVUeeJv&SEZ2
z$3>Ogp~$mh?#5#{tu<k?8;=P*)O74>&XSd<mFt*FCfZg$T37-yRbk5<S;-9@bc978
zp}}+(zM70JzS@88oMFaYS$|xESGO80Jm2CS^llMuy^AFmezmxqJaBXe)qqKiHkE!u
z5*5f(v|IkIKL3r_N5of81Mptb^dEF@b##v{yn!DB!&aDGjCUsUg)ZEmVvAyYYcyUb
zlQCpRd;XJfu#PkPw=tvPN5qP3^H|v4h3j$)kcAv%{>RTAFW*IY0Wy#6$LA*|u&78N
zCWHt~F-YpsNJ1KQo`{*&@5IdP`IpHY!o6l87&8Wk6%w~m@3K4Cm(ysC3q)`KM>G^#
zQqt!+eu`Vi!DTUQmgpwX!uHf2#33cCqdJZxya@vZ4Iu=U(rbc6CG|%}WD_k<2^sQA
zK%?v)3>A>R4QB;nG630M6O1m2305k)-xqVeG$Pb)Q?64wna;Va$z}dcD6H2R{dmR*
zqlyv3+B?JHNMTQmqF~iyhGU9kxI{)vsOo7IX4yJSjHT^q%gbBt*Jb56b#vJ&*4%%~
zpPbPC^~7hR_gA9rFJ&j-ZSWaYNCqk9q4Nf{kUa!DO~v=OnBmZ>Mc*U);-T7w?Mqw+
zc#drcEJ<9mh~U5(PHrqWIPdeV6>%6gB#~RI9B0(9sn)_XlHY?*tS0`-y)<FETn(W`
zD>m$_(3yoblC|CB!grK6Ccg!PG9pK@|G2BD<Dw)Q?PCOQM9#U};TPx5Au?xM8d35Q
zIfuq;|1E468C=>~z}iOUWSWGyCJ28U?X5ol<2T5~6DiyDHFPKRA%uOb-H%0KfMOiN
zzp7)~I=jafF5B-TR(LeFxE8T#!$AT!fF`{&BUecw*KY-qC<(C+dHw643r*A#99Jll
z#Q&I_{X{cY)=XT0bau+torYb3Nn=Vhan~yi;5Q&S3(0y>bwHYIG=^Vm!c#JZwlnUg
zD&ig)<gPFhw{UMr_H<0Yw0SMadlKT}e2bt4ZRX`<aPfBMf-~D8PK$qOQHcku`Z4oj
zzfEK0pn`YFje{$u@7NCY?@$W6Nd|@6<K}F;`fpe58zftsXb|Dd--`zL6l9uM(bxS^
zab|VKXMDRwn|62MQg~J!*NgOCvVx=jzg-5s(5Kn0_`Lcnc!(Rs3E_CKu-R$>l>{*A
z8o)1OkgWT(YA?K&^%jmJrSHG7=(CnD{3X3FtNOy9x%<fM>G_{apni_^qr^_Ztup%4
z0)^x%tG}>LtViY8N8}S|9sNr&-L&w65-)@Y%o7YEGo^`%9Ah4A-Jh=(S}Wyn0SDI@
zF-OFf>4VXBBBp*fLE2#LmWHX1)E~DFnPo9NV;B~g`RuyPTRMq=N-MC?a*QtWKqO<(
zpVGezz)1?utwtpBOkWpNMpO$<f6_dwO*!1ag_1gl9DaMy!;lU2K$uiNl>3RKPZu_B
z#Orhn8lXmqJzU<VEktoQL$Y|M5hK#zoBLWb_0=g%V}~{5qsXEwXhD#gow6aFFg;ts
zs{Y+BYD!fNQBO-baXO;gl@yMIqkjmjWCDh4iv<6Vm~{kik4Px2J|nP=*eXq2pTZHC
znKmz(Msjh0oTgAF^<TigM*>GQq0dAQEo`rjZP=egJlax@=6!6=*6#p?p9wZC1aHzp
z=O00i#N%pr*X`bVw5Rg>+2bdA7D5Mi9{G0#wAE7WK!C20o+v|;jgHO4uiB9eCDm?|
zTX>rOt8se3Z)_8RvbEO~<9?YV`%Z_vtP%J}xRe4lRVN4h_&y`oi;{H9hm{vTYuLBI
zr<Ed0AOt5cA7<DE<Z)c#M~)zd)@poDGr034A_Ql9{vw_|M6ye4DzE6UNnqS&sMf#z
zK}`o$!(^8cgoQMF1?@Q3pCnG074#ih2V1g@48l>o4nw@>Q5%dAUDdsrHxu`wiR>xT
z&%3^3@V866wHi{_bV#1_U#Ub}zg>V(Fb4~fi7u+ThDc>eT|gA?(A8Ry##tJsfv_eN
z9SLM|ErwI+qXjqx%{$v1zbtGPd3x+hKO8^xuuQp;UejZMVo12+V8X%;ahvsG^kXY$
zj7IZCfjd{JgagnV?ReJFZNES|A4>c8dpI5Zwqxrik!4OSU;t+C=Pqx6Q7-$%lF~>(
zT}us{79J9bHT?2M@t8-TN6uOxHR^C9b03E{LeZ@?+e2QKOJiPhHU=*1EE8UMl|Kve
z-%vd-jz_*557ZJvuF}&EQ#5`A)`A$VFRLdm8SM$36MM8F2j2r$;)+%su@@mJ)5b1c
zKB_hAQ}cxw!Yz*QNW*Zg8R|9Rro&S}{2Yc!;~k0ABsl~L@E~{S!+O%c)KO~x7?y-%
zMDh-bym=47o&qEHSD=sA@$IvPTG>yti^jjBAClwXOQB!A7IDpdJ-V-S$*<<j<ZLM<
zVeAq0=v0m~Ekh_EUc9NOg|jdXX70x?24_bi?D*<|K(R1h+W^&~Jr-iNh&l-&=r|Mp
zV!-wv@|z^8_8qyM+SAK0Qp!w{gaGbwE|L&*jAd$T{40(eke9QbX%LJ?YAYxiSq5g#
zj128qZY#T77DFnkjNI5@&P!x#M4V`yBT3SMp?~eD`<TgbhGydiRC%%3+$6ybp1yw;
zhQSG>RUi0UA9R$T9`r3_!_-_|`Lez+%HJg72jwK$y)<lCqx)Z>I+qS0u6!ou&#@nw
z^3aZ#@r>9mMc#&&LH3V9{G<J(T7*0c+4VZn3DAu-p|p7_$eXvY9-?0Z51_Qe9Ib6y
z2#M*v+TVZ$v!8X-`3oZinxI5UMfTP}{E~=F4LMmo;+y`7AfZHr<xJxtjBO)Zb5G@?
zg*72hK{$H_2q4ZjkmDIb=%%I~BG@7MK(H`QkPC!9TCsNLTO7!%rlw(ZY(%<ja?w~x
z+OQ9x2Ma?$bdyo&NXL!#qi)oZAc0~s^U8qk6&gRg)(c+c%oOe&gOb=hRR{OaZh!E3
z(zBYxanjd1eGNN|G^#f+=n-*_uFGGeE7L@_WvdA72GWNRln9%@I1^U__H)-NOM`b<
z6nZgTF-jnELt-u*1JmbSb<>EFIA04zrd`mTMLeCfo0f7*SN+8Jyqtn@MheeERs=il
z)RQ5iB~>-pw?HF{f0Cz-Ko7^EPrbx65GO+e#J))?pD&?gA|{7d{FMg7k@W?2qg}K;
zNrPd{oz|=$$jZ`4lXzeuUL8!b4Wi|Er#*`nP<>MBcryB-+vB5{|M-OvN^#>!rWMBl
z{FaRN&``Md4&i2${CZu^noB2h`4{Y7BL(9N82tEfcGQC^*GE;KPRvC`)ET@VJN$Sk
zJQ<1#D~>Iow>k66=sUPqrY^sB4GgXM+jGvcY(iH-M+$9x*Ti&Wb_&oK&5DQEGG(68
zV5qB$qF;Iox7QX)SIc}J6kkNO1V;uYaN=j4L6t+B9|vy1E~9JB=ma6Mp3IL+jc#;(
zdnvU^VEO9fdE@SU<fQK08_Js}1t$P@wuNBY!BlCoY*Tx{`#~g7SsxeghzfnyYDXuR
z(=;=A0!B|{g94ef3*y>n^^Rqo`3{{d(eq6UTmN4=m06_<r1-Pno*zt(R}l*@qxr$E
z>>P5FpmQE$y=0jDs<iF%d%%qrX%K%H_8~%LJi##xh{en&45x*a3Yl(@jEJv=)K}Y=
zS?nZ}8mnMlL$1>m&!$rEf7i{mSVnngawsaCe*b4Dkv~8#dLET~8ai)2XuE)mSZA(=
zwa1jc0J!;?@?G}`utaNK1G-z!V?+6v{6(g6A)^I3yhNkO{N>5O5Xk+eCc1+q!Ml-$
z)pw~mj*s4bM-*D2y933i;EE;lzgsMge3C}&kSh!tv>l@G)T=OaZE@YcX>g#ahA;X^
zCfow25MsVg6xAP#?yTKaFgDP}wR`eAQemc6PV{&X8Ma#50<cJZn2D&GG*Qjo2qq^T
z;E3Hq$=HksIs?x1-V%&UAn_Kqh{h<Wa;b?fu}r|47t+YZVCuMdub;3yyZ85>bs~wA
z1+DSZaKU{^;HDO-Ul9*_p+5QeK6c^TL@Ww<zHJvBJw!L>fn-=}aO%D-9DsRq3Pjo2
zJu{t#DEx>Qg(iZeb>vb{j@vL&-di|8M)c(1q)0jWsR5i0@wK~HKl(Ratl5kN$$A`g
zEu)YawUWgL<=3-~aA{S=(@8fYLmSJXz!Of_26os>+Mo#PXE2Aa<2J*01!uv!ZFFLg
z=U*a@X~gjm?JnzcX3?f-J-s{S4u-833uktadf5jpYK;U4oWwUa>#7K~8n5>@hU6PL
zQGoY&JWR`U4sosWS+o|z&`d4Y1@_(Qz`Vo6WvjN<!xh;SVMqAOcQV#I+Z3>&PJ<ww
z3Bwwml7#|#7+xpG5J=9(9n28nGA@5=hM4v_<s(7*pxUsnTfO)%F}5<)V??}#1^vaz
z;j$fOK=4S@*9Nl(i*{8ei6$MtehW&&Mzp%3xG^TxmUo#zMtGHK7T@w8M2{CtP81PM
zBBd^ppFT}2xlI05+VZCmV*(b@TGyOa^H||h4Bb5oqswFX)jcddjcY5L7#|pzxXyQD
z^Gh1Poae*ZS%yc58Qd?yfA9yr*M!Wm<I}=y^%qf<)lZ&XbbKB2Q<J#Nt-aRd7}Ho^
z`ma2h%VvSS^}7V|DQ)hZCgy2_6DTpvu|k(;YvUt#$1Eg~Zc}amos>^6|BK_KuvOZ=
zSAG19#%6pemv^8rleb;wUPz<eY-MqRfPN?sj03K$`C<~rmt_0{L+hKau-lvCenO~a
zeb67;1kv*Ukw8+BrSYQxlBwt+lE({O(^g1;vU?azfpRrtTyk+jPd(Bi0y^qQ<A7ON
z5wwu_kq)*w0N1z(pmiVVZ6L=7klI+Bv1vGezgiPFo%*5yXwZeTdqpPk8L_Be`>p18
zK80G4)T1S0t&m45tm4@}4mvtgt0%R1-F2M1t-i^U%^E{$AfnR|x=+CMeD|E{`FC)e
zx}h}0V^8dwu?7~3(F_@HvYMALuGNF0&iF+U@dh9WJhJO#E!|+`job8a{LKBxKQ35y
z&HP;`u9!IQ6O_JZHW6{6?)PEdq=L+^?#qd(bbeDQ1x**9XqxpGE+2h`^0)ZNKw=u!
zGGeOvFJtYv0B5O%U;BSCECZbw^FMAWP)IHO$Nz1XZQ%ukkMY+d-V`KlTtmN@HTPfL
zAF%?Y!5H?LCQb{I;1Q>8*gcOE2Xmj+e<+zU2MVj_y}krHh8vv;`U>w6UuQrs#vs}g
z$Vl7&0T-5%PsH*|TFP~;f>dwBS7W&+8Ja*0DQK6jOhG$9GrHD{5gU>ne*fTgwE6ce
zmJx0RxqKW`F&-#d>g~vbZzP8ee5P#knRR|Z%8L^^9mHreoh-GTABG<wzV=o=TG&1^
z%9|<vvPh7)dTGP_sVOvubTp%eVrBHi`+j)pI^)oTY(p^64!}+mfhM3WIyUm--|`kE
z`4FP$l1Vqdt|8P|qyQ&P4=a}(2E}t3N5LC}mvR9NFb;`+9X)|FlQsA6!j0!)BPGkQ
zn!t@nYjuKNlV*9SG7JT^GZL125;y%Z7(O)mZ8{tSD1~U88(~CmMjWR8!Fa=~7IE{-
zJIhpa>)fp(Z5de2FLcnTKo<Pc<in)90cJ|xXa>Sqijje~HYoskO%REN?Omt?+$4@t
zc;1t7E716C!0{PF9C3-?2hm^wU^=jtu>ojIF7gPH<kjspgu$jytLefw8OApeRYqM+
z+$+gxo(2$b5(i^1+9{IZI+))QuUAZPNiaC_BQF+~z28fQpMBl;<&k{bl0mEQjlAGw
zmXCQNTLdu`AZ+i7`A=|@0M_s<N+sG~2U@xQh1bhu5ksB%pZ|}W?)*gS02|=Zq3&2%
zA#Sulr=*z4mRdO1z-In$?&E)RAOD;Cn8Oj>M$#O98xyk?(jPb4r~T$yCKdFB6~fUC
zRyRo%7?(4A92{g}Qc5c_YU0gp3dpcOt(MfG5{;<NhnWCXxd8!aC0%sxT-Rl|yOXk9
zNuczNL!%N`f*JLE<bFcX8&!C?_Ds*FUg6SHRT-cCQ(kp2#H~uUH|%*BvzCftqF($l
z6*eKIRbm|O+c}iN!{kg^-ruUvj$&CY%66Ayc|G%TJsK93#rG8>dS}IdPYz7?7YI8A
zw-^n4exqA%le^pL!so}?Jt9s&C4XL<7+HqG*P<SJtip4%6LpptVsH}}v-q+dz`NPf
z8ZW;l6=6_xA9`^1twLB|T}1D<;mU_~7jA|g_S#xT*M9U`92&AOf}*+zD^qnaml#X$
zLfjTbCf}QZO#YAnVOxjqF2q5-Pw&fHhIZQnrtw$eMrQ7MzitN_aT36K+(hN?Vc;vd
zhe-BA(GwV%HVL@a3t4$4tJ6w~STGB>I6>c65DyAg<7z~DV$gH`oj1$*iIHFD0eH4&
z54>3pSi^33!(qRSDI|1W#06oQ{5Kb?Sy&jxy=o(@MZ_U!tyPyvhXIGX={ohDtHe6z
z;1CKNq10Gp+MZ_=#Cb(Ahf1`TqPT1^x46*C$n8<TcA;{xgyE3~A?i}V{nhvFt=GF@
ziE`Z&pL>9K@Slg?;MgHQcYAz_o7fke(-p{~EpapkWt;+kvzs_oSVfzmOD44)Nh3Y!
zEqU1ENuflpa_fkKN>22z0sm+I32+htYaIi4JU4;Y)9<QJG^qNF23h4x&PbZwQf}{E
z_XbkJb~oCUd{&R_YBjbcHv=!JNgKxv^2kG%+<DB|JpLkY2}R&3h39B4+DA)lrnd0a
zPN=W4%0sQad-qG?qoL@~3Vr{_VOZU1e6zPo^K6@)89$HpTcV<o7$<*8zkM_(Cr6))
zj;<Q==PLbkOWi__Q~bkG@jRKjPbSk^rzWGNW1Fk`d%|<Nm9q4>XhSStGQ6kB^g;C)
zUkQN&(MQYm8TF}Qlj*xfJ;)qfiQ&)gb4}<C^p6KK=U6g9`n6(;8n^-1u13%P(->Nr
zQ)gekIv_Jd)G_?ts+@F_>?s7B4`@TCQUI8_0BlzQ7RWj{L%oQ2T4dq!`w`DqzDo=-
zfXqD!jAaULv)#gVQmeM8$?(A`|04Xki44fuY4z>hPV?pr%THo;hA&zh0hc^@L*~b~
z!x?-zbP%>4x#uWR_wG<il(6GZ$(g^C)-J`2fmV4r|98}{dTR)(2QM(^Hvx!l3m4H}
zI}FPQkE5IpJpBLZ`tPe5#q$if$pf^J@3$4$m`8WIqp(**x;tPfv21p-ZPvd|e;SnB
z-|}Pfn;}c5k^kIimfqt36Il37l=b#^8<E$^Wq?xg?*#6UduWn;f3XI*tFdEB!A7||
zYgyuu_f+6!Sfcdz1hV7-Iq~XjGAH=HDHrVnD7WH<5)Hbn->+8VDI;wf``}@}`@B5L
zoI>3BtPf_iG9mOb*40ban=%7@4#u~VzR4-+Xm~5^LJu(G(_>@~%%9PC*Nx~#J2$rk
zL$-cSsetByEdQ;k65j$;TqnLfD<Iy9m=5{m&SHL{d(7hKr#gf$UMM{?<ZI292Waq^
zj$aq>HiZdd%FVmw1iBlQ`p0OvSCD<ydXN<ykl3=r-&!^SIdNVKsN@fLr3cbaktjZv
zDEcc|bZ6azDVEi<lv`V*$gB#(iPyLO6p~%dr-86iuwrks-<{8pI~^gzQ3{dJ{pa8e
z%enL5Jdeftq4g&H(7h`t5j+9@ZtDSxdbta9z&;?gWi5Ls5gwbn#D(rPX8=7Zq+{3i
zTfA<__u-#nxfm|i_k#x5RIfUolKgI0fcF<Y9jH>;QPC>A$hmb;UP#piI2q4wv0gZH
zm6$TUUww85^936|`aOhW($`A@9KWq@1tvCqG|YXzc0Xls7Pq|_9mDJ#m@6SsB<-os
ztjuiA_VVj!MmeS!7a~P{qdH0OU9krA>paw$L9OHebDzQCsTj^1e%RFU)qCvWrTozv
zD%(PMGiiKFvkLNFlsHy|?ZiTWHR~K&iaZmIycqLO`W8H%f<QIYr2yw65;Ts1u3>i>
z&u;TT|Nc(X#P@4VQlH_|{8TYN?$AZ}bBNqBz!~>9@5FX6!Y7`>81iNUyzFD`^Y-01
zd~j%lh>D|Ev%X)y;RI{??eLRiEM~{nVju&;BsjNYDKQdP+b@%}ZlbJiDyz^s$w2o1
zS`kBe<*yQJ)F#0~6++_cFMXNpe3WEs?9o*JaVHVIJlCIUXL6O7vS7c{%~ygu{ZykB
zFxrEkr}i}PIK(J0?q)dclUpgbKGuE`P#DkNlbpC6B{3{RTZ{40645Li%Ol!whc({p
zrpWKWUHF0lO9?n(wvj?huMgO&waCV^$J%9uTsSRsg)Grxrg~iZo2{l*1W}h4B6{7w
zwtdMaboU=~r;ho3$nYABP@wfSN|f;BvvyRn@YRjAmW1#itL$|S!1Q5DaztX50JnxT
zI#LP|4A1aSdnX@;%3^;rLhg~vx0(Sohl1!W877^25TdK`7cF(`E}-`dAZ?LX5sV5k
zBII?JATCwW41vMEWJDQF)SC5ReF}FKZFs;YVFB1iM4$sgi4~-YcI#9IqVWkc8=Q(I
z)wPxQwzsZHD%fp&A0797M1#bf0<_R<1gFOO8FFDB?T6UgN$2uMG!cic5iGF)2T1`W
zjt@*09o=ZBQq=~7qDX(-Kfmn7-28i;#(F!L<_>cr**G@syY|o>E@>%#^%)ZVaDHx0
z6*r(~F|mH<{uAtF`KwQ8>)y8Ptm@cL=~?mQ4TX2=?PF*;jy_}a_W1nWF~3Kty>9u$
z1}<R5tKBGyTb9Y->r#>`@J<~{N~P9?`Y{JsO<ZFXF2&<S*g}cc$L`g&E;{;H(01@^
z{`sPfS3c&bg0mCoo1>Oh)E4;(j6G_b+8SNXGElV-y;B8PI=byw%sV<e&&nSl?@v+G
zseaYHvf4J$TjM&2thnK<!MVDv^fSGmfIW-~JxSLfc#O@AXv7Pw8;7XQ5+5CBHJMn^
z0Cl|PXI<(mWJWjRqLyy3l=)A}+0iE+>erpNz4cain|vp!rIySo1BRG@*HQa7&wcWf
zwKV>PBzRf-e$BvEDL|1f5OaF&A2%W<)Q2cIi(&ffAXsxxkPsp<XmeTJx`X-~#da^=
zmY%m7`2eg@`NuS7+{`4j*f)7#0eO+eDIi4&$`fQ+p5@)KqkPLzE{dNZqALd3x}3)i
zGtr*hg13h^lO+tV&R%V!zAJa|G8yHroZp|9x4BH3iLgchFKQ>*7JL+5b(>SsjklUx
zD91wgHK%|-EGhOCjLX=(ZpARDua~c~=de7JyA+<zJ#E*1cQ|0Q^4XR-Z+&$WvT#ta
z8_LQ9FqvD>6iPyEwT6>d7!PzNCph0juXH>E)){5ht$?`;!Pphz6b@L-tpq?=AR_*M
z-JQ3{hE6bw1B&4QCYK~;exP>tB2F{nPg+`8=->1PqR9fZqO8TCLA62wr`gINJ;wk9
z!2Z)(mT{G;OTfrzl7#-Q5hR~N$R^_tO-|yp^TO{QnEG4ps!-sY3M~6lJ7j7r!4f(X
zg!Ce0iXx?v<W^$Vgh{S6^|CtFY>}~+L6Cn?xk?ar6IU;yb)gG@X06$KN3LDH>KMS~
zZ}}D38a<MMsEU~nN}LGA_9CBD&t`!4EF@<D*^<PIFs;?GC*kw{9-cEU?*}xfJZd|D
z=u_@2)X%!iNh80KOmluvRQk<h$}Fv6E93e-LLKgabEF<am_`!f5=ZT)`>gcIz=b+Z
zu5T*mfmxjlx`MHE=tgI{`Dss+kcRK|lZ<la7}jA4&pa>KO?n}5@Id^HOP!%UZol;>
zuOF#hDc<47Gezi_wc^%>tfEMaFUD?^=KKxS$f=+Mwq4A`v+?8~7BNu7*eRkL96Ihl
zY!pDc-LKg*hv8%%ruY|rEA|Cy>7|_rDc4|A+svMamtP@}nvB*%RKWy`=i+L)OmvwZ
zLfLeyg)~^6e(~WJI$c;HnoUxjr0R`iYLF)<sIx~1jY<n>#Y7K5@p=<!e9WkUG&0Kb
zaJek8#*!^SK-Bq`h6dO3%FiV+=2asD9<H^TbAA}bD;D_$+z#JF^cM`5Opw&CINOO=
zd=e_M3_oA!8L3crmRq4I6>SX1`{uvG5sJSxOz{+MMXOx^X|#;w<;GyU`Xl?#Tzv!-
z-xG$ZYMQguhg2-Nh}l~A@x*1cMm!uvdKjE_8p^p_F?q<^1FQB@f64mG>}$4Dp9x-B
zfNYHg&pmc0!fS$FzWd38gEbGuOjM)#N!Qor9wKP8dj?B--Xm465Uc6W=@GbFu(63z
z)=Cry*2;@>65I#3?AT!u=&ZU@?pD)_{SN`B-ybEO0;%Vk<G3jgAq<Tiq2g9<%{-Kn
zp+<J6{>%&iVX-8JOX?>iW+`z;=IZLe+0Zqxa$^Hujt--L(qEUNqSB(fbk(bsyu{87
zW`Dmz?^gO%f|e3-Fl099*l(wBM`k=00F)BM>R$sM#!skS)j0N&Q5D{>#NY7RZHEBn
zjNvm#!Mt$TND!8?qz9<t<$V|leaJ4&`l=ENvzW2lZ7=;B`99-y1!(_GE3G?vmm|=w
zYZ03ToR7}7&{32>IxL|?eTHfAx~f-}I_IJo7Wr*heRMP9IuV|A?}yU!rni%^c1LtA
zP!tX~<FTC`YavG)rrtd4y#kWk6OmOaf}bCtpS|iNrWU_EdFeL^8K6wg87`KGK)nFq
z;Cd<@Aw@eop3T1FcputMLIK~OZP`ds{(wu6zd^flA9qAe>F(^ejayMEB-T4jE!8Il
zD(_TrEvcmj@qc?Cqe6RlpQomBe^1OA!)fGcC6{I8Hmo?Ky#Bq<mJ_$u>#$0noRnG7
z`F$KWwgWd-<-(<z16w~0-0cokJ$uFN8!<B~>GB=E*V+YH|5fbRLXbv>>}v4;tT*n~
z@N1VFbnc-pEBU=N^7le;st@P9U!%n~=@_dyJ8(1|+(1p*mqdf22xi9(O3FI1I8xrb
zSRDV2I}ubr$Nl{x-+^va5F1LRw+BeE!qW?%LITgy4}zog>c0FsHccQ&3I~(&xKHTm
z_JL2Ob6P4tbKL(L7#4SIo&i;(+1rCQDBWQRg#4N`Vg|A8%%#=B^tvtBP?5!CN`XeV
z6kILnkyMt&z!g7Jd4hTcGUlvq9e<9@0&I;e_Ly3Pr*mqNzj~L%niD$|7>|-T{!vA}
z<}9Mo?K@w|s*Rq^fsw$GTBp(Ov;5A~<Cc5xGE_5ZP3G%s!CCJ_3@2HgCl1=U;vRBL
zN8xS%;HnCNFK}U2QV81t-=;hy%b~8bGogq{=W&_1+ZbKDMdbo;9$91YH+fZTtuL_M
z0eh?rtexM%zNoF=Z~nQf!#i98YKP8*{3YvelK%MLjz~xfNU_;5x{tRS;6Dq=(-RQv
zy--@+d}N-*kHc<9$%q<`A5-Jf80}Rr|6~y#M9`0_*)7MmJz*nSIH98FFC(J!!~RG*
zU<vH*K?R~oH%1lkxpm7RtOAkmP(*832g^|>q9Qn=n}<51aO)#ef9*lvZ#MbiZ%OH3
z8hjh1+T)vhtWu7HzAc33sN%P@A?K%<X#H>$_<dl8ndwhpR0=`q_4>CfFqeU$U!1z2
zG8OPK`g{rK2}(fh?ifN^u-YLPnuZdU)6iL5P23_+498}EKbyS?8Kj4{C|nv6&=&S_
zg4DJ(=B^+pd9N$jLd@hrUJz)8i+(`Hxf*8Tqt8PHRFg%KpnjUkhrssBaWJQ7hX#WY
z2FeNyuCqXLLZRa!pvJ4guUj~`cnWwpgF$%M2~ndL7H3{@&n~x%;93&<R&z!xH&2wp
zikuTiP-X(2`_?ZmWP;gw6g2Jd?OYB(pOG{liZmKQ7;@|n%VB@gp|_%Sj#;XCLgC@n
zFJ?CJRC%$Fp%XSc0z;!f6t?;Xj`JUT9C8^-(}!j_qGOaNnN1$&075I_^95P7lgt_|
z373g+U_JWzT_ulBmTk%Klexom54H%pt6~x`r+dxcM#$kggfqUn3-o-ETW_Y)&38Ac
z3my#r?L2>ooU>gJ`6=f}uD3EQ!<DUr`9s!D?52p;B0wL9z-~QY;Y!q<lU$2EBCAl3
zTj4j`QBk&=E5T+N$Ug!S!0Ri<a>H-m^jsDea}v0X{vV~U1bjd~AeuBsSta2ZP$I6~
zClA<>OaN8bS-W8Bvv}TUc~<f$xS6dEtpG?o=8i%|xwx30Q80<;DM!E0cC`Z#A2=)X
zY@oj6GAp7Fh8{cBn`ie?u^9}MNdD0G*s@^RWiRTlD1Ij?F1RR8lJ1c8n*d5x%zT@)
zbl#!KxkQq>=7=HdV7%+J;K|4Pc1=yx<*kGSDrx5&eUYTN?a|#R4vd&q9-exAZdw3}
zip{+c5{80FKX!gZohKLhOk~;+_~|$gYRyiOF=xRsg=e%Eg@94*_by*>cOIC2EY`E<
zo0P}gnd%mTZT0=uqv2A~IoWA~BOYBH$`OL&#SP6cNes%s^HbAfY>qXtenN@hSj>%E
zs{MT$x$h~O`RAe#`>LN3SY}HqSqi0ERt_JIBjkWBAQ5dd2m`punA+8~+o=Bz^9lf*
zn1jW4Wj!<kul(Ln|60aQpJ9&3<hHv*iS{HS3vgsNs393}4nMiG59<(of=L(<n9o;K
z#8B+A6y77qB(WfB2dc}j+?-=cv4%*?6S#)}vD<2gC+e%PxtXOhH7L+&cm3H1!SsVl
zPNjB|O2ikZf%~I}#KI9E@!kTd$=+3*E`Qn+FbyPbRBEoiIjc;|Q$!@l?PygxRp7h!
z8rw?gTdR)V=*7pE3`6007~V*Qkn_|j&nY^%_mVY09JV3Y@IN`22z{U@!I&Q|=|O)S
zQ{F^PylTU?C7dM9x{3q)5Q(SA!Z(60?H2eYO}#fNtC0)e{-S$C%QW%GRZPHmPz2nW
z6V|BpJY5J?M|>vxlgflk=>33Z-^8K93lAUD?PGIG^;Hn|4Fb(}vVqS-A<jh5Ad!ji
z7-((SVAIZy@98y&-23eDxxA5^9Vd&5kUr-V9O&l>sQTSrU(#=%b1h00mFv0GyW!SM
zPEiD|l@1OLCZfaPM2E&(PIe{TC?oZ{t)JnOiG#ia$v-k15nB|7H<(<Xd*G)w;q?S_
zd=YocqrA)j6R1n%{p&fY|60w4#h421Gfz3(r49bz2&;>_OQg@ltg>Vn*qUzW^xTDL
zlM8udnskgr6MPk}0>lvx;9hDkrnf0PUONT|j)ZBX_o!VWN9W>%^{FRVw!DZqJp~A$
z>tH|Flkk7E_vP_a?rpzIWhWZV+7%TtMNyigY>@^sD-o$SO4JrAi`pn;$W#g`L#8A}
zL@7hY(trvLrWBDOWwG#nuce;nIq&m+-p@IIoPW-F`)lvwX5IJizJ~8`U0_<JPL&a+
zbPT0j1{y3MEF)b+8eJJSQYj(un|pe(RLqN(Stl>lhoIIG&&_yX+!W!N1=zUg__t59
zz?W1wVrPVu^IF25TX&eI3A{kkGx{E2sl>B`Uiz%M1Dp?A#*J3j2^X}Y1(q9u$EaN0
zbW+hY@R9bT<4vz1zZJzEpgKqJ!iKhEoNyK^$D^Kd>h{ys@{#o^82&`{TI5ZU>Faoj
zF!%<Y(wk9albff1op}{*<K?#iu==2kP-;AaeO_n+XJFj8HgCL=X@&&Ns=Z@ZE0;Wr
zpD$5lg_Q94W00H>Y?lZzg%v`k?)alo@7;PTd?U^7!t{|HrkLDr<T4)3QoOJSrbPCp
zg<D&=@>ds%vR&B?ie$u!a(XOgg)Sld#rTmfGs5$&L-t+b+Vf#~{R&|%8ZM0mUw;yg
zA(wxc^W%dn(NX_}C*h5%R#j}u3m^zRkb3%nL@{!{%lVo;H)eUp>seS>P+C;(Yk^!$
zd(+=+fH2Ab+Rx`gYqi!b1}E^>Z(BE;OS^*P>|(QC01Ha#9(Q`AEqAVacU7hwk19mV
z2|XFVVgb8PIqNRat|cC$>>e~wQe4_VtgiEZCdi7xFr17JvjmbnevLr<wGWQ%!GMOq
z0X4MM0vvGi+qlIjMFd&n>;_G<2QfEHr;ZQ=50j6)$xo;z8o`3vYYnv3CafU{zBP~n
zH4yGH9pvkV&svqDbrQ)2a^d)pYqA$!c0<P?&T){qi#PTmc7R-3>&K^_{bv8=C8$m0
zK8uY1ekNHd_r}u+L+A;JEnzKYv1|tC-#jxZS2TB==KLQcL4|w#$p^0nx|EIgfiTmI
z7xe(q`it-)?Y12^{HZG{a~lhh$dCB2gsd^li;yfGzvQK2p7GC1p}d?6qIj#7(BH}3
z7%!ufY>@%>AYW2J6#n8ElgTWrtV|8d9{(sN_$Yr&m6N;lVf^(bB4(oXtZz841^P60
z^Oz}?J}2B6S0&`qzta1LF3C)mwsx$+X6zLGdQPG3X>OF7GJba%de3o~OvJJ@IlrXD
ztH=qde(vyET}3AxUN=hfpQs`D$LB+qxCP0rvE6XEk@;NCHyw%mb=PWUd2;@Sk-YGy
za7<r|F%GQ72mX`+Y0v$cA#pE%-H%BUoNrU_;8ExNhv)xXXuOK}KfXtWvuTS+(W>?+
z0XpwQ4Lu_w5)g$WR&@CI3M?|NSPGm$tgo-H;@c-kFB86T;uK}Bt6SmF5~k^h&p_@u
zzTCFL8}o%Zh|xg|2DQSQYlZUnDg^3H@BmE1e2N<#<BAtOjoRdn>3a9cYtV$uu{i>j
z(nCi=QXy=V?xd86?(n$iQtLl*D7%n94wG9?@qR5zyzFpkARDb(a!(8AbO#SllCVLL
zbg1+a2a1eoH3No8kkow~KrTB&kR6G4nm~0Pc10r)+SP1mCL|lUn)9l7)-L51He;cX
zD8jt<5Hs<$u8!w>;?xlci}1EwF)R3zSy_m3`_)fCNP^!p7=w=RLe4h}TuwuSg81)(
zh1jhD@G+!~szeX8H}*HKa1Q6pDe7ev7*Tl1Y&{k;>%1Ms{EnvzrM9;?|GMW8zo%Xb
z0cuc_+SKjc9fa0#KL~Ow)Vtwz<8j_JlHsLK3)VI3u<hKuV?BBW+00Rp(1xxBoXf;Y
zPME1P0KuV%*<r-K<l(e@^xI*jH02*F?fi&`@ut$k`xVWO%%-mGy6%-8LgCRX8NKf1
zTqcRxVdy-$%pCw}r2?+ACJk(BLbt#~U#f|K#VqO+TE%UwP&f*{^bD$=GdHs9<qqa`
zWz!Bt-Hj0h)vQIq$-zMZ-|}JRWKu>8=bhGb=TVob1y-w4xwHncs?x52h5=6q7Bw{t
zLxFGx>V`8MIvwC({{Sj;?=6bClEis6q1xj?oy&`vI+g?gW!kyGg;l<{?Z<t<1g%_Q
zyB~cGd_SKd_jmtPM(}U8qq{hG$Ogv7WZp9(PBog@)~5RAzqdU{LRx}AmjP()X5UV@
z3<`;I2IeP5wW7`Jw9xIVWe=iGu)>Fu&+0<Q=}nm^{ASSX<#NC?lQs^`SZ>6=&+S>!
z%)wr0;h=36Dx;L{3Cp7xUdmEgPP}Oc2`1Y3@*O5|ONqJbr`C0Bc|{w2sZrgUSmd`*
zj~cojqVDTQ{~b%(WfMOo`2Jv?_&AWX`TjgclJ-*-hWhL|$XZvWs}v?0(TA(U;Qy*V
zRCFN&OzxMKHyR&bs!%=YE6!{w3ljbF4=RWUfgBW}ZM;)I()=l+c~egD_mi<ceZ40p
zsqZ@y!^KFsy=-i5=*J(M1~P_reV*_23@DMcM!*Zf^y@ov0<vgNmQv;@_AZ^cF;ARM
ziD!xw<EkOLf983;y{MaUJ~|HzrVn$z8g;c&=6a|fvW}nR94QpeUiyc!YK=APpd|s6
zLzF>Tb+yv1fQsxRPXaOfDXn(=HP*|TBoUT^AfZ{ecU{1XwES(*U>DUJx(?k)LVB?u
z801{ha4{E80(Fglf<W8vj#1JH-Tf$Kg=IY-;8BI8&Va0j7g9|gJ`u$#yWuyt4~Upq
zHW)Z^Uia}If<mu});^{YlEVRIalJD*<oIq5$jI&~Dk2K8_B#<s=LgVO=bD#Jre7fT
z_$<%haN>JzvjO+}zz=XCrwgoPAxy#kZ(a5gZ=UlLoyqLbQjOESwkXDR!R<yAUewX=
z&*I!i#%Bq}e0elICzM0tVjnz4u(52;Ui!=wHC$#s6%7!E3gx}U#8((fzRU#LD2}0l
zViX1Shn-;A@P^YcZ1qqJ@PUf7u{I?S*&jTD<9A(zi^t5rNn^v}0Mcilo4Y3ab@ENl
z5zoc@-W0f|Tjth%+*G0D&%M<iqql3;=c7Yu!X|FQ+2p?uK<chA2QZWswv<PH*bi3R
zEPuQPcoC24&$A;A6Ox;Nkxl`Y;mBziK+cMf_MB1k!sNU~Xqvt0@He8`0z8KBxMJ<d
zx0hB(5{qx$XS3P0=^}M72lrrThg2nSvS|Q73p9Y~ZlvN3JWuGgb{y;@F!j2UQYdrx
zmsdc`_(4OPSiK)F#8M`6@$BRG_bmWP+k}#KIz4r2b^2uV&;xSx263z4PtT~&w)R|V
zQx;tH7v30WW8Vh+u(4@0Or0`95nK=<=jVBhbXxkqg<bq<`5hD5#gSzv*2+;^=?P3x
zm${7%ciu3?tQB)!%482N0nOXm`TN0)4a#kX<Up}o^qUZ;7f5vJtXmBDIj;E7-VFZh
z*?vK!6hgTz6+~or<fc^o_f>S09O2ET6wV{lyFEqSbT1S@(@9X0TmK2gai5GoI2LIG
z%M#AXV+fYA)j41Mf3@^6fRF}1B_k7ph?U6O>zZhIG(oV$WgzcPK{VNcw6|^}b?P{q
zdbMwHn~q$hnypNX#bQ}uMTr$k+N-#WO)Cmm+g^jeCg#2nd|VMODMHi3Mj;dN#^Z`h
z$=kK?+m|;g7#5RW(?vn8wH=qr-AL77>p7x=XiosK=5%z47%R)?P{b_IRMEx>?)Yjt
z43HxTG$#d$GASnkFLmxn8**yHqju*sg&3WX8XvU07UK1Ya}{RMJF~(rPGU&hr8?3x
z>f(@GV9!~K%hp9fGD=EHaE!^CSwM%kvup1*1e@E>5Y$t;Q51*y?)nSgA}3<Mm8O-7
zX>zmx&Mx>n!I_>Qj2D(XpXhWcfgpd->eAFt5-)odRu?NcOsqf;8oe!<-R8R}Mmq+E
zWKoa@msmq^5f&nc=GVKoR4WEfHDfu!IwO741AtA*113U|abF&8NlFOrwCC&ts1Z2D
z6=ZkQLFqB|O&0jfqw_t83G#*XGG-7~f(U5hY+`>7u;n5*(nJ)2H4U|H?>#WHC_waV
z%}q-+=z-=@C$U*ZpUiZHK-^*u3Y2v1&2h%CO-T+$pM$qd1NK#AgM%dlujlRi0S?ED
zP^HN1gU)_@(US}qjI?cb^zq}^n2u@%k--#8_-Z}@%n?;%o-~CkOywp8swpUD4i6&m
z2tWWdmqu2$E?67hi84tiW^c|ckM)GEtb`XLBxAvL{{gt!CFg6ghp3D?Cz_0UGP4?k
zcrPeaUr|$w0&rX2Pf-##^J4x3V69$|Imst4<=0||8CP94!Xz6dbF|Z&SI(MPEXu4w
zVM8gtiIzTcsMXUc8KTOIv0sb*S=WOXr+oA2WYgC5K0)4pCu0Y!EP-O>)%Oq>tib=>
zxaKkiqn}#aE*xu^UZdCh_fduKqF){4*G`+Lm;HGkdZ<YAgK|0A+<LXmnmw+wRA;N~
z-}>pz?RL3_lb8l!1!hDiV)Bf6a}d!B>W0lii~U=d#>*T87u$q_Y;j)}2Ns#ZuU1HS
zT@!#9xg#;YP35SQBfWMyNp8r`m{$~*<9;#*2(Z5Tec(U#thvUAdtD^!{<H<_%&$}8
zc@5FM{Ojg9$^9s8&k*4ci<QijX(ptYk$Pk(cm?W^`YL<EAdpLTE7!N<3oTQ>`gbUe
z#)MvedLlXHgu2u02kSFZ^3<pBZqiCT-)V2S++YtELvL>}U#k;uLK4XR5+8A5-3t*0
zf)!}Pb;9-m4OW_Y%R-+y@@K?%<mWK4lVedOm2R#$!G8JkwET)^@j~fS?j=cZ@L;Lh
z==pOJv`WBB-2TtF099V`lRX>yhG%;GM%&Xxm+VpR%R(}gTKLCwOvodY?MCL|sWbT!
zfA_a4L!1q5JAGwGRG)$m&{OJA0icTZZXZAwBVFl~5?$--lAwk7rJeZbT3?#E<0ybk
zIj4sABi$~MZIIyEgug-*YN44Wiz>k=>m(ky2gOAZ47Fl}5At@oY@1b3eam<|`&~Nu
z7d9t}cHS(uO(%JlQTD8@xFbg6KNWmexxxk2Nd=HZM7EvIs~2)kpW|egP-uN9BIK{G
z`sKH2u7F71wx~qezy-de(AY@7RDmSpkWk(GOJK!RQ%BrwkpW+0EU}7OsAxpm)+O8w
zbt6tx*^2^H=5{{F75z?8A==~|+HRV$O1L@DB~PpOS`So{0$BTcc7bLz)_sMhm0o-5
zbbrr~xSxpDZ-R=;#3%ZPzzVH=n+<(nc+T!9{bUJK-$SXIM!EM~zQ&l1;)OK$qIw$4
zMx81J)8&sr4e^ttQb8ADtgME|&*yLx(Am1`x8JEwLTe$ftT}g)Zp@r)d<dI$!hP?c
ztSCgieo^q_yk&ptW$La~l=3%_EV(BywawSawCT74w_0U0#3O60Fv+95Z^iqQ0#AVy
z^L8qw?!;a$l}f4*eH*4EG8m?p$M33B@nb;yom<xh14R?c+nu&=2@){B|6lSQ=Dh?0
zR`tOpwxv<AE>LFefWDLUo&nt0^+>@BzjeM+GFUbO8tTTg_5B|3DV{eI#sedn3yz7*
zt#0*GD>aRrs5V-H(f<5OpX6{+>&0gaMas7*L;YFR2X?~vi>k+9k$54dNrW~xlF<uM
zT%&Jb<?-u|b47K4f>%o|jNuK+m))M$xg$1y><2eR!$X8w*o9F+q~cu*_B6)a6`%1{
zsO^d7k^w><W$l<W7XG_zI^)i@r$S>#Wo4M7&|QSNK3Z32=xaSU0)sQs`;IsMpj5Gu
zR%7n(xKnKX&B>-*d~C2(hq>r;AL74H!ARHP)VbjT<c*Kq_Xu?$Cw;0190h*rO{Gnf
zNx_g2ErScOz194e4jNoORLVNtIt7qBC^wWge=ONiliOUpLls@0y}m#*7}PDBn$l=3
zt`5^-$JARS>1-V!@f=qj*6;`g0;sNf1v*s-O9H>yu#iNy%nN%0uuHGm-f49%$lFzB
zG{Rg1mAuKDP5(T}3rjvULr}^KmT&iwA451Z!X$F)fr?RDq#9p+`OUG*hq7gF&DD&S
ze~RjU-aPCnYaM32Di~e<4fqt%_oN_Uf4^I>jkluob15N=L=#pqdz(+>Kb+^`aBGSC
z!RxV#TfV`w>O~+bi}gpZ^@*vu&FgL2(Rxj#(e84VW9XYX3b~s5wqX7G`gca|;YFLH
zf4w3Q82l=Uz-}1_FRi8tpPg*9&HPrE$}&w=s7TYFxo-P;L9Mw_7>c*~RM#}s*AmI%
zvy|dRwuT!?1PaTV-&sF8)jHaD<qhM%pso>NMHYXK;7zRdyUHuofVwzTPbu&P)(_ba
zzWvI)a4yQv>IKqRsDx}?fi0enE8=G8A>QJda@{V7o+48cd6|mHNVL;nN+O9|z08^L
z;2C=5vtOjV*)UB=Egr0)0%_Sxo(@k7a{x7M1$z@oza6!f>2($R-*Q@=8lYM)1KF(M
zx)LGhz&MR;0NL@<+6qt`LZjCdtT8?Lw8blhrz-YZX-)#y{rsCsz{6gwsq}q6Ol3T4
zOs%eR6*yT9+7|fb6`&dtM=!ASe^i6&F0{>*=YD*~vr@y%9gPK*tv>x0DIN3lqI_=7
zNb6A4J8kNrc*EQ>E=7!M0z*BZkAMW~6LS04`Usc!zGTtP*YZ9bcGG6f>_W}xmXZ&z
zyD<SJ^x9L1%&9pUxc{*J8A7GxOa=Ssz|*mDl1rB;@J!=k#40C^UlcusH%U6pk3Ly0
z<se|MK?9G7jMuc=mX;rI?I{Cq#}*Wq>H&Wdz>Y%KHT-4)bi7SqCARXeMwTfm@2?Pa
zMy*%oo(OYe>?S`0lM?F&;t=%>PoVGJeE&3x2UV#1*UNU#xqeG->kcpxo*f#T>t+H@
z^QMfO{Hm9%38eJKe0`<lSF`*hDNSyE8w7UcX69FDY|Y}6`Vpa`1)UXn)JbNA0N!2|
zDFCMYvI3tve%m@lPCV9ui@K1+oNFs|e*Uy+|AvZQq#1U07yGr^ylegxE$rO8QEu5u
z_dP}~r}#<fLZTQ(o#!lsKm7UcJBeASa^*BFEc|#6U9WOeGIE}QZ;`w5zxBPi4fN-K
zkX&`nefWP7q^<#m%TRZwCdJrUMCQ~Lq7yQd7ykOhOD9Z{m>jJprxd@bXr_g@fc{l0
zQPWzEp<H6iCeMsb>+zluBj-iZM54HsA%6YU-MET|UswU!31_mA^1X@eJ&5;$gdJDm
zw<gce)t|!3aOODDbmTI?H#1W2!3RztLJ{Vy3KI83%CZbpm(o{@VL;u8;gp5g&HFnh
z+U%5d=V&TXQO+f<k$5R@isRHjTK!~KsA+5R64=gz(RoQ>|Ad^w%F%lYr{C*H$$(&r
zIzZ{GrPu^3K;pa}kjp-__O-RW{R}`K#VFTiUm+%&bZ=2eJ66Cisr~20GFMLT=86zi
zghk3BqnaGg3s6+~#a`oV4v48TGJ6PPPeP1ME|f)0z<hYgU6%h22tf~;aj8dtd_zOy
zgGTvQVW8TP)-p%x9V?+n=Amr_%0uz$WaB{DLWON)8iot0iurP0QuQYb0Y!nGI-1iO
zvPHQQL;9wJQ`@yK4;S|V)VKaN6J`%@6x0#JgFhvh+M6!?VPfv2sqYfMvUd>5|IHCC
zVdhSk6#Xt-xX^w+jzR?^_9USBbwe%4AleZvN&cv&fr_9Hd=)R@h6kSCL+DdusRtUP
z(9h}Du=m6ZUEaOoLztyflmW8+fL%nfY#v>Ukz|I8jcYGy2XBIdIDp5qFRhJQd-vu%
zCr^`u2d!w1&^jl2RhUDGs83r6uoAQdv9x7rr9<KgviVN{aTWqzc{MnA>jMjCE9Y@(
zkbz?W_B{mzaei&}0s1z8H4f+hvm(>}BzxsRE$YD3FF)}}wf_oxH~ilF`*J(Oc<tkE
zf&X&J|Aef+B*F#S#j4|Oi4WyYvXjsNn01-B=Jm}P$67nC6WG?c0-R~pk*UkU!erJf
z3Cmd$AO<R3&7`cCZ<C8nT+cBKVXd3(8#Dohm_Z*KrKa4q>A@{H!UlNZBub(VFp{>~
zuCEp}53Q`OzS)tkrvXUIsHrCwLsiVanz&BVcAF3!{^LpZjD71y|0$=W#waatve4)H
zk(7Ir1rOJHEG(N=GJ@)&%1zpYbyvY;Rkiw?mDR0sV-amBmmFh!Uw!1&HJM@W58PL1
z-pI`B4=aqLSC9QDFmc<YmzIAx+g+x!7nYI2XR!EG5#kZ@T%1OjAFtud=-^48K6kYj
z@gYd?#sE~b#oEOfa-scG!z|~pW)%e;v>V#EZzebsj**RCC!70lB$&wG-4W9w@@@WQ
z+l-3i?3I}~CsMZ}rsy$yi%!p&$}klEb(x^^;)YV1&t#3LdYLAqV~`WGsY$isC_D4p
z)ocIysKya)vI-`2--hUDf%6Jp`XO3?0iM_utaLZh{PmEu0L7F4aX}}#X$9viM6^e?
z+>BS18y@U^jgB%UxMiZf;&%BJjvpQxC0>~5=?6tD2Y*i*{Rilc`n6L3hnd@DXn=3l
zwp3qSx=_qpe^%+_I&*d`z05-o8R`j+F<i0AtEw4>9yoTj5EU;d@sr*jW!kZxb6h91
zLgrpMTf6|lL35L1*udZpP~UZf|8iE15C9#i>8P`WwNU?uZ2mUjcp4}zDr(Ox3|wg)
z5F|b^3eO>|^XGRdO_-T^1W4Py2*5cdMBww^-JP*#1F6q$4X1yBO9WmjM-5T9_U(c^
zFep-L!`5%WgBK0oNFObwzNfaA6Uwv~0hoRRnmF|Qoaf}s`#01(eC0^Ze+7B-H#EYO
zR|xwHI65?zz73C$k=(?_&3|z=59A(|&2uT38J>g8Z_(`?1VE$GO)9JpFknJcU$;50
zm+N82uSi-4>!%Itl70FV;Mj|?j1ne|)n*RB<*+?xjn2WtfWb&>5jj9)Uh2A0Ba_`i
zR4UuBz$Nlws7%jKI0w*p`H`q&_CU{QSd&?SR?>7rw0fvjnIcM)wb=z&ywZ{ZPQV9R
z$|dHAe?*v7-@7lWF3j$Y>WYK<B>ej*1d-wFHMD`so0v;TxV=e!&;GcgU<z4olq*sd
zfaK72%H*EwY9q4L;U>!q794;|ace}&Z~;=E@tVx3vC>X4vJGyD8Ln~9$ihP?29dhY
zKtb3D9%bj{07EMeSOr$~1Q}7hA1w0hknC!Ybd7F71w0kqiY2)Ty|eOg@*M#`o&f{0
zD)*|Or$U>C7>v5Vv*4ifyTaQ~vw1v_!dpq)zycdqHCAak8uA|DxHXIV1rL}XQV-eF
zZOY~jvK;9!NtibVX<OxcKs_N{G;sDkG%ReaTxu=Acrg|bH(Ms4+R#_CvBF{uxy|Uq
znlmXF2X>7Ovt#Sw%yQ|-_mR*Zw$XECktlr>u<%9Ji2UFP)?Vn1Vo<csp*-YmO{U0g
zw>g+x7T&iWN}K0w++wLxu#a2by(dV44Yh(z;C(<2ZBV-A)nro_%Km%$YMBBO1G%^K
z0h+l`fIQUx6;ohYFez?7|Fs<&-SY*oD(ko%X5?16C4Om-#E|<6c^0&~wu#a7&^#as
zF8hk+jtMTq#4X5QL@7P&2T!oSU4`36=xD0mTl+6IUS$C@US(}JB>5rcDze@885ze%
z6XL6A%q}}FaJ5_kj#O%t>s}crm?u9Hc94+Wk19ooHIcTHR(1^HPF1P_D)sxatLLn^
z4aoY)G3d99AFlCkbb<^|nB_5oU%m1Qs5UuqIk+o51am*U2mQ+i>v~||)O?FO<=<yb
ztoBnqIoPv6BtUxW#bCF^7LtDR4?ODZ{rd%+sdwtX!$P{d^}P@F9!ZRW1X0M`Hgyf{
zRF_<~__gZj@4xB2Tv;79vetlR=U#gA1K3pc71(snzY<Y2zvuzytfa^=Z?^w?UC&Dk
zaL}lU9Rq!~J>9qX0nzg;Pz8%!7?~)g*s$rYIyw>uDzH2tX~Im;FRTk8%V~VFO;`!M
zj-gulaSm;2dvsLX`dV$UC8{)nHMo?=4PJ--!*KVq0Vk-EmZs_oEDVtir#e1BDp|%p
zwX0sd6K}5Ld9|}Gnvn<BFELDj!a>@_t%O;eXG8s;^<4}G_Nd#S*~E~vsM#4U`cQZ@
zMr^}xHOro=>{F{2JllTm^qv;r(dJ#*5OE56GM4MtSF7~yPd$BJzIW)4sfT#SE*B(n
zm7&a1iQCt#Fnei+I1HPNNwHJeTc+p*tY1<ryPC+S@OvY`l_FSw=Md<3-(F5+{SuNu
zMvhv*SS#f~DSyGsFjRzzH4Mq^&ieJ5iLM8X*n0ZUy771|(tk|Ud&{}y0#71U78Rf}
zF3)xnJYw7YWTTVeHO8bQ+ZW83Rq*eI%Ff`c-n3&Dmp1#sA-uYpiJ~H&jIe+OpBS~;
zi2R|}HxgE%5cd~6;SK@D#&ndgcMFLUYcmb*DHhOGaBBEjm#w}%&z}(y4=i@`bBf|C
zWYcn1_vT=vfyVw3lp{(lc?|8z?U7%qRh~ml!H@DIRAH>^u!nq>5)#rhL`@C-#=Ccp
z3aHOc=v^xS^4+PNm52u|RPUf5NYBiR<6UmxPTCBBu2!9jb<5Gd3(v3zI4v8B*(d&K
z9uOSOH(7^e0g-J4>%g97mDM1Aq-=g<8=3=eB*OI9T_ntCUGNe#RfLfmR^(kV3tTA4
zWyjwhnR(>^tpFh7xI*c}`v<CH%m%Ilu)PYIy4m-65_4aijPYd-q*{{0$=Edy;)z3K
z=JLMFrx(iUdGzkNmg<^y51FyjUywz`d;n3?CIZvEEIPh>-!+4!*cG*UA=rNNZYJ$)
zi){0IaFkEt)hc>P1R5xNX6BPBV`o<5uHW<os#O@RsvZdw<MpC;#$`|dX}v~L5-?8t
zf2uyHAR%{{a;MPJIZSe!fu(txb_L$qHU|NTCs7ABVm!QvawWv8`*MeQ)d$v8Ge{f5
zko^YHnNV)<ZKSq{d%9809D!fAb%4Rq;`xoBM`n;Hmef5oP_PiuQ|`_cnm%xT0G#K(
zEX$@YV~4mj%pwve$S(sbJ{yp^y1E@c3_a!c$eAcWE^^$qDh7AhVD+Yv2Tr-xDF1J_
zzo)74kdYz>Ppa|J)Zk1gG;(zr6M{E7-3y#R6BTY>3Q~P%B*d5Vc+Kt>_9v&a$4>^j
zJSE-oW~+k1ruk3LSnOyn^Do3F4y|Q+-<3ak;IOK-dyweyVm=e_<cD74z!u@|%!4&r
zl=?A89N>uLb)J`!60aN}wQ~<1giT!w`wI|LwUDC~sBc5oTxiMf>Ya6=<<Ikj?tbt8
zh9&Dl=Q^&MNwsaL&AJ6V8SzypFQam0UIK#cAwz2FNHfn_$W#`&Y%g*A0Jo0IAjzi)
z!e+uQd!YMZ7uac60Oa!VH<zB3NrS4KQ_f@B(yGoD5($;h>B3+)K6+zTB4LIOh<b4*
z2@o6!4e&nK$&BSe{abl}swo~3=|y=Y4xLXvNvM6!FVyk@`09m#=L3jV{*7v&;<@G%
zZ^@~R*R`TIo$=zv>gWT2to&jp#%n9o%~pE&NAJc_($9ln+}XkGeOgN{2$D0r&fD`9
zwazm<ZAaLL7m1v-4mjs}p%V$uBz-jf<c?%?2KyBKO!YiffA5{10k5!<0%kd~_AEo#
zCH9*JBYLA*<xp^KOkIZU$F&A0Gy3Q)#tbv5N1MpUmM?tW()Xoh$<b=O+0p0b`MS*4
z1+f}g$L-m1B8E%i2av$v1y1z=r&pD?l(11CWz@Kn`n3m@i_oyuRgPDThBKhdDq^4J
zQ<QP>Xeo&;BwnJHiN;g5Z+>ztQ_I$RuooFc`s=E9(`bgYw2QB%lj%u{AcV0c{$S{Z
zuB;Xm_xz)`yE(ilsE`ckRb?Th<U+GK&1`@>Ryx@MjRG{+lO&{?=jtNFZ;ilS@Ql_X
z4b|q7*p<@^ZJYD+dbcM}SgK9Rn#$)bGl{QW(Ak^*`k<iVg5SS>eDeKtT}AU-8_Y7+
z{m2!#T)mk)odAqTem>7@Q3w3$LU+FT3MYMSw%FIT7$)bk-yOv!{q)47z+XR|yLq=h
zVa!LkJV2Ul|9I!gM-6qel8vXmOWd3*<48<k;kYvzQ^LYtKfZK!p&H5Xs*am~x8nIQ
zwz4|y!P~+6XR{J?Zljpo7qRzHFbO(RM;Y<Eo!9-03bl>M<563?1kbpC1+GA14q0Uv
zH%&QFWs5x+``TuAxGk51#n&>OI#NRFO%1R|UT6QRr+jnTr~LNNEitFRs)9>fff=*y
z)BvVk<dhA8?9S|%p?=Az{pV}07f`a`U9UzVj)^#_c*&z-AO3m0+=~is9$=gX>EHj+
zIaS#e6gH5C$qwlEYgC8Ioh_w7H-nFHrKq$=Hdm`)W{z&6zHIQe!5w<>F54!EdEQCs
z{&~k@FVRM}kQsbLDfF>H$n10H+P_UGhF{XpS*2nx2~S>ZQhm9-<hd9bFZSO@A9wXy
z@yz%%k#fe13b(}8{;gBhsKl=tLYbV|QC+cIyJC2h6jJD(%iK|j*nPs8+_qk_m{mk|
zR-<f|w74+JbRin9ft+%&_It0Zceu+?^2(cszj!p|dTbdl);_#|Y1sPi3AjQai1xPI
zLUm46-z8z=$=Va!BVSajV7bEH{cf-rnF*O%vbMJrK3b!`EhOUCZQQ1&P<3g!JS(FI
zZU2QqD=Ta~ollmFk}HMVu{n_>I2@;B>Rzf=HS0k3x9eGO3~4Nsn_Ii{cSj>w-YA<K
zLDPFm99*zOKr(3M1;=bqU#V3`NW$`NhQzk01DR{8=Q!8wh-`bkKk9M9GBnB+3Z*Or
z5$3S|4=(M?cPPYw+VZq(MIFE8<70$|!y1?ZZ;ZE9Av1TO>T&o+lq(_l=YciA-nZc?
zdRZDjUH;ZQ^(fK7hlCo%hEYbP9UI0eUv`^Kd|hsyi!*-f=->c|Atj5x@`Zhy&qsMJ
zQ?0uA{CxOYE7O9o`Z233ozt?)-zZa3PuFR2`;FA7TuIW+ySL%hG`+llX?j*<hn!tt
zBh8;E`=%@Pb}=S==<cf<>$^}8=Gr7JytEM_kU|o^PDiG3#9qCK*AD2^3qgWk`PpF7
zeTiIiv}JMx6e}yjq?vz~KYn;>qJLEvvvA5Hn=EnlohZP%)(TV$5fHY21m-!jE<uIf
zVY4MVlJB0-9@q;xi&5$fAfq@SqZm#!NZeX}(p1LuJ;H3!eo$QXrKDsq8b)F_UfFU^
zMJGyF@?Eks41nI`n&>2f7%||!T{MlZ{EW{8q?>-bAaOO*V%0b*ccAdvAAYELCX2+A
zN1HxDFQB7n7cTwomXOj(p>OW{dOJzwaDGi*vu5(($iyVsr~`Fh*1kkV#hGqV*?R&b
zBnmR1q%_K=Z7_*lpWuYqqL{{2BWSCJXpu*<P=?G~>Vc^<sXE@&*7qkycqVq25S~aw
zgj$=o{St01IQ72>x88!?4HXo*2}Q1NSGu`t{+*4jw{EzuFh02EU8M6PZV}1;JEb$$
z0wi_l%cAV6(YDvAFK15s(C%mmjQ9IJp%F-%or+&u<bS9EVn(KCoBCApW~m%-jQS|)
zgIe>*=7{$jJpXcxksWqR{E$EEx`FRYvy$fvdr!YXbHz^tWd<Z}ixzIA0Yk8gheWwx
z^8CHBPcvd4Z^rjX2r8M|tn*pR-TaDgLVw{xJ!?yyqhy=FWRBS4uWu{5zJGqX$PJTN
zDkn>{55OHVT5|s}PW^GghHfO$K1g9ckN0eVtWyugA-=Fc4C{_UZL_88D;VnjU0HVF
zZ+?)N()jC9%pzr>-nL+piM>k$d8)58Nc@V?E{v0`pqIEGKyNfP(zOrnBA>R2!!6IE
z5ue6~CdLQ!Z+%m(B41EV;4+~-QV$A1!7m=8vLv$MH|@Ux#(f4M7y0fbE|EF(uH=VB
z#IecK=Ug<(V=ULSVC!xWouK|i==zt7%WU=g&XzoE*_{ei21ntwrt6LYO_GYNzYKW5
z73eHSc3pLOmoR=io$hx-NPeMfXsOucbB!eMF%{@_xH<Tsxv|qpW|*{mHV@vf<c|kX
zZv`}A1{&w5v$LkDA46>|I}_4Mg1e#czLZ}I6|l7T3m1q`7I44h(*8lO^Zb7p0*Hdb
t9|>pFvGnJ^C#?R5bo&4A9?d^As%C$qU+_n5H%AS>S#$dz=^OW){tuz_?uY;Y

literal 0
HcmV?d00001


From 8fc358d45b5c911ccb70ab6c65b9c465153da628 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Wed, 15 Oct 2025 16:26:49 -0500
Subject: [PATCH 85/98] Update PR based on coderabbit review.

---
 .../external_aerodynamics/domino/README.md    |  2 +-
 .../domino/src/benchmark_dataloader.py        |  3 --
 .../domino/src/deprecated/README.md           |  2 +-
 .../domino/src/inference_on_stl.py            |  1 -
 .../external_aerodynamics/domino/src/loss.py  | 35 ---------------
 .../src/shuffle_volumetric_curator_output.py  | 27 ++++++++---
 .../external_aerodynamics/domino/src/train.py |  5 ---
 .../external_aerodynamics/domino/src/utils.py |  3 +-
 physicsnemo/datapipes/cae/cae_dataset.py      | 39 +++++++++++-----
 physicsnemo/datapipes/cae/domino_datapipe.py  | 45 +++++++++++++------
 physicsnemo/models/domino/encodings.py        |  2 +-
 physicsnemo/models/layers/ball_query.py       |  3 ++
 physicsnemo/models/layers/mlp_layers.py       | 27 +----------
 physicsnemo/utils/domino/utils.py             |  2 +-
 test/datapipes/test_domino_datapipe.py        |  5 ---
 test/models/domino/test_domino_mlps.py        | 23 ----------
 test/models/test_mlp_layers.py                |  6 +--
 17 files changed, 92 insertions(+), 138 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/README.md b/examples/cfd/external_aerodynamics/domino/README.md
index a6cafd803d..21dc5adf1c 100644
--- a/examples/cfd/external_aerodynamics/domino/README.md
+++ b/examples/cfd/external_aerodynamics/domino/README.md
@@ -113,7 +113,7 @@ knowledge of the dataset:
 
 - The output fields (the labels) are normalized during training to a mean
   of zero and a standard deviation of one, averaged over the dataset.
-  The scaling is controlled by passing the `volume_factors` andg
+  The scaling is controlled by passing the `volume_factors` and
   `surface_factors` values to the datapipe.
 - The input locations are scaled by, and optionally cropped to, used defined
   bounding boxes for both surface and volume.  Whether cropping occurs, or not,
diff --git a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
index 339363195a..04ca2340e9 100644
--- a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
+++ b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py
@@ -181,7 +181,4 @@ def main(cfg: DictConfig) -> None:
 
 
 if __name__ == "__main__":
-    # Profiler().enable("torch")
-    # Profiler().initialize()
     main()
-    # Profiler().finalize()
diff --git a/examples/cfd/external_aerodynamics/domino/src/deprecated/README.md b/examples/cfd/external_aerodynamics/domino/src/deprecated/README.md
index 9124353b7e..fb7d062f56 100644
--- a/examples/cfd/external_aerodynamics/domino/src/deprecated/README.md
+++ b/examples/cfd/external_aerodynamics/domino/src/deprecated/README.md
@@ -1,5 +1,5 @@
 # DoMINO Deprecation
 
-The files in this folder have been deprecated as of the physicsnemo 25.11 release -
+The files in this folder have been deprecated as of the PhysicsNeMo 25.11 release -
 they are no longer officially supported.  They are kept here only as a reference,
 and may be removed in a future release.
diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
index 9707a7e6d1..89d7a9ba24 100644
--- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
+++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
@@ -614,7 +614,6 @@ def main(cfg: DictConfig) -> None:
         global_features=num_global_features,
         model_parameters=cfg.model,
     ).to(dist.device)
-    # model = torch.compile(model, fullgraph=True, dynamic=True)  # TODO make this configurable
 
     # Print model summary (structure and parmeter count).
     logger.info(f"Model summary:\n{torchinfo.summary(model, verbose=0, depth=2)}\n")
diff --git a/examples/cfd/external_aerodynamics/domino/src/loss.py b/examples/cfd/external_aerodynamics/domino/src/loss.py
index e8a143b9c4..60c52beb32 100644
--- a/examples/cfd/external_aerodynamics/domino/src/loss.py
+++ b/examples/cfd/external_aerodynamics/domino/src/loss.py
@@ -19,47 +19,12 @@
 
 from physicsnemo.utils.domino.utils import unnormalize
 
-import time
-import os
-import re
-import torch
-import torchinfo
-
 from typing import Literal, Any
 
-import apex
-import numpy as np
-import hydra
-from hydra.utils import to_absolute_path
-from omegaconf import DictConfig, OmegaConf
-import torch.distributed as dist
-from torch.amp import GradScaler, autocast
-from torch.nn.parallel import DistributedDataParallel
-from torch.utils.data import DataLoader
-from torch.utils.data.distributed import DistributedSampler
-from torch.utils.tensorboard import SummaryWriter
-from nvtx import annotate as nvtx_annotate
 import torch.cuda.nvtx as nvtx
 
-
-from physicsnemo.distributed import DistributedManager
-from physicsnemo.launch.utils import load_checkpoint, save_checkpoint
-from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper
-
-
-from physicsnemo.models.domino.model import DoMINO
 from physicsnemo.utils.domino.utils import *
 
-# This is included for GPU memory tracking:
-from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo
-import time
-
-# Initialize NVML
-nvmlInit()
-
-
-from physicsnemo.utils.profiling import profile, Profiler
-
 
 def compute_physics_loss(
     output: torch.Tensor,
diff --git a/examples/cfd/external_aerodynamics/domino/src/shuffle_volumetric_curator_output.py b/examples/cfd/external_aerodynamics/domino/src/shuffle_volumetric_curator_output.py
index 02678d9e61..553d4e575a 100644
--- a/examples/cfd/external_aerodynamics/domino/src/shuffle_volumetric_curator_output.py
+++ b/examples/cfd/external_aerodynamics/domino/src/shuffle_volumetric_curator_output.py
@@ -82,7 +82,9 @@ def store_array(store, name: str, data: np.ndarray):
     )
 
 
-def copy_file_with_shuffled_volume_data(input_file: str, output_file: str):
+def copy_file_with_shuffled_volume_data(
+    input_file: str, output_file: str, random_seed: int | None = None
+):
     """
     Copy a file with shuffled volume data, using Zarr v3 sharding for efficient storage.
     Only processes if the output file doesn't exist or is incomplete.
@@ -127,6 +129,9 @@ def copy_file_with_shuffled_volume_data(input_file: str, output_file: str):
     volume_fields = in_file["volume_fields"][:]
     volume_mesh_centers = in_file["volume_mesh_centers"][:]
 
+    if random_seed is not None:
+        np.random.seed(random_seed)
+
     # Generate a permutation
     permutation = np.random.permutation(volume_fields.shape[0])
 
@@ -152,18 +157,26 @@ def process_file(file: str, top_dir: str, out_dir: str):
 
 
 def main():
-    top_dir = "/lustre/fsw/coreai_modulus_cae/coreya/datasets/domino/val/"
-    out_dir = "/lustre/fsw/coreai_modulus_cae/coreya/datasets/domino/val_shuffled2/"
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Shuffle volumetric curator output")
+    parser.add_argument("--input-dir", required=True, help="Input directory path")
+    parser.add_argument("--output-dir", required=True, help="Output directory path")
+    parser.add_argument(
+        "--num-cores", type=int, default=64, help="Number of cores to use"
+    )
+    args = parser.parse_args()
 
     # Get list of files to process
-    files = os.listdir(top_dir)
-    files = files[0:2]
+    files = os.listdir(args.input_dir)
 
     # Create a partial function with fixed directories
-    process_func = partial(process_file, top_dir=top_dir, out_dir=out_dir)
+    process_func = partial(
+        process_file, top_dir=args.input_dir, out_dir=args.output_dir
+    )
 
     # Use multiprocessing to process files in parallel
-    num_cores = max(1, 64)  # Leave one core free
+    num_cores = max(1, args.num_cores)  # Leave one core free
     print(f"Processing {len(files)} files using {num_cores} cores")
 
     with mp.Pool(num_cores) as pool:
diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py
index 37634b8715..070bb71412 100644
--- a/examples/cfd/external_aerodynamics/domino/src/train.py
+++ b/examples/cfd/external_aerodynamics/domino/src/train.py
@@ -343,8 +343,6 @@ def main(cfg: DictConfig) -> None:
     # Get scaling factors - precompute them if this fails!
     ######################################################
     vol_factors, surf_factors = load_scaling_factors(cfg)
-    
-    vol_factors_tensor = vol_factors
 
     ######################################################
     # Configure the model
@@ -661,7 +659,4 @@ def main(cfg: DictConfig) -> None:
 
 
 if __name__ == "__main__":
-    # Profiler().enable("torch")
-    # Profiler().initialize()
     main()
-    # Profiler().finalize()
diff --git a/examples/cfd/external_aerodynamics/domino/src/utils.py b/examples/cfd/external_aerodynamics/domino/src/utils.py
index 9b742677b2..9970c186f4 100644
--- a/examples/cfd/external_aerodynamics/domino/src/utils.py
+++ b/examples/cfd/external_aerodynamics/domino/src/utils.py
@@ -113,7 +113,7 @@ def get_keys_to_read(
     cfg_params_vec = []
     for key in cfg.variables.global_parameters:
         if cfg.variables.global_parameters[key].type == "vector":
-            cfg_params_vec.append(*cfg.variables.global_parameters[key].reference)
+            cfg_params_vec.extend(cfg.variables.global_parameters[key].reference)
         else:
             cfg_params_vec.append(cfg.variables.global_parameters[key].reference)
     keys_to_read_if_available = {
@@ -204,7 +204,6 @@ def coordinate_distributed_environment(cfg: DictConfig):
             "surface_mesh_centers": point_like_placement,
             "surface_normals": point_like_placement,
             "surface_areas": point_like_placement,
-            "surface_fields": point_like_placement,
         }
     else:
         domain_mesh = None
diff --git a/physicsnemo/datapipes/cae/cae_dataset.py b/physicsnemo/datapipes/cae/cae_dataset.py
index a78157936a..21fb8c4783 100644
--- a/physicsnemo/datapipes/cae/cae_dataset.py
+++ b/physicsnemo/datapipes/cae/cae_dataset.py
@@ -550,6 +550,9 @@ def __init__(
             self,
             keys_to_read: list[str] | None,
             keys_to_read_if_available: dict[str, torch.Tensor] | None,
+            cache_bytes_limit: int = 10_000_000,
+            data_copy_concurrency: int = 72,
+            file_io_concurrency: int = 72,
         ) -> None:
             super().__init__(keys_to_read, keys_to_read_if_available)
 
@@ -563,9 +566,9 @@ def __init__(
 
             self.context = ts.Context(
                 {
-                    "cache_pool": {"total_bytes_limit": 10_000_000},
-                    "data_copy_concurrency": {"limit": 72},
-                    "file_io_concurrency": {"limit": 72},
+                    "cache_pool": {"total_bytes_limit": cache_bytes_limit},
+                    "data_copy_concurrency": {"limit": data_copy_concurrency},
+                    "file_io_concurrency": {"limit": file_io_concurrency},
                 }
             )
 
@@ -1116,6 +1119,20 @@ def set_volume_sampling_size(self, volume_sampling_size: int):
         """
         self.file_reader.set_volume_sampling_size(volume_sampling_size)
 
+    def close(self):
+        """
+        Explicitly close the dataset and cleanup resources, including the ThreadPoolExecutor.
+        """
+        if hasattr(self, "preload_executor") and self.preload_executor is not None:
+            self.preload_executor.shutdown(wait=True)
+            self.preload_executor = None
+
+    def __del__(self):
+        """
+        Cleanup resources when the dataset is destroyed.
+        """
+        self.close()
+
 
 def compute_mean_std_min_max(
     dataset: CAEDataset, field_keys: list[str], max_samples: int = 20
@@ -1180,7 +1197,7 @@ def compute_mean_std_min_max(
             batch_mean = field_data.mean(axis=(0))
             batch_M2 = ((field_data - batch_mean) ** 2).sum(axis=(0))
             batch_n = field_data.shape[0]
-                     
+
             # Update running mean and M2 (Welford's algorithm)
             delta = batch_mean - mean[field_key]
             N[field_key] += batch_n  # batch_n should also be torch.int64
@@ -1215,28 +1232,30 @@ def compute_mean_std_min_max(
             batch_n = field_data.shape[0]
 
             # # Update min/max
-            
+
             mean_sample = mean[field_key]
             std_sample = std[field_key]
             # import pdb; pdb.set_trace()
             mask = torch.ones_like(field_data, dtype=torch.bool)
             for v in range(field_data.shape[-1]):
-                idx = (field_data[:, v] < mean_sample[v] - 12 * std_sample[v]) | (field_data[:, v] > mean_sample[v] + 12 * std_sample[v])
+                idx = (field_data[:, v] < mean_sample[v] - 12 * std_sample[v]) | (
+                    field_data[:, v] > mean_sample[v] + 12 * std_sample[v]
+                )
                 idx = torch.where(idx)
                 mask[idx] = False
-            
+
             batch_min = []
             batch_max = []
             for v in range(field_data.shape[-1]):
                 batch_min.append(field_data[mask[:, v], v].min())
                 batch_max.append(field_data[mask[:, v], v].max())
-            
+
             batch_min = torch.stack(batch_min)
             batch_max = torch.stack(batch_max)
 
             min_val[field_key] = torch.minimum(min_val[field_key], batch_min)
             max_val[field_key] = torch.maximum(max_val[field_key], batch_max)
-                     
+
         end = time.perf_counter()
         iteration_time = end - start
         print(f"on iteration {i} of {max_samples}, time: {iteration_time:.2f} seconds")
@@ -1247,4 +1266,4 @@ def compute_mean_std_min_max(
 
     print(f"Total time: {global_time:.2f} seconds for {max_samples} samples")
 
-    return mean, std, min_val, max_val
\ No newline at end of file
+    return mean, std, min_val, max_val
diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py
index 05243e99a5..9d01ad0978 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe.py
@@ -304,7 +304,9 @@ def __init__(
 
         self.dataset = None
 
-    def compute_stl_scaling_and_surface_grids(self) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    def compute_stl_scaling_and_surface_grids(
+        self,
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
         """
         Compute the min and max for the defining mesh.
 
@@ -325,7 +327,9 @@ def compute_stl_scaling_and_surface_grids(self) -> tuple[torch.Tensor, torch.Ten
 
         return s_min, s_max, surf_grid
 
-    def compute_volume_scaling_and_grids(self) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    def compute_volume_scaling_and_grids(
+        self,
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
         """
         Compute the min and max and grid for volume data.
 
@@ -363,7 +367,9 @@ def downsample_geometry(
                 stl_vertices, geometry_points
             )
             if geometry_coordinates_sampled.shape[0] < geometry_points:
-                raise ValueError("Sampled points is more than points in the surface mesh")
+                raise ValueError(
+                    "Surface mesh has fewer points than requested sample size"
+                )
             geom_centers = geometry_coordinates_sampled
         else:
             geom_centers = stl_vertices
@@ -444,14 +450,12 @@ def process_surface(
 
             if surface_coordinates_sampled.shape[0] < self.config.surface_points_sample:
                 raise ValueError(
-                    "Sampled points is more than points in the surface mesh"
+                    "Surface mesh has fewer points than requested sample size"
                 )
 
             # Select out the sampled points for non-neighbor arrays:
             if surface_fields is not None:
                 surface_fields = surface_fields[idx_surface]
-            else:
-                raise ValueError("Surface fields are not present")
 
             # Subsample the normals and sizes:
             surface_normals = surface_normals[idx_surface]
@@ -548,8 +552,6 @@ def process_volume(
             volume_coordinates = volume_coordinates[ids_in_bbox]
             if volume_fields is not None:
                 volume_fields = volume_fields[ids_in_bbox]
-            else:
-                raise ValueError("Volume fields are not present")
 
         ########################################################################
         # Apply sampling to the volume coordinates and fields
@@ -568,14 +570,12 @@ def process_volume(
             # inputs were too few), pad the outputs:
             if volume_coordinates_sampled.shape[0] < self.config.volume_points_sample:
                 raise ValueError(
-                    "Sampled points is more than points in the volume mesh"
+                    "Volume mesh has fewer points than requested sample size"
                 )
 
             # Apply the same sampling to the targets, too:
             if volume_fields is not None:
                 volume_fields = volume_fields[idx_volume]
-            else:
-                raise ValueError("Volume fields are not present")
 
             volume_coordinates = volume_coordinates_sampled
 
@@ -656,6 +656,22 @@ def calculate_volume_encoding(
 
     @torch.no_grad()
     def process_data(self, data_dict):
+        # Validate that all required keys are present in data_dict
+        required_keys = [
+            "global_params_values",
+            "global_params_reference",
+            "stl_coordinates",
+            "stl_faces",
+            "stl_centers",
+            "stl_areas",
+        ]
+        missing_keys = [key for key in required_keys if key not in data_dict]
+        if missing_keys:
+            raise ValueError(
+                f"Missing required keys in data_dict: {missing_keys}. "
+                f"Required keys are: {required_keys}"
+            )
+
         # Start building the preprocessed return dict:
         return_dict = {
             "global_params_values": data_dict["global_params_values"],
@@ -730,7 +746,7 @@ def process_data(self, data_dict):
         )
         return_dict["sdf_surf_grid"] = sdf_surf_grid
         return_dict["surf_grid"] = surf_grid
-        
+
         # Store this only if normalization is active:
         if self.config.normalize_coordinates:
             return_dict["surface_min_max"] = torch.stack([s_min, s_max])
@@ -860,7 +876,9 @@ def set_dataset(self, dataset: Iterable) -> None:
 
         if self.config.volume_sample_from_disk:
             # We deliberately double the data to read compared to the sampling size:
-            self.dataset.set_volume_sampling_size(100 * self.config.volume_points_sample)
+            self.dataset.set_volume_sampling_size(
+                100 * self.config.volume_points_sample
+            )
 
     def __len__(self):
         if self.dataset is not None:
@@ -926,7 +944,6 @@ def compute_scaling_factors(
     cfg: DictConfig,
     input_path: str,
     target_keys: list[str],
-    use_cache=None,
     max_samples=20,
 ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
     """
diff --git a/physicsnemo/models/domino/encodings.py b/physicsnemo/models/domino/encodings.py
index 55ce655090..7b27eeb134 100644
--- a/physicsnemo/models/domino/encodings.py
+++ b/physicsnemo/models/domino/encodings.py
@@ -147,7 +147,7 @@ def __init__(
 
     def calculate_total_neighbors_in_radius(
         self, geo_encoding_type: str, neighbors_in_radius: int, n_upstream_radii: int
-    ) -> list[int]:
+    ) -> int:
         if geo_encoding_type == "both":
             total_neighbors_in_radius = neighbors_in_radius * (n_upstream_radii + 1)
         elif geo_encoding_type == "stl":
diff --git a/physicsnemo/models/layers/ball_query.py b/physicsnemo/models/layers/ball_query.py
index 795958800a..77416bd57a 100644
--- a/physicsnemo/models/layers/ball_query.py
+++ b/physicsnemo/models/layers/ball_query.py
@@ -82,6 +82,9 @@ def forward(
                 - outputs: Tensor containing coordinates of the neighboring points
         """
 
+        if x.shape[0] != 1 or p_grid.shape[0] != 1:
+            raise ValueError("BQWarp only supports batch size 1")
+
         if p_grid.shape[-1] != x.shape[-1] or x.shape[-1] != 3:
             raise ValueError("The last dimension of p_grid and x must be 3")
 
diff --git a/physicsnemo/models/layers/mlp_layers.py b/physicsnemo/models/layers/mlp_layers.py
index ec832ad6b1..5c8c3348a3 100644
--- a/physicsnemo/models/layers/mlp_layers.py
+++ b/physicsnemo/models/layers/mlp_layers.py
@@ -19,31 +19,6 @@
 
 from .activations import get_activation
 
-# class Mlp(nn.Module):
-#     def __init__(
-#         self,
-#         in_features,
-#         hidden_features=None,
-#         out_features=None,
-#         act_layer=nn.GELU,
-#         drop=0.0,
-#     ):
-#         super().__init__()
-#         out_features = out_features or in_features
-#         hidden_features = hidden_features or in_features
-#         self.fc1 = nn.Linear(in_features, hidden_features)
-#         self.act = act_layer()
-#         self.fc2 = nn.Linear(hidden_features, out_features)
-#         self.drop = nn.Dropout(drop)
-
-#     def forward(self, x: torch.Tensor):
-#         x = self.fc1(x)
-#         x = self.act(x)
-#         x = self.drop(x)
-#         x = self.fc2(x)
-#         x = self.drop(x)
-#         return x
-
 
 class Mlp(nn.Module):
     def __init__(
@@ -66,7 +41,7 @@ def __init__(
             ]
 
         # If the activation is a string, get it.
-        # It's it's a type, instantiate it.
+        # If it's a type, instantiate it.
         # If it's a module, leave it be.
         if isinstance(act_layer, str):
             act_layer = get_activation(act_layer)
diff --git a/physicsnemo/utils/domino/utils.py b/physicsnemo/utils/domino/utils.py
index 8b7a982142..e3faae8123 100644
--- a/physicsnemo/utils/domino/utils.py
+++ b/physicsnemo/utils/domino/utils.py
@@ -469,7 +469,7 @@ def shuffle_array(
         for g in range(gap):
             points_per_chunk[g] += 1
     elif gap < 0:
-        for g in range(gap):
+        for g in range(-gap):
             points_per_chunk[g] -= 1
 
     # Create a list of indexes per chunk:
diff --git a/test/datapipes/test_domino_datapipe.py b/test/datapipes/test_domino_datapipe.py
index a4f7159c1f..9f665886bd 100644
--- a/test/datapipes/test_domino_datapipe.py
+++ b/test/datapipes/test_domino_datapipe.py
@@ -363,8 +363,6 @@ def validate_sample_structure(sample, model_type, gpu_output):
         expected_keys.extend(surface_keys)
 
     # Check that required keys are present and are torch tensors on correct device
-    for key in expected_keys:
-        print(f"Got key: {key} on device: {sample[key].device.type}")
 
     for key in expected_keys:
         if key in sample:  # Some keys may be None if compute_scaling_factors=True
@@ -748,9 +746,6 @@ def test_domino_datapipe_sampling(zarr_dataset, model_type, sampling, pytestconf
     sample = dataset[0]
     validate_sample_structure(sample, model_type, gpu_output=use_cuda)
 
-    for key in sample:
-        print(f"sample[{key}].shape: {sample[key].shape}")
-
     if model_type in ["volume", "combined"]:
         for key in ["volume_mesh_centers", "volume_fields"]:
             if sampling:
diff --git a/test/models/domino/test_domino_mlps.py b/test/models/domino/test_domino_mlps.py
index d181f24401..8cf9546c2b 100644
--- a/test/models/domino/test_domino_mlps.py
+++ b/test/models/domino/test_domino_mlps.py
@@ -19,29 +19,6 @@
 
 from .utils import validate_output_shape_and_values
 
-# @pytest.mark.parametrize("device", ["cuda:0"])
-# @pytest.mark.parametrize("activation", ["relu", "gelu"])
-# @pytest.mark.parametrize("n_layers", [1, 2, 3, 5])
-# def test_mlp(device, activation, n_layers):
-#     """Test basic MLP functionality"""
-#     from physicsnemo.models.domino.mlps import MLP
-#     from physicsnemo.models.domino.model import get_activation
-
-#     torch.manual_seed(0)
-
-#     mlp = MLP(
-#         input_features=10,
-#         output_features=5,
-#         base_layer=32,
-#         activation=get_activation(activation),
-#         n_layers=n_layers,
-#     ).to(device)
-
-#     x = torch.randn(4, 50, 10).to(device)
-#     output = mlp(x)
-
-#     validate_output_shape_and_values(output, (4, 50, 5))
-
 
 @pytest.mark.parametrize("device", ["cuda:0"])
 @pytest.mark.parametrize("activation", ["relu", "gelu"])
diff --git a/test/models/test_mlp_layers.py b/test/models/test_mlp_layers.py
index 19db339ba8..7a943cc51b 100644
--- a/test/models/test_mlp_layers.py
+++ b/test/models/test_mlp_layers.py
@@ -50,7 +50,7 @@ def test_mlp_forward_accuracy(device):
 
 def test_mlp_activation_and_dropout():
     model = Mlp(in_features=10, hidden_features=20, out_features=5, drop=0.5)
-    input_tensor = torch.randn(2, 10)  # Assuming a batch size of 1 for simplicity
+    input_tensor = torch.randn(2, 10)  # Batch size of 2
 
     output_tensor = model(input_tensor)
 
@@ -61,7 +61,7 @@ def test_mlp_different_activation():
     model = Mlp(
         in_features=10, hidden_features=20, out_features=7, act_layer=torch.nn.ReLU
     )
-    input_tensor = torch.randn(3, 10)  # Assuming a batch size of 1 for simplicity
+    input_tensor = torch.randn(3, 10)  # Batch size of 3
 
     output_tensor = model(input_tensor)
     assert output_tensor.shape == torch.Size([3, 7])
@@ -69,7 +69,7 @@ def test_mlp_different_activation():
 
 def test_multiple_hidden_layers():
     model = Mlp(in_features=10, hidden_features=[20, 30], out_features=5)
-    input_tensor = torch.randn(4, 10)  # Assuming a batch size of 1 for simplicity
+    input_tensor = torch.randn(4, 10)  # Batch size of 4
 
     output_tensor = model(input_tensor)
     assert output_tensor.shape == torch.Size([4, 5])

From 763d97855a71800f946148346d6fccfc92fa0e01 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Thu, 16 Oct 2025 08:53:27 -0500
Subject: [PATCH 86/98] Remove error that breaks validation / inference.

---
 physicsnemo/datapipes/cae/domino_datapipe.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py
index 9d01ad0978..fb9a920708 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe.py
@@ -422,8 +422,6 @@ def process_surface(
             surface_sizes = surface_sizes[ids_in_bbox]
             if surface_fields is not None:
                 surface_fields = surface_fields[ids_in_bbox]
-            else:
-                raise ValueError("Surface fields are not present")
 
         ########################################################################
         # Perform Down sampling of the surface fields.

From 0ea5f99316e0fabec0a7e53336ad60788e3726b1 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Thu, 16 Oct 2025 16:21:12 +0000
Subject: [PATCH 87/98] Update Domino model and tests: make sure pre-commit
 passes, remove unneeded layers, and update tests for latest model.

---
 physicsnemo/models/domino/geometry_rep.py     |  23 ++-
 physicsnemo/models/domino/model.py            |  50 +++--
 test/models/data/domino_output-conv.pth       | Bin 0 -> 5501 bytes
 test/models/data/domino_output-unet.pth       | Bin 0 -> 5501 bytes
 test/models/data/domino_output.pth            | Bin 5255 -> 0 bytes
 test/models/domino/test_domino.py             | 187 +++++++++---------
 .../models/domino/test_domino_geometry_rep.py |  13 +-
 7 files changed, 144 insertions(+), 129 deletions(-)
 create mode 100644 test/models/data/domino_output-conv.pth
 create mode 100644 test/models/data/domino_output-unet.pth
 delete mode 100644 test/models/data/domino_output.pth

diff --git a/physicsnemo/models/domino/geometry_rep.py b/physicsnemo/models/domino/geometry_rep.py
index c2fbe9f606..eee192e600 100644
--- a/physicsnemo/models/domino/geometry_rep.py
+++ b/physicsnemo/models/domino/geometry_rep.py
@@ -74,7 +74,9 @@ def __init__(
         self.num_modes = model_parameters.num_modes
 
         if self.fourier_features:
-            input_features_calculated = input_features * (1 + 2 * self.num_modes) * neighbors_in_radius
+            input_features_calculated = (
+                input_features * (1 + 2 * self.num_modes) * neighbors_in_radius
+            )
         else:
             input_features_calculated = input_features * neighbors_in_radius
 
@@ -86,10 +88,6 @@ def __init__(
             drop=0.0,
         )
 
-        # self.fc1 = nn.Linear(input_features_calculated, base_neurons)
-        # self.fc2 = nn.Linear(base_neurons, base_neurons // 2)
-        # self.fc3 = nn.Linear(base_neurons // 2, model_parameters.base_neurons_in)
-
         self.grid_resolution = grid_resolution
 
         self.activation = get_activation(model_parameters.activation)
@@ -128,14 +126,14 @@ def forward(
         )
         grid = grid.reshape(1, nx * ny * nz, 3, 1)
 
-        x = rearrange(x, "b x y z -> b x (y z)", x=nx*ny*nz, y=self.neighbors_in_radius, z=3)
+        x = rearrange(
+            x, "b x y z -> b x (y z)", x=nx * ny * nz, y=self.neighbors_in_radius, z=3
+        )
         if self.fourier_features:
             facets = torch.cat((x, fourier_encode(x, self.freqs)), axis=-1)
         else:
             facets = x
-        # x = self.activation(self.fc1(facets))
-        # x = self.activation(self.fc2(x))
-        # x = F.tanh(self.fc3(x))
+
         x = F.tanh(self.mlp(facets))
 
         x = rearrange(x, "b (x y z) c -> b c x y z", x=nx, y=ny, z=nz)
@@ -364,8 +362,9 @@ def __init__(
                 normalization_in_unet = "layernorm"
             else:
                 normalization_in_unet = None
+
             self.geo_processor_sdf = UNet(
-                in_channels=6,
+                in_channels=5 + len(self.sdf_scaling_factor),
                 out_channels=geometry_rep.geo_conv.base_neurons_out,
                 model_depth=3,
                 feature_map_channels=[
@@ -391,7 +390,7 @@ def __init__(
         elif geometry_rep.geo_processor.processor_type == "conv":
             self.geo_processor_sdf = nn.Sequential(
                 GeoProcessor(
-                    input_filters=5+len(self.sdf_scaling_factor),
+                    input_filters=5 + len(self.sdf_scaling_factor),
                     output_filters=geometry_rep.geo_conv.base_neurons_out,
                     model_parameters=geometry_rep.geo_processor,
                 ),
@@ -477,7 +476,7 @@ def forward(
             for s in range(len(self.sdf_scaling_factor)):
                 s_sdf = scale_sdf(sdf, self.sdf_scaling_factor[s])
                 scaled_sdf.append(s_sdf)
-                
+
             scaled_sdf = torch.cat(scaled_sdf, dim=1)
 
             # Process SDF and its computed features
diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py
index 20be346dcb..9f46947f2b 100644
--- a/physicsnemo/models/domino/model.py
+++ b/physicsnemo/models/domino/model.py
@@ -171,7 +171,7 @@ def __init__(
             ValueError: If both output_features_vol and output_features_surf are None
         """
         super().__init__()
-        self.input_features = input_features
+
         self.output_features_vol = output_features_vol
         self.output_features_surf = output_features_surf
         self.num_sample_points_surface = model_parameters.num_neighbors_surface
@@ -267,11 +267,6 @@ def __init__(
         self.encode_parameters = model_parameters.encode_parameters
         self.geo_encoding_type = model_parameters.geometry_encoding_type
 
-        if hasattr(model_parameters, "num_volume_neighbors"):
-            self.num_volume_neighbors = model_parameters.num_volume_neighbors
-        else:
-            self.num_volume_neighbors = 50
-
         if self.use_surface_normals:
             if not self.use_surface_area:
                 input_features_surface = input_features + 3
@@ -310,7 +305,7 @@ def __init__(
             sdf_scaling_factor=model_parameters.geometry_rep.geo_processor.surface_sdf_scaling_factor,
             model_parameters=model_parameters,
         )
-        
+
         # Basis functions for surface and volume
         base_layer_nn = model_parameters.nn_basis_functions.base_layer
         if self.output_features_surf is not None:
@@ -353,9 +348,15 @@ def __init__(
         position_encoder_base_neurons = model_parameters.position_encoder.base_neurons
         self.activation = get_activation(model_parameters.activation)
         self.use_sdf_in_basis_func = model_parameters.use_sdf_in_basis_func
-        self.sdf_scaling_factor = model_parameters.geometry_rep.geo_processor.volume_sdf_scaling_factor
+        self.sdf_scaling_factor = (
+            model_parameters.geometry_rep.geo_processor.volume_sdf_scaling_factor
+        )
         if self.output_features_vol is not None:
-            inp_pos_vol = 7 + len(self.sdf_scaling_factor) if model_parameters.use_sdf_in_basis_func else 3
+            inp_pos_vol = (
+                7 + len(self.sdf_scaling_factor)
+                if model_parameters.use_sdf_in_basis_func
+                else 3
+            )
 
             self.fc_p_vol = FourierMLP(
                 input_features=inp_pos_vol,
@@ -398,20 +399,6 @@ def __init__(
             grid_resolution=self.grid_resolution,
         )
 
-        # Transmitting surface to volume
-        self.surf_to_vol_conv1 = nn.Conv3d(
-            len(model_parameters.geometry_rep.geo_conv.volume_radii) + 1,
-            16,
-            kernel_size=3,
-            padding="same",
-        )
-        self.surf_to_vol_conv2 = nn.Conv3d(
-            16,
-            len(model_parameters.geometry_rep.geo_conv.volume_radii) + 1,
-            kernel_size=3,
-            padding="same",
-        )
-
         # Aggregation model
         if self.output_features_surf is not None:
             # Surface
@@ -522,9 +509,12 @@ def forward(self, data_dict):
 
             # SDF on volume mesh nodes
             sdf_nodes = data_dict["sdf_nodes"]
-            scaled_sdf_nodes = []
-            for i in range(len(self.sdf_scaling_factor)):
-                scaled_sdf_nodes.append(scale_sdf(sdf_nodes, self.sdf_scaling_factor[i]))
+            # scaled_sdf_nodes = []
+            # for i in range(len(self.sdf_scaling_factor)):
+            # scaled_sdf_nodes.append(scale_sdf(sdf_nodes, self.sdf_scaling_factor[i]))
+            scaled_sdf_nodes = [
+                scale_sdf(sdf_nodes, scaling) for scaling in self.sdf_scaling_factor
+            ]
             scaled_sdf_nodes = torch.cat(scaled_sdf_nodes, dim=-1)
 
             # Positional encoding based on closest point on surface to a volume node
@@ -533,7 +523,13 @@ def forward(self, data_dict):
             pos_volume_center_of_mass = data_dict["pos_volume_center_of_mass"]
             if self.use_sdf_in_basis_func:
                 encoding_node_vol = torch.cat(
-                    (sdf_nodes, scaled_sdf_nodes, pos_volume_closest, pos_volume_center_of_mass), dim=-1
+                    (
+                        sdf_nodes,
+                        scaled_sdf_nodes,
+                        pos_volume_closest,
+                        pos_volume_center_of_mass,
+                    ),
+                    dim=-1,
                 )
             else:
                 encoding_node_vol = pos_volume_center_of_mass
diff --git a/test/models/data/domino_output-conv.pth b/test/models/data/domino_output-conv.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0a3b7102a414c49428b7a0c051865bbb8f984743
GIT binary patch
literal 5501
zcmb7I30#!b`W|pY5||mtT`RT$B?Sds-nT+7;Q}fyC?UzNpa>3(WL~$*a!Vt{g&8DM
zubC#YGxL2<F@sz)^=hfiko!XBmYJp3t6uw`$?T_Iy#MYuzcb(O_su!a`+V<t&U?;0
zF;T5rD-=#nivPH}DcUL0a%N;^=OpIL(#@QudoeX9JHKyQvM#yL%;{N?;jI+$@$m~C
z=V=vwQHoSuPHt*OpTt?Z%&a`;#N70hS(#aBiMsUcyqw&`{C>__2S2~Kb|Oli=$Sk<
zUF-N@h;w9CPO@&?gNM%AR&frZPoTDSTq|);ojFU}W@LebPTO`&lp@L@DlMw@Vr@I|
zB&v0k!(y$|($v(PtgQ4@U1m;pp7ZG3wDjEcw1~`9owK&R|5&ZFPW#MQZHFj_3=uuU
zG2;Pxkk%!m&B$K_s_R44HjA}x;z?8+JF0SNhJCO~r&W*Dy3gtp6V=XXT13B_&4Q7=
z#XL1<o{A1F<*d(?oZK17y2SkS+&n=vGTh0Yy5FuKJ9zDnC*HBYw!J+a+(!}6BKYqK
zO-#<poSHo&JzEzU?)L}&!#Gd7hX&gTyz++jlA{Q2Z%-%CT@l<8fs{9O={feyM}`Of
z>9`O1WM%4fS?P8T@4j@tOIs1&-k#2(i=ux^9PFy>8yFeh_kSf27i2^HtYTjJWi87)
zdL!uTCOj9dXZJ2LcMbQ$<@rr$-P6GD|6StmCV6B1bu0AO^?dkbEvMCM;JoBpY-_FO
z4&O+;%B;aD&n9eLW8jT<B(@#$!B2;6xbQ_0a~~srMV@eZ&5A2i4ZNv9;w7*8;okE$
zeCelW=QBob9nlNF#@nzqvWU;sf+P0(<5))<&b#RO{mfc6hx)<eT`NXBYv8#fB%Zg<
zA7QRGBxUQl;g*qCIs4%I+cwlpGH}Tb6ZaVAgNRE<QTk3XyFFv#Q=Yw$)w2oPat!P`
z-^AWkUU00o;g#$n9(BRQ;e$P~ez6r5CyV(0VVTdK^nx<U2IuYuUN+On7f<`3ZJ`w%
zhZ&fMn7HhAFLb<Q!<^5H`O^tTZZP<xM{gT^MLwHW7<tr0UkuK+p~Ta`3F{>O%H1EX
zs~eFLF8Ftm_{Wd@uxX7Ih2nirye#vq&OVqJXGL~{frqTD<()}hD068<=d(q;Mk{ed
zkvA?k+YqYN^Xq;lu4w0rqEZ_=jnQ+^0GUfV`C=tEA@4Z@PpB}mcZo0d3~9ps?+x79
z!Nd#F{1I}}hG{<-*yEIm7shF@eY+J-g~j~qDv8T_d*hSSO&EMm&(B*;+`M0dk|T}y
z`mCNWuMqj0?Tb#+ZMeO=h)-2WJmVK%yfD><K(n5+cT4Qh?2C`xt>_nP;F>cgp6lt0
zw<B%Xny2TgMv1E$0+7AXhM-Ub`>r(cC%Hl|&5bZ@*ua5WBd7P!VAVA%B9jd4xmM;S
z@jh@5Ys7`BB3`l2$Uk=SLDOa%+6HXk`x}gGNcP9v&#Xx6S;C#ge2gyeMa?NIUcOq)
zc_xYHf7A;u30B;CN6*8)HSzT+-dO#U6@%X}@R{}!7aDvZxg0}~rkGF8ujLfk8-ZsV
z;W)>@rORqLy4)Wfriu3nF!1a}V*Diy7BpJ1w@VRU{kMs&S2R#(+K^kK=g;n#ICq^N
zqJ=&R&lIsi<oo`c8Z6DXVamnzyv;*m^&BtE3TwjKVtwSlBJmqDeR1w?Berza^Lw(1
zUx@KRhUi}#P{LhB{%}Ww!Jal;_b=h)W)u5&)W9jpik?S`_|}bDE_+FXO>Z^AGNYK!
z^*8c}6TTSw>k-WPTM?W0n7FIK7ZdVqQ1&clT_>4)OcnY)Xv1`80|$)~^ONU?xVcSm
zY1Ff8w#4hZ2tJjKIHlge(PDi)+n_<~j*SSrYv9}CYWat|UTA1<!)WaWZV>BrL7^Y6
zZfZi?K?csS898vCHv-z&aQJvJ>-9zsj?y61V8ev6A`a_q;y3&H;N8|%e6mB&ox_bR
zZT7}n*BWuM!oY2Z8`-Mvg_>zLv@167q7Nj_InfJKR|&n98u&v4c*RN$F3c5rPb=n?
zArgB|^+bu7&ko1+T$5+w^>=-MK~@xq`moR+oR#Z`3A?Q57HHt}mn1G2?T2APA0sjh
zY<}CsX~X=`wVMszn~J%NOxzRRNVeE8YlMNP#7gYQ8pQSZ7Oze=@VVu+>?-u1vRBOK
z1wGdu5cMd;7vcA8U{Mb@rpf&NaZkhzx8gX8dGZ}2_mliFV5bd{&lU5|TY}$B4bJrt
z>+g`BXWW!H_gg=#74`SPNj=|q(a3uT`onjW70#~}bH5UaKlkd5qsca;tkH9vWaNd>
zFXHNVN71`b&w=NSJm-uCOLjM6tFM7KFPC}INN-4?Ryal*IQp+5|Lc6QO5h(Ad`?_8
z@$CL$K8A|*eN)eGHJW(EPA~9FHZ0eR{0uemOrsa##@R4%iJrHOm-s@o2H9UVA+A8r
zS31<PE?<Myt8LhlDe7mG%m-b(VBfj2zlqu&Cblhm`)@nfpr`I!L6PAh|Le~6U<d8A
zScMaL<4De_CPmzCl%<2SPbjf3E}GirRMC5_EGSI_Pqn!rIWL-~g_&qfqXmmS!P`f=
z<A`cFmG-Kk;8+U|=F5CL-3`n7OeWjrTAJiz0p?3wd_ak=vnJ8NA1i3E)`I$SnfH3R
z;K1bsLRlI0n{0;f7?}h6tFUTdGA;eJf|i);&@olwPkwSo!u_%Iin@yKwXVa2pJXoB
zq(tzV1PThPCf6S<IIu?Im|berWX4j$Gc{DGH)C3e%==zbqpABiy8ZJmTKJI}S&L-u
z^`RQqt|U@H$10*+Gd8^=^CxYT*s?W=DpNsaBJOsvcD`mMECKO!A)<_8j@QGnNn+ni
zZaC^0OUoOo$*QbJcZ<Y#=BRP6GKS`lsG^xVGp>sqSUb5RtnVc1d8C3^VMd&&9X+4(
zz}-=!sW_vWVuqV>=slUU*STWP_%U?q`BGYb*NjVn;P^T<BG;soKCFUp+KeGNG9UO@
ziH!?lsGG5hoK{&7nILo6W+fEL5%hJJT@*3Fj4g#SNB>)e=-6nAyID!LQ)Zk`0#BK)
zLd0hysH&!%Hukq*xSPb|JGw!CIgI+;Eu;G$^(c+{oNpXaB5QUG6(p9@y^;0!*&?&^
zK34=^97R#_<+QV%1%=r%s|(%Glrx^DU9KkGKP;HAN#eyRD#V{1PgldLC^)JfcZPxY
zc5z2(@^D)C!Y-<BFhkEWd)F&5_)<JAd#{|1rPtxUPU0jVH}vX}M%U`f=-3tu;xsak
z`bC8?KFM@{Z8>@VP=|yr;N~`Jl&8j1$X1C??5sz0rNng?lxP^5LJ_B{>5B^%ToEy}
z^VHa!Ig0+$TtSuLb+F6<7ZxkgW#I^ds)7{BX1LYM+-HXhT^r-6bZ!|PaI)Y=zQjDz
z4c}(R(bk*QB3~996*X^1KX)8hpFldnx#u<uCW$$E;j9|Jyg8f_+$yN&QlTNM!~v__
zuw6Zd_BU74`fFw!-2k57Q;i-GlWFR;DyoRAgE~XR)u`a%F`7I*%cxCYJ<iOOxhYqL
zQ*Vu-Ji{)!UR4L@ZQx+D8VNaL>HJIO#5xO>U6DB7>W*vaW2vgFhDy@waUw$E0ZHyq
zhNVz(RvGnDS+Gx>L*l2qpxJ*kEpw=)lpYo=`KO8399JSh`x?D*qMR0;s)MdT=9I-s
z<mZm2_y1K++dM4Lu9Vra&>h~pVrl=33i5~)x-!Z9x~~d_=b|Y1=UN)S)`Hz@Wlosm
zhEi!HeKD_`MupVj{ZD27>&Ggbsfwd11FEUuxETd!W&UuO8V*gdG*Mqg3(nT#;7OT_
zKU3jCK`bphE%fJV!I4bx!3Z_xZxPShRM6jUiFzRBviosYOt_y)S!XI}QG^Adw<Om1
ztC0HZI7-`8ML#==nj_B5n<uF;_re&8_;(HcG_D>V!-ZZZs-T!Xj_z0PqWm^OFRx0h
z9IM2Ec@dNmQcc4ynz3lTIAc^Pfh7|t|6na$USdXslh}^3T%a2|gg!V_N@XKN4GRGG
z+U1VJH-^%u&1IzMUytVxN$jmr;@<HDYF}ANSvTwOVvx)WhPY$j+!#`Ks-%F6^+*~o
z^9_G@1o$V>u!-e#C(MkOzXz9!{O$cNkxJKqd<4INq9*E>sBzRffhHB#kneO0OhTtI
zwQ2;+8%I-v%juTLX`)T$J#$nzxF?F*Hg2b9=3B6?6L?RI8iO6i(!L&*6kTXW{J$ho
zW7W8GAeNR+uO#J2Gy3(G`0^chz+((KuC64V(t?0zWL_68IQN}M1*PS5^{xfS2guye
zPK{;Ccq&{Z>Q$0ht3rRr4k^*UbTS?NX9eAR*^D_ugq}|;u|^p~o2Kld$ve$h?j-a2
zSQlLLdyPgacGAL&7EEc7`OQ%tXntujjb2?w+3CBHH(BP=57gM#JdrluE~l<%EikW^
zI9co|&F@U1d)s$X)_yaFy#hY}7olD2NV;^qiaJ`&FrJmT^D$SvigC0e%t$5TSxBwS
zZwD(eYTrbvf4-8Az9sZ>PUehdO56^gMAcu`lFxWEP6vSv&${CE*pWolqBebQM$CC}
zw%PBFrr|NfH>)V_d_C4gf>#ATi|e0HrmjJ?G|<b8j?+b5+u?%Fy(Uq|tCjTkF?E<b
zTH=(pYG8K~ZT(?8<;RHi)?Vg$lU0x~kqQ^qP*kZIC6GB@)UdLgc$#}uBHw8iIH${8
z^R5bSWF}JPyw9j~Y(4r-mw9HB3LQOSsY<FM<v+~WdRXG393@8dA4}QTNn{f1L(I|T
z*={)C6;Hu2M%tpPN9$u^9~9>a(^n^RzZO=By*>Rr(N*Es(s|+!kHPy-9Vq%c+arEc
zGBx)9|Nrl=(6kh>&u@Re^U`xOg#$4s*>1%oW~N1kd;J*#fgzzmL4gB9`t=VD8WcJp
zFr?q0;J^X>2M!7vG$3d|P;h8yNMPu|-~o1)qvGO{{DeDTZ!u90cEW!+SC9HL-RwU+
zcKsJ0>q($Ty%^8OK!Sxm(*n>F=FXFNk6J4}kKujz6ukfNbDo5J)G6uy7^GU{qy>t9
zI5bZpJ$iEO@)*+EmXRL#EKkBbD%i(iV%oPz=r<T)eLX&hkE*ciV{A66T7>ys>rdi5
zsuTamaMbD+aavYzOjO&p0lyFL+3LkVfF63K5AUhJ2~nXi3UK=+#@T&NMYm_|-w$2A
QHg;c2;Uqrx{tx>8A2uJqcmMzZ

literal 0
HcmV?d00001

diff --git a/test/models/data/domino_output-unet.pth b/test/models/data/domino_output-unet.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9ba6b36de77486f1ce0aedc655a71f9f3154864f
GIT binary patch
literal 5501
zcmb7I30#!b`W|4G??VIyOi|RFGhR1DL~-jqE@Xt_t|2Z60}Pu8vY8p0(anY2Gg6ru
z&G@^gXkI1W_Yf(vx}cdEX}M)-WoEggruR%{KlS4McRzk-z8~M5^PJ~A=Xu}r%{wa0
zMXS*$isnDARvLpQE^S&;YFboUcGmRltghLqmaJ}Z<}9=S^r<N!!7iGK6DQ7fo}I4g
z7^aEMO3R2%^pDEUN=nIejmogZWGAJ>MP*r1Gt)AnW(2yXJ9X?BVUV+A%AV!~OS<!e
zA+8}QY38i44<5RvyF@t2J^|_42p4&eot~Yp8@|jbD_uW2OcUl578j<SpKg#(!n9#d
z^V5}uv9W0>DVEr*q_osb*O3`<mJCbW;H20r*L1bhm~_{y^hRUS-NKv_<?M;hi4V}b
zr#DX24gXc5x<5qK%}@7`Pr`H#)FulP9fO-@rF)J^_saGk6=qP92M1oiAQ?H@sHbXX
z)VMXInSXp*#x!$Q)C@~TrX(5?tT?3m<Ld4J-}}?4Hyq1$w5Nl&*Z4IE{->Z(=9Hv_
z)M=K~tdQW2f8jrz=V|v|JskuFyqtc?Sx)X~Pbc83>Dds0m{+naX%6K>f&>2cybt-L
zBxPl#SR5SQ<mY|#a(+j9ItL$3kA^rnT-hxkB)HrEN?`X~lbAOL$$rjC>ms6H#4aY%
zn+jU-lAX$T#=)>iqxh+WXu>QT)h~^P{0>aGW|Y#@G%M8~w7~weOw8|3R7f^jIn)9X
zXHDW#Hj(#7YpM73NEqnKM2pjc8n3id#qua9{*H;Ven_eNtz`K!5(2gx1^Z0U?#VVP
zX^{v%|1gREXNlYjZM3OT0xbQ^C_;7-&0Aq5V?-nvCBN<QL~(~~^y}Vu*raVQZcHZ%
zy>Fw0OEC~Ln~8;?L}%LDs8jD~2!EFev&8?<&qm?HEl~BMNt9O-m1Ww=y^#g929uy%
zqT}oA)Wa_Z-h0+0u9OpnZ?e*Y+c9v}XcC3r5ZxMMrGOkWG|FQl(^al#v5mae#K5vL
zCMGOIGOo4KWUU#(v?fs+Lez4gjq=r47+c6h@p7UKW*aU0B?ewv%S6l~qW;l#TGuTK
zX3b?HFI-T>p*3`HND>t7WMai0q&%yQ4!4Pi!IJO#t4K@b`2IN-Xc1%-{(S}Y5H{*H
zA{yrHFo`_>5<0csN*P{p@cloS7<0XZ)_dFNh?^PwjxtgIE79OiHoBJ;3kW6=ESKEs
zegS2kje|R0OnjOk_bbd!*7suJ-3}(P;0-~ZgKbo?#{%gACQ-Zv$-mS}AAgVln>R60
z`aaUY;WqO3H^aQ8a{Oqbn$HWU+xS>$7stfTendHAZFJmjffRp}xTO~~Y=D)fwvB-_
z6BFv|NIP0e{XL_hV1r4Vd<Cg&lATibB|v4JQ6xye^&V)Yq1g$rY9SL*p-2;?Kdap3
z{x_HFeYcc;G+3!;R00g^%Eak|NXsOjArs9|`7slB*Aq=YXQPazQLuF{6W_f|WDl{D
z&v7#}@sZzm5fqzfCGQWTA*4%t(V_y$JlRft7Dq$t*Gytiu%LqFR@&Y_9=wM!;rY8<
z|7sh}f7JqA=bJ><CxQn2(?%OMSYYx;Qt$SH{BGN5cy2tn)-y3>wV)X`8#NA%fiE>C
zk-k~btt(dgc}gr4tTu_5Hb}#l*r-jnc&Hs~688=fb$`P~my%<^=NJ>sIuSKWv{Cxm
zSm=02?w{<RbJ|AxuSJ1Al!=UI1PuzX(~h`Ah@Qwq#QP=G{kV;CR!oAAlbJZxyOd7U
z*yxoI3oM*%5?fv*iq_d_Ra-MeZ#9X_uM>g6MkCk7K;#EZ7~e%Y$!w&O8PriGQQwuQ
zxWr1Pax*lO{_S~!Xlu5OlKw9mX36+yGKr|@sEsaMkAV)8Ok&v>qTRn(>G0=~P%8Oe
z@f8&8W22!jCcyB?^8Y}hi0=!iusjY<R2jvdT}ao$t#tKPJX|_y5-S=LjS00=^{q(w
z_EnRZc}39K)dloTm<4W)H;P4?Qu@wdr<!%iaOyoK*bSt+yVp?j_7?D~Hi^5Vh)QI>
zc;#L!yzXWa-eZZ>4{UTtOo11sn1n5uNO!|VbH0j(%gIJDH&alk^h5rESori!ljz)z
zD7VB;_cljC+|Nvmo+#+_GCLi<8V`+LGKq?jNSlV)si?IDCP+UTJOy1CD*g3yB;4*~
z65CaxFC*-fzc~_)#F&JeD^bh>E8PM!j2L1RHZQ5C%%}Szra(~y6XQ0>eA3@eS>s|M
z;1`o9x+VGFk$T3)LCPMJSbI!R?G!uZd~Alj5^vzO5~^4w_jhtET(OwMzMqJi=3B|l
z5(DL>CQ&ByM`5Owq9-T7Gv646M&^&7atmm1T{P6a-%f11h~&D|O1iWd2>;3`1};FV
zn`WocNfziQ^FzQuqTnza*@~kf)GGHoj3}V{TB>`=47OM%>fRG{_=uIJpOpD+9uvuH
z1o`)|(ubpC!Etg`HV$}hpggu5?Z2H|eV=-Abq@&+`d?432Pf#w&yD=I`8rrK_b@8e
zPI$Xc;k`OB-t{9bEHRbiTZ>g}eOTe|EM)w()jGJ?;Q(%FV?aN<$}@g9@-=65&;+W{
z&DjI90u}!D2*!_mYJlWz$MCvN!Dd-1Uz^JK#atbney0X+ru*R7WR<7fG;+~g59!h6
zxUH}m9v!LjypI^~X*2-bD?_!J4vnW&e)fowHyP;+5qHaR!eUSKEK&Km<widLs17QF
zYH)7bXYlT3m5<qI<TXhy5EoX3>w2`n86hgSE@nLZm=4zOufSmwyzuSs6khZj<C)uC
zpxM_|=zC7V+!ZSK3S@ly4_dewP>q_dKKR8(g(qA!^4v&0%sg6yBWxNh->dL5I~o5Z
zLl1k3Yp{!VW89@vIepCdgq?aQx7XsT^FDZQjmkrNG9J}Z2ZuvzvG4;uR`gbRzsp9x
zsKOcEUr>wVR;l>?7Ye@<$@ox%9`^Br=nkGZ?7YI~tuXSuA$llN4`5+gBV4`Qz|S}_
z-ejv52A5Z0(6=6#e_P>c=Z(Cmr5?8YR)vYbH^p5C6z*nboV9TQYe5|rzM;hhEfwDH
zu8}7W)xitr4&lm|^*BFQ<!(ugCm(i(Ud~4`?qVanzEI)$I>sYD*TPj(C06_CaIit;
zw^thZwR<}7>Q;-#B5ssu4=H?`ALE0}dT@TO3J-my!$p%69#PEr4+VN~Yj*$_jPO9O
zttt<fx<s5YK>XN!=;z!V1F95m8OnI^9;x4iLs)dp3F}*_JX&(z)7Jpg7gb?Mk{kZo
zTIH@ejL!@-z^m2!aBvSD4xOU%t>s3JpoM_1YjDpHJ-WA6`1E6pcfPHK^)1SASdus9
zA658~iIPj14ratuV0D!%y024s|7nbe#_A#C#{=kpw*?M+N99FPjGvLu!V7Eg?o4+~
z36p!dk?~aNt4Ys)f^|b(u(9NP?~0KJ#~I+9T7%X`9{AQ&m9LkcteK^SZQH6bu&s)B
zcd2~Tc*d7JuZJ<GYcXPFGaNrp;RgyBUs<k$4JWFxxT_9hq*ep28+lvlgHx|o;pO@!
zIAx2<du?GnGEWaH-#vh5>|WUU3zg4#nQ?8c22$Uw!c{gM9y_7(-Pw${`C1RY%d7CU
z#!h(ZeT7F)Wqj5U1Drgu5AXZ9;|dRzE1dD1wQ?^f)!+taH*~+M@a=U*P6h+CDXqeq
zXBwk9TgFTq#%FiY1J)l#_oTZet{YVT!wn<fvfKr}^Qyu*o87VgONCFoZ{%JjE|8O7
zg+t5qcx8sdU+gXU&Co-SxAx(s@y+pSg2HdV&Ujc4E&Q^h7N7O>#Fo#y^0-lqx9qQj
z`0W*VEaZMksE^csIODsder>iNz?g#yR%uiovyAZ)sg-YW75=lGH#X}bW2QCZ<Nu|D
zRcb9RT<ne$+p2t+)c(R^Jp@)(V6nG5cH}A_8Nm3>eOfp?r~-3VdgE;;S02)V@p?}^
zP|{)a%~jA@YBer~@lWsQz<qKBUg+YDH{~2H-e!E0s}7F!JA}z+oUqLvnVSlX{CJr&
zq<QbhPZ#MiOy<8PlNoQkMh|T@2hn$jJGQD;`HrPhQx6vi4%&~-W<8cjO_v=q@?%EH
z`KxlAKEM;t$Y;B^82Ol!T6m^(KmO?IiF4mlxW`II>>Hrx*(&rS1C~9n^89xgKebT{
z+Q2Ft6!|Q6mU(URI3rIUql3bchj8XE8tjs-@CzduU$aOH=Zw{un$rX;<k*Ydq&LQD
zVRTU?=C*2#OTJO~nm%%mlC*F`#*FuN6+0WH*NPb5{f!IcRaD~fb1pbwr@~WZO#d3F
zg~BBValpTt;Erz<o+tIY@SG0Tt*yl~Q<~$vNQK`R%=o^&dRQdmV{y1U298j8%U+CM
zIxo+K1qZS7b6&XdLxnF3l5rGbfFYAA5PX~9<>M;<_JZ_Kww$AB4TiRI$2@OWzIl*b
z$7@=6E}#ZGb}-<EISOAloAJBx2IzjG5}z+>iA6FW{4k60=3i;y-ppD&H`NKxhbw$z
zF5{yMUBIbxHQvniz_})sAC_J#oudQ&@M=6atr1oYQhC59M&2h}5AmB1VpWm{o*u38
zP6ozz3{hZr^?oc?8)N<N2EM@&TT68iFjMY1df??vDnBW8Ieys%*yKvAdCvp2(gzzl
zGk(pYz{Q9v%)8$XbEL17vGUAI(!sM)2k}^(2U?v}{<1gY{rwgAtW_nR`qCB6YgPVz
z72~EH9n|S7(D!#Y49-`0?bk-`xk(E{*O%kHg>E?g1y^35%=jHQ1u`uAas3HjTxL-@
z^Odon(SgU>8az401+)6e_<hUBwavBAt@sGW*16%$c$tUf`^4-nXY1-@sl?Hq{yx!C
z)3KrViN9(LroZ(<(Zkg-<57FUOaK4x|4y0?4bABP$B*w!OGc8cK+G~bN-<GMaUsF%
z?+^$G>ean_K%bz%9=*Ev?bSOVD6ntOfZjd&^bHIM>?^OJUVVdl_U;knV41gDeW9bQ
z4mjGVFeeA$zbaRc)@NEdzIg2VFLkUZfgY{J7#{=aDa$hr06kIMc@po@(n|Zs@RmOX
z??37}PeMLgDe-*_(o<^E0L8y5G*2Qu`sV8M7*c-2NDt~PPr^KE*vDZ;sSOhP14fp<
z9^b=9UD)z5HtU);2=k}cpTv38C!HR{@$_sEr(p+=3e)TT{v6)MrR!fn4{N0l@3DUv
jQKKoA;Eqd<bJRICt(rMLA6E5rj=Gjck$;Z<5BmNa5aJ}4

literal 0
HcmV?d00001

diff --git a/test/models/data/domino_output.pth b/test/models/data/domino_output.pth
deleted file mode 100644
index 432b105c9dd4efba3e386de72a9e97e2d348c794..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 5255
zcmbVQ2~^e9_Qna3NQo5Z*?i`Jh$uvRn<CtSd%0Y$h&Unw0&)ex3!2Z&L<K=X0R=%3
zh<Q{H5dBV>QsM+ofFk+KIkg;~nOWAmH>;&q|NmR>thLu!>)f-y^PPS6J^TDVwW32u
zBO?<NqdzrEBQqnv(8WO^q28epx+M|1v3@=}AFCz7T8X2H(X8Lv>h>Rm8I4dF`RYQ$
zeFLq$BXmL9rKaBD{tF_4w0_<?|B$7j;ogxpreW<yjPUBLUuCI&rcZ!>So`NLrV?$a
zk8axYLDR4fUhVXAtiw8bb<p>|OCrKLxo&Q!3o~|C7%AE*{1hFd!#e9n6de`qqQgu!
z`1*!wwf?@kpwN(|rf%VW{^9<9PC>pp)37dYYr;%*VXtYz%oObc^{WTA4}6Y3F6{Nd
zPOgvis9k?Y?Gzo>O+TXOWI#3F5NL3ILl<VD341fbO0DQ@vdGEiyK8zz22=lS%=#O>
z@k-3B7KVl|_R)Dq`iC#obCfu~{zvG3jo>%~c+I`GD8sf5_V?gDjT~M9U+})pKh&^i
ziQ~kVVShiK|F_o$>2z9u1BrEfwYRaJfWiKrM0X?mS4bGLVyv~q(e{60;Nh+mk0-ma
z$<AUtS-71-hP#M}4({Bos21&0c2LBJa*;lII$yTghbOC3sP1Ehu&!3K_O)WHJeo%1
z0#xEi?-}eqv>LO9?4pC73bCNrgR}PSf&G+Sv{$AO<`Hh({$@2s&P}IB8<gT|s0VNR
zJ|DM2is_J_OpMm5xV7q2JngWZKC^Wd$;N8FujljQ)pW8kbr&PMX?XV6#jx97N|)*-
zV(&V4*5y>9U(a+Z*y$?bGc>$uTQMxgWYD`i)#9p=8+Y4Viy?zD>B1BjF-tXzTl-a@
zv40XhOp*xCMt2^VR)#vaBzlLGqBT*&70FfDcx5}8*1Cz?Ev~%3vKq&3ZYQ(R67g)U
zhNqPjBI;r+(KDI&%5)Al-l~G+!xY+;uMo-S)%@j^{m5Cdjk>6mBJ#K=SAA8ErOK^z
z{GMERwzzZ5of23*$f6d7QcRxi!E?F-eS@><gFaGmqE5q;rj}xnCZ1w$D@5H+H&(4F
zfjT&cMo*9k<Jlg(ZfHIhe3VXRhxGBipyvH)hv2d!p3(-miUoz!c=r7ocy>*tvNsgM
z_7@jkb>jd!cTJ=`Qj5%oZk#t_4=NUA(6Sj$;!UL+f7rhgvd_~fxx0&yI=FG#g(6I;
zOQq7qa^Yp7<mS9R2)dI>#!Yf@bi9f;@2<d*iHWrQef@bgYS`h2av1l@qwlPg!ezW0
z|0A^)pEhMu`E0rPXxTI#)}<218&l}pp)#R6q2c_>O8jD%M6Kyl#iATF*SXX}Tar%p
zQ4%p{f{JCA%i(XCOCibnc!p`XZbB^<4Bkm*cbvtXBzHD;E5(pCNp$C3SD}5V;P(G2
z#_g#)Xl-Yu=u<U^t+RLI?$&gYJ#-Yubkq1JX$c;kPNMvFG7%cCk9Xl-oVQD%U_Td8
zROrE1ODdtV*h<?%m15>E8a6vq4b`))wANoPh8BBqU33lFM(?D#-4$YRwi{o&U4{VL
zt#s})rH~1I{=Hp}$n<U0maG;XE~<ElKk!xWVlr-V5eb9c`KL*H(a<%WmdsL#!6QAm
z-lqh!x~0*~2!)una2i|o-iI&Jv*{O=ix}DL%Buaf=;OSd9v^lQUi$b*=T*bfVl%BG
zSJ8TX8fR)sQ9U$;Vz0`?o^jK8`Qs`)oSRDB8|C70s+uh)9f19g?R2?CDZU-#!7DqJ
zVb7GE<n@+B@Ju!5Tq}V6;q5f(6S=sUrREqe!OfZ5$hKT2HYz<ha90^_^@yX94bDPU
z>(0xf%Wxq#i6T}jg;8HWR;tP{>~IFfl*`45@@c%NvIe)T@+eU1Bv!`F;EDq^sDGM5
z89IenJ4D6W3$>7xr&DHuN?3Jv=bI<?A;vVFN`8`wD_1o9WY<1i_e!DOk5%GPtA2m>
zS@0{#qkYaQ@g&8Ix0jS)_@)%<6{rwpJ2m|9Vi_{ZlBgiqMHu_Lv)Z~Cg)tR0$VDv%
zl}+cB4MnJuY@#a_uEO>^70<V-!jT)PG{r?B_U(7$k(aA5ur-NR>G$W+PRYi>dob)>
zJ)bpl@xf3J{@39uOnR72F;nEi=_>`_@u-2(zBt<Wg`3EcPve%yrP$h0e}A1+h&jP(
zmTcG$i`nTEJ<VB+bnxK#pMfz;;_2t3PU8Fn72mqE8?i@H$<5AH*qgg?$jl04EZ#;Q
z@2bSe`5IO~C_tZ0S(MY+MYN4q^6lI`=rv>qU5V2FA1%}zXi^EQ(n5;dsul;6m7Md{
z0aVoIQ`Bi^Q8iA*KMbnDbo*>Ntj~|j*-F+NDM7q6iEd6;3GYTXwz*%5yvr%{v#Cs!
zu5;sqUlhX6DucdR<tAz(J=kpTZmgM_L2D}|;v-8X4@;{+&f`o{Tgye?>l*HOy9&dG
zXHe!urI_>XG;Z5ag$A>1I$)_3vHJT%JEs^&D1!`@YZ+;mPS%$#gZ*EXYtmm;u5l7a
zhyST^Juj_Z;ZnX!o6t*nmR8^O!}yLlxVzhlo%$!Ew)!F&k6etr;bpjJ<j9xiZo|C$
zmniwj9H`$c#blL?7xYX(^6(23dp!utWAkyaLBTs}l5u(WIdbR|fUYz5AhEBUy`Cna
zDf1ld|7a$hLW?lz7b!cA+JZGEEp&c{H~LkU;zo>=-*bz?`g3RK&94?<b4wmpBuZEo
z6NRp=_4G&+4&~BZWVX2Q%Du7hT6>lXlICLj=q#+<=fqu>ry=H03uTtffYPB7Azw;a
z9F0Mm>J)YFG#}!f0$h0{=i~>m(A0iT5y4uVa@>va51qN~<O;|d4w8c;0;Lyf5Ps2_
z2h^`eq49aD%kV-xiqLXi#w+Aec>C@V@}3)xJ_8Go(?`nnpKrjYaZPl4Yb4C0vN1S8
z%3s`%MT6f7+UTK0*6n;Kz2#iknu;^sn<?JO56AA6Lo!*yx4J~Za@H|YckqK}Y93mL
z$T{;#B1ZbOk@e(2jP@&oS*x7C+ZBcS1&1g+F9JU;Duv^27uNOMgl`u&Q-nqh^`PDG
zyDnj`XY28^;~C2JUJUcXJnU@m%xS&i;5FbRnZF&3!$D=ZZY*aP&-KWBuaPe0hhX1(
zSy<jp!7ES4!ggviCCv<j^y5O%aw)f8w*|@vO=KAsfR#Q4=pm$BN?T#m@k`q1tHo!D
z)!0-&mFteJz%kboWZ8Qm`jk{);Bg5jkKKYB)y?F&#S1Y*it%B%oUJx&!Q`~F^z?^?
z2!D`+8~2=fMCT1y<8g*mQ9+pMkPG`8Qm)hI<m!Jm(8(S?ka*_d*#l?3WD$qeO~+`^
zT`eN?{MsFmv9@LdY%Vm@zhBd0w?!7#H%wtO*CZ^u{Uu@VBxEWIFnzF;w|>0={n}2`
zqbp&MO)Nw&TPZ6JufdASPYBmn;h;q|7Q8-{<2EOtYt>oGeKrGmbBf^6PQfV+3GfVR
zq+@sHBGSJU!@iYpzwFKE`+W-?KkbhNJ@fHNoQwyKUWYk-&rtl)0Ng0ZMx?QXdmT!E
zt@9a5o-r4_CgfmCmV~<uTZ_PdoF<3DU_7zS#Ey>@oO(YA^`?!a{oWt_nu~F%zl1fd
z>+$sIQL0D^M)#0B{A42K<-HRz(ef1S%b%_HE5__PDesDlgUN$4bXUF@!<$QC{<VyG
zR2<S4pQ7VC7D9V12TKP^+4#qJ`0AR-{QFs0d#VW8*PYnPDisHRK1UrjLFm(N57L7q
zY~#KWp*vgX`+@#AUyy^z<DK|U<~B5&H&J$!1P!n6LGa_L>>rSfpEH~2qcA_rnOBHU
ztmWK&P#i7~I7e%S1)$768^?x7`Lj!j*sp1!4EUhJvjp#Vlyh6xWMu4Uro}#iSlhD#
zce12B`Aj?(zNbH<`~JB2EDzteNZE$sk+-UeroG{dxXN6N&2!=st3(v+J4d<w=Hc>^
zLRkDL<p+^*nD29*ro?HGysHrZte3HQ=UC)lY@)8+0%3I`2hAVKdHD7X*i+d=t16ek
zq%;?@w`BaFJ`T&98tF<mUl>OgLpwvkqk61E!I;Bj^}{0AjV;G(ha`MBeG87SZK4Z9
z{Lt1Z7dh?aTo<K}!G#N?ygL^?w(Ld5H*$9P_cmDdYaj<Tq3^L$jM?kRCSF-MRMSF5
zr-E=RVGl;kmGiJZu?Se%L}dj5NIjSXm6ePqZjQ$62`5P%?T@21#mH=yaoyleXj{=j
z$=m%AF3Ck%os4^!Z^l&JIhypjADjo~W1@qE+nz>ahg}1uT+^bXrU(;P%X!Q@@$gii
zrrEh}(2g#|cDXa3I}?io$)_ni!5cwSvT><R!mifq(S6%-iklt|FS|^bPj}Y8!4r`0
za+>-}d~vRMKgxcQ@W!7KkQ3ZU3#Bt)HL?I@YG-!(W*sDB&eMREp3n}?!m&>zoH!vF
ztrITNREfT(1mz;l(V0ixTZ!({6SO4Q7uRl-p}Ut8&l?hrK9kQ-(5PU9AIryMX9@Rr
zUxRtOPEyzT;jpO72EK9T6Qed__xZDQ<Lp9=eV7TqgZjF)YzundKTES7`Qm|F4u1Gj
z#*eZS(9-WT&2R6GAMO>Qy4;zws@LIW<O#Z#>jzp^2(tllzCSDxk6kWN;F18$e6JMA
z*JK<YpMZ~^oTd9oAvpE=$9Np)!UygrBR-&s?pAxE{#p^XnK|>+DY3ZQvy~RvhG71o
zo#?qx&N_XaNi;u6Mqkgvy`e>@3zu;HqcnUrzL}zGv>4o44vU*ozB)M`@ypIpqE3Ie
zH|FDN`^g+QFcX6kF3?r|J-K;tEzZ?Sd4Op&_L?7|M+w2Oj4np(1PO<{z78tQNpc+*
ziTgXVF?O7cW6Yv)rRofw9lQj$d^7Otdm^&9$0@y3VX(h{PxLSv^U8bTk8k|9|N95U
zcvHiQr!xYkx7UAn8|?2_9A-57l@+ai{rFz$A0FhR4O-!2P(i$d{3MPe{%Zo`CfL}%
zWA~2D1Zx}HiS`rhtnIDGTie;$T3b)F9siD<-9#Ik@izJa1Ix@Y-E2na85&HjXlEe%
zhjR3yK4NM3;idN1I?}5cFUlbOU-GxrD<dysyrQeT3iYCx(eov!Eq{XgLq&NN<i&So
z_m@B{^?QGr&vO;zPY5q$(#r^^^orYGAn4_wm*e_kX7qT8!=N`_MfffMUWIt^*oM6X
zk@zQwzfLH%!q|B9Z=M4>jQIn@Am<s{@BKFh0ZZTPEuQy!zh8fP>$kl>*S9(u6ehh-
LXCuS>=X3u9)v$>-

diff --git a/test/models/domino/test_domino.py b/test/models/domino/test_domino.py
index e606b9ecf8..7e0643b92f 100644
--- a/test/models/domino/test_domino.py
+++ b/test/models/domino/test_domino.py
@@ -59,110 +59,118 @@ def validate_domino(
         return compare_output(output, output_target, rtol, atol)
 
 
-@import_or_fail("warp")
-@pytest.mark.parametrize("device", ["cuda:0"])
-def test_domino_forward(device, pytestconfig):
-    """Test domino forward pass"""
+@dataclass
+class model_params:
+    @dataclass
+    class geometry_rep:
+        @dataclass
+        class geo_conv:
+            base_neurons: int = 32
+            base_neurons_in: int = 1
+            base_neurons_out: int = 1
+            surface_hops: int = 1
+            volume_hops: int = 1
+            volume_radii: Sequence = (0.1, 0.5, 1.0, 2.5)
+            volume_neighbors_in_radius: Sequence = (32, 64, 128, 256)
+            surface_radii: Sequence = (0.01, 0.05, 1.0)
+            surface_neighbors_in_radius: Sequence = (8, 16, 128)
+            activation: str = "gelu"
+            fourier_features: bool = False
+            num_modes: int = 5
 
-    from physicsnemo.models.domino.model import DoMINO
+        @dataclass
+        class geo_processor:
+            base_filters: int = 8
+            activation: str = "gelu"
+            processor_type: str = "unet"
+            self_attention: bool = False
+            cross_attention: bool = False
+            volume_sdf_scaling_factor: Sequence = (0.04,)
+            surface_sdf_scaling_factor: Sequence = (0.01, 0.02, 0.04)
 
-    torch.manual_seed(0)
+        base_filters: int = 8
+        geo_conv = geo_conv
+        geo_processor = geo_processor
 
     @dataclass
-    class model_params:
-        @dataclass
-        class geometry_rep:
-            @dataclass
-            class geo_conv:
-                base_neurons: int = 32
-                base_neurons_in: int = 8
-                base_neurons_out: int = 8
-                surface_hops: int = 1
-                volume_hops: int = 1
-                volume_radii: Sequence = (0.1, 0.5)
-                volume_neighbors_in_radius: Sequence = (10, 10)
-                surface_radii: Sequence = (0.05,)
-                surface_neighbors_in_radius: Sequence = (10,)
-                activation: str = "relu"
-                fourier_features: bool = False
-                num_modes: int = 5
-
-            @dataclass
-            class geo_processor:
-                base_filters: int = 8
-                activation: str = "relu"
-                processor_type: str = "unet"
-                self_attention: bool = True
-                cross_attention: bool = False
+    class geometry_local:
+        base_layer: int = 512
+        volume_neighbors_in_radius: Sequence = (64, 128)
+        surface_neighbors_in_radius: Sequence = (32, 128)
+        volume_radii: Sequence = (0.1, 0.25)
+        surface_radii: Sequence = (0.05, 0.25)
 
-            base_filters: int = 8
-            geo_conv = geo_conv
-            geo_processor = geo_processor
+    @dataclass
+    class nn_basis_functions:
+        base_layer: int = 512
+        fourier_features: bool = True
+        num_modes: int = 5
+        activation: str = "gelu"
 
-        @dataclass
-        class geometry_local:
-            base_layer: int = 512
-            volume_neighbors_in_radius: Sequence = (128, 128)
-            surface_neighbors_in_radius: Sequence = (128,)
-            volume_radii: Sequence = (0.05, 0.1)
-            surface_radii: Sequence = (0.05,)
+    @dataclass
+    class local_point_conv:
+        activation: str = "gelu"
 
-        @dataclass
-        class nn_basis_functions:
-            base_layer: int = 512
-            fourier_features: bool = False
-            num_modes: int = 5
-            activation: str = "relu"
+    @dataclass
+    class aggregation_model:
+        base_layer: int = 512
+        activation: str = "gelu"
 
-        @dataclass
-        class local_point_conv:
-            activation: str = "relu"
+    @dataclass
+    class position_encoder:
+        base_neurons: int = 512
+        activation: str = "gelu"
+        fourier_features: bool = True
+        num_modes: int = 5
 
-        @dataclass
-        class aggregation_model:
-            base_layer: int = 512
-            activation: str = "relu"
+    @dataclass
+    class parameter_model:
+        base_layer: int = 512
+        fourier_features: bool = False
+        num_modes: int = 5
+        activation: str = "gelu"
+
+    model_type: str = "combined"
+    activation: str = "gelu"
+    interp_res: Sequence = (128, 64, 64)
+    use_sdf_in_basis_func: bool = True
+    positional_encoding: bool = False
+    surface_neighbors: bool = True
+    num_neighbors_surface: int = 7
+    num_neighbors_volume: int = 10
+    use_surface_normals: bool = True
+    use_surface_area: bool = True
+    encode_parameters: bool = False
+    combine_volume_surface: bool = False
+    geometry_encoding_type: str = "both"
+    solution_calculation_mode: str = "two-loop"
+    geometry_rep = geometry_rep
+    nn_basis_functions = nn_basis_functions
+    aggregation_model = aggregation_model
+    position_encoder = position_encoder
+    geometry_local = geometry_local
 
-        @dataclass
-        class position_encoder:
-            base_neurons: int = 512
-            activation: str = "relu"
-            fourier_features: bool = False
-            num_modes: int = 5
 
-        @dataclass
-        class parameter_model:
-            base_layer: int = 512
-            fourier_features: bool = True
-            num_modes: int = 5
-            activation: str = "relu"
-
-        model_type: str = "combined"
-        activation: str = "relu"
-        interp_res: Sequence = (128, 128, 128)
-        use_sdf_in_basis_func: bool = True
-        positional_encoding: bool = False
-        surface_neighbors: bool = True
-        num_neighbors_surface: int = 7
-        num_neighbors_volume: int = 7
-        use_surface_normals: bool = True
-        use_surface_area: bool = True
-        encode_parameters: bool = False
-        combine_volume_surface: bool = False
-        geometry_encoding_type: str = "both"
-        solution_calculation_mode: str = "two-loop"
-        geometry_rep = geometry_rep
-        nn_basis_functions = nn_basis_functions
-        aggregation_model = aggregation_model
-        position_encoder = position_encoder
-        geometry_local = geometry_local
+@import_or_fail("warp")
+@pytest.mark.parametrize("device", ["cuda:0"])
+@pytest.mark.parametrize("processor_type", ["unet", "conv"])
+def test_domino_forward(device, processor_type, pytestconfig):
+    """Test domino forward pass"""
+
+    from physicsnemo.models.domino.model import DoMINO
+
+    torch.manual_seed(0)
+
+    params = model_params()
+
+    params.geometry_rep.geo_processor.processor_type = processor_type
 
     model = DoMINO(
         input_features=3,
         output_features_vol=4,
         output_features_surf=5,
         global_features=2,
-        model_parameters=model_params,
+        model_parameters=params,
     ).to(device)
 
     bsize = 1
@@ -213,5 +221,8 @@ class parameter_model:
     }
 
     assert validate_domino(
-        model, input_dict, file_name="domino_output.pth", device=device
+        model,
+        input_dict,
+        file_name=f"domino_output-{processor_type}.pth",
+        device=device,
     )
diff --git a/test/models/domino/test_domino_geometry_rep.py b/test/models/domino/test_domino_geometry_rep.py
index 628e760aa5..940d64a9df 100644
--- a/test/models/domino/test_domino_geometry_rep.py
+++ b/test/models/domino/test_domino_geometry_rep.py
@@ -16,6 +16,7 @@
 
 from dataclasses import dataclass
 
+import numpy as np
 import pytest
 import torch
 
@@ -36,19 +37,27 @@ class TestParams:
         base_neurons: int = 32
         base_neurons_in: int = 8
         fourier_features: bool = False
+        neighbors_in_radius: int = 8
         num_modes: int = 5
         activation: str = act
 
     params = TestParams()
     params.fourier_features = fourier_features
 
+    input_features = 3
+
     grid_resolution = [32, 32, 32]
 
     layer = GeoConvOut(
-        input_features=3, model_parameters=params, grid_resolution=grid_resolution
+        input_features=input_features,
+        neighbors_in_radius=params.neighbors_in_radius,
+        model_parameters=params,
+        grid_resolution=grid_resolution,
     ).to(device)
 
-    x = torch.randn(1, 32 * 32 * 32, 10, 3).to(device)
+    x = torch.randn(1, np.prod(grid_resolution), params.neighbors_in_radius, 3).to(
+        device
+    )
     grid = torch.randn(1, *grid_resolution, 3).to(device)
 
     output = layer(x, grid)

From e91f2630b30ed8256173d13e891f67c2efe987d1 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Fri, 17 Oct 2025 11:53:36 -0500
Subject: [PATCH 88/98] Hopefully fix inference script

---
 .../external_aerodynamics/domino/src/inference_on_stl.py    | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
index 89d7a9ba24..7276807bfa 100644
--- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
+++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py
@@ -63,9 +63,7 @@
     DoMINODataPipe,
     create_domino_dataset,
 )
-from physicsnemo.datapipes.cae.drivaer_ml_dataset import (
-    DrivaerMLDataset,
-)
+
 
 from physicsnemo.models.domino.model import DoMINO
 from physicsnemo.utils.domino.utils import sample_points_on_mesh
@@ -378,7 +376,7 @@ def inference_on_single_stl(
 
 
 def inference_epoch(
-    dataloader: DrivaerMLDataset,
+    dataloader: DoMINODataPipe,
     sampler: DistributedSampler,
     model: DoMINO,
     gpu_handle: int,

From e9dbac9c1c4b92f52936dcb270753ba270bdc00e Mon Sep 17 00:00:00 2001
From: Rishikesh Ranade <dr.rranade@gmail.com>
Date: Fri, 17 Oct 2025 11:51:53 -0700
Subject: [PATCH 89/98] fixes to scaling and adding configs

---
 .../domino/src/conf/config.yaml               | 23 ++++++++++-
 .../external_aerodynamics/domino/src/loss.py  | 13 +++---
 .../external_aerodynamics/domino/src/test.py  |  9 +++-
 .../external_aerodynamics/domino/src/train.py | 41 ++++++++++++++++---
 .../external_aerodynamics/domino/src/utils.py |  8 ++--
 physicsnemo/datapipes/cae/cae_dataset.py      | 22 +++++-----
 physicsnemo/datapipes/cae/domino_datapipe.py  |  8 ++--
 7 files changed, 89 insertions(+), 35 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
index b1b25515b4..b074681ce4 100644
--- a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
+++ b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml
@@ -86,7 +86,7 @@ data: # Input directory for training and validation data
   sampling: true
   scaling_factors: ${project_dir}/scaling_factors/scaling_factors.pkl
   volume_sample_from_disk: true
-  max_samples_for_statistics: 100
+  max_samples_for_statistics: 200
 
 # ┌───────────────────────────────────────────┐
 # │          Domain Parallelism Settings      │
@@ -118,7 +118,7 @@ model:
   use_surface_normals: true # Use surface normals and surface areas for surface computation?
   use_surface_area: true # Use only surface normals and not surface area
   integral_loss_scaling_factor: 100 # Scale integral loss by this factor
-  normalization: mean_std_scaling # or min_max_scaling
+  normalization: min_max_scaling # or mean_std_scaling
   encode_parameters: false # encode inlet velocity and air density in the model
   surf_loss_scaling: 5.0 # scale surface loss with this factor in combined mode
   vol_loss_scaling: 1.0 # scale volume loss with this factor in combined mode
@@ -188,6 +188,25 @@ train: # Training configurable parameters
     drop_last: false
   checkpoint_dir: /user/models/ # Use only for retraining
   add_physics_loss: false
+  lr_scheduler:
+    name: MultiStepLR # Also supports CosineAnnealingLR  
+    milestones: [50, 200, 400, 500, 600, 700, 800, 900] # only used if lr_scheduler is MultiStepLR
+    gamma: 0.5 # only used if lr_scheduler is MultiStepLR
+    T_max: ${train.epochs} # only used if lr_scheduler is CosineAnnealingLR
+    eta_min: 1e-6 # only used if lr_scheduler is CosineAnnealingLR
+  optimizer:
+    name: Adam # or AdamW
+    lr: 0.001
+    weight_decay: 0.0
+  amp:
+    enabled: true
+    autocast:
+      dtype: torch.float16
+    scaler:
+      _target_: torch.cuda.amp.GradScaler
+      enabled: ${..enabled}
+    clip_grad: true
+    grad_max_norm: 2.0
 
 
 # ┌───────────────────────────────────────────┐
diff --git a/examples/cfd/external_aerodynamics/domino/src/loss.py b/examples/cfd/external_aerodynamics/domino/src/loss.py
index 60c52beb32..cb161cb06e 100644
--- a/examples/cfd/external_aerodynamics/domino/src/loss.py
+++ b/examples/cfd/external_aerodynamics/domino/src/loss.py
@@ -227,8 +227,8 @@ def loss_fn(
 
     num = torch.sum(mask * (output - target) ** 2.0, dims)
     if loss_type == "rmse":
-        denom = torch.sum(mask * target**2.0, dims)
-        loss = torch.mean(torch.sqrt(num / denom))
+        denom = torch.sum(mask * (target - torch.mean(target, (0, 1)))**2.0, dims)
+        loss = torch.mean(num / denom)
     elif loss_type == "mse":
         denom = torch.sum(mask)
         loss = torch.mean(num / denom)
@@ -306,12 +306,12 @@ def loss_fn_surface(
         masked_loss_pres = numerator
         masked_loss_ws = torch.sum(vector_diff_sq)
     else:
-        denom = torch.mean((target_scalar) ** 2.0)
+        denom = torch.mean((target_scalar - torch.mean(target_scalar, (0, 1))) ** 2.0)
         masked_loss_pres = numerator / denom
 
         # Compute the mean diff**2 of the vector component, leave the last dimension:
         masked_loss_ws_num = vector_diff_sq
-        masked_loss_ws_denom = torch.mean((target_vector) ** 2.0, (0, 1))
+        masked_loss_ws_denom = torch.mean((target_vector - torch.mean(target_vector, (0, 1))) ** 2.0, (0, 1))
         masked_loss_ws = torch.sum(masked_loss_ws_num / masked_loss_ws_denom)
 
     loss = masked_loss_pres + masked_loss_ws
@@ -359,13 +359,12 @@ def loss_fn_area(
     # Compute the mean diff**2 of the scalar component:
     masked_loss_pres = torch.mean(((output_scalar - target_scalar) ** 2.0), dim=(0, 1))
     if loss_type == "rmse":
-        masked_loss_pres /= torch.mean(target_scalar**2.0, dim=(0, 1))
+        masked_loss_pres /= torch.mean((target_scalar-torch.mean(target_scalar, (0, 1)))**2.0, dim=(0, 1))
 
     # Compute the mean diff**2 of the vector component, leave the last dimension:
     masked_loss_ws = torch.mean((target_vector - output_vector) ** 2.0, (0, 1))
-
     if loss_type == "rmse":
-        masked_loss_ws /= torch.mean((target_vector) ** 2.0, (0, 1))
+        masked_loss_ws /= torch.mean((target_vector - torch.mean(target_vector, (0, 1))) ** 2.0, (0, 1))
 
     # Combine the scalar and vector components:
     loss = 0.25 * (masked_loss_pres + torch.sum(masked_loss_ws))
diff --git a/examples/cfd/external_aerodynamics/domino/src/test.py b/examples/cfd/external_aerodynamics/domino/src/test.py
index 2fa0de34f7..d00d6dcd8d 100644
--- a/examples/cfd/external_aerodynamics/domino/src/test.py
+++ b/examples/cfd/external_aerodynamics/domino/src/test.py
@@ -56,6 +56,7 @@
 from physicsnemo.distributed import DistributedManager
 from physicsnemo.datapipes.cae.domino_datapipe import DoMINODataPipe
 from physicsnemo.models.domino.model import DoMINO
+from physicsnemo.models.domino.geometry_rep import scale_sdf
 from physicsnemo.utils.domino.utils import *
 from physicsnemo.utils.domino.vtk_file_utils import *
 from physicsnemo.utils.sdf import signed_distance_field
@@ -159,7 +160,7 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors):
             prediction_vol = torch.zeros_like(target_vol)
             num_points = volume_mesh_centers.shape[1]
             subdomain_points = int(np.floor(num_points / point_batch_size))
-
+            sdf_scaling_factor = cfg.model.geometry_rep.geo_processor.volume_sdf_scaling_factor
             start_time = time.time()
 
             for p in range(subdomain_points + 1):
@@ -171,6 +172,11 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors):
                         :, start_idx:end_idx
                     ]
                     sdf_nodes_batch = sdf_nodes[:, start_idx:end_idx]
+                    scaled_sdf_nodes_batch = []
+                    for p in range(len(sdf_scaling_factor)):
+                        scaled_sdf_nodes_batch.append(scale_sdf(sdf_nodes_batch, sdf_scaling_factor[p]))
+                    scaled_sdf_nodes_batch = torch.cat(scaled_sdf_nodes_batch, dim=-1)
+
                     pos_volume_closest_batch = pos_volume_closest[:, start_idx:end_idx]
                     pos_normals_com_batch = pos_volume_center_of_mass[
                         :, start_idx:end_idx
@@ -184,6 +190,7 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors):
                         pos_encoding_all = torch.cat(
                             (
                                 sdf_nodes_batch,
+                                scaled_sdf_nodes_batch,
                                 pos_volume_closest_batch,
                                 pos_normals_com_batch,
                             ),
diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py
index 070bb71412..9758ed7e2f 100644
--- a/examples/cfd/external_aerodynamics/domino/src/train.py
+++ b/examples/cfd/external_aerodynamics/domino/src/train.py
@@ -100,6 +100,7 @@ def validation_step(
     bounding_box: torch.Tensor | None = None,
     vol_factors: torch.Tensor | None = None,
     add_physics_loss=False,
+    autocast_enabled=None,
 ):
     dm = DistributedManager()
     running_vloss = 0.0
@@ -109,7 +110,7 @@ def validation_step(
         for i_batch, sample_batched in enumerate(dataloader):
             sampled_batched = dict_to_device(sample_batched, device)
 
-            with autocast("cuda", enabled=True, cache_enabled=False):
+            with autocast("cuda", enabled=autocast_enabled, cache_enabled=False):
                 if add_physics_loss:
                     prediction_vol, prediction_surf = model(
                         sampled_batched, return_volume_neighbors=True
@@ -189,6 +190,9 @@ def train_epoch(
     vol_factors: torch.Tensor | None = None,
     surf_factors: torch.Tensor | None = None,
     add_physics_loss=False,
+    autocast_enabled=None,
+    grad_clip_enabled=None,
+    grad_max_norm=None,
 ):
     dm = DistributedManager()
 
@@ -205,8 +209,7 @@ def train_epoch(
             io_end_time = time.perf_counter()
             if add_physics_loss:
                 autocast_enabled = False
-            else:
-                autocast_enabled = True
+        
             with autocast("cuda", enabled=autocast_enabled, cache_enabled=False):
                 with nvtx.range("Model Forward Pass"):
                     if add_physics_loss:
@@ -251,6 +254,14 @@ def train_epoch(
             scaler.scale(loss).backward()
 
             if ((i_batch + 1) % loss_interval == 0) or (i_batch + 1 == len(dataloader)):
+                if grad_clip_enabled:
+                    # Unscales the gradients of optimizer's assigned params in-place.
+                    scaler.unscale_(optimizer)
+
+                    # Since the gradients of optimizer's assigned params are unscaled, clips as usual.
+                    torch.nn.utils.clip_grad_norm_(
+                        model.parameters(), grad_max_norm
+                    )
                 scaler.step(optimizer)
                 scaler.update()
                 optimizer.zero_grad()
@@ -483,10 +494,24 @@ def main(cfg: DictConfig) -> None:
     # Initialize optimzer and gradient scaler
     ######################################################
 
-    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
-    scheduler = torch.optim.lr_scheduler.MultiStepLR(
-        optimizer, milestones=[50, 100, 200, 250, 300, 350, 400, 450], gamma=0.5
+    optimizer_class = None
+    if cfg.train.optimizer.name == "Adam":
+        optimizer_class = torch.optim.Adam
+    elif cfg.train.optimizer.name == "AdamW":
+        optimizer_class = torch.optim.AdamW
+    else:
+        raise ValueError(f"Unsupported optimizer: {cfg.train.optimizer.name}")
+    optimizer = optimizer_class(model.parameters(), lr=cfg.train.optimizer.lr, weight_decay=cfg.train.optimizer.weight_decay)
+    if cfg.train.lr_scheduler.name == "MultiStepLR":
+        scheduler = torch.optim.lr_scheduler.MultiStepLR(
+        optimizer, milestones=cfg.train.lr_scheduler.milestones, gamma=cfg.train.lr_scheduler.gamma
     )
+    elif cfg.train.lr_scheduler.name == "CosineAnnealingLR":
+        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
+            optimizer, T_max=cfg.train.lr_scheduler.T_max, eta_min=cfg.train.lr_scheduler.eta_min
+        )
+    else:
+        raise ValueError(f"Unsupported scheduler: {cfg.train.lr_scheduler.name}")
 
     # Initialize the scaler for mixed precision
     scaler = GradScaler()
@@ -586,6 +611,9 @@ def main(cfg: DictConfig) -> None:
             bounding_box=bounding_box,
             vol_factors=vol_factors,
             add_physics_loss=add_physics_loss,
+            autocast_enabled=cfg.train.amp.enabled,
+            grad_clip_enabled=cfg.train.amp.clip_grad,
+            grad_max_norm=cfg.train.amp.grad_max_norm,
         )
         epoch_end_time = time.perf_counter()
         logger.info(
@@ -612,6 +640,7 @@ def main(cfg: DictConfig) -> None:
             bounding_box=bounding_box,
             vol_factors=vol_factors,
             add_physics_loss=add_physics_loss,
+            autocast_enabled=cfg.train.amp.enabled,
         )
 
         scheduler.step()
diff --git a/examples/cfd/external_aerodynamics/domino/src/utils.py b/examples/cfd/external_aerodynamics/domino/src/utils.py
index 9970c186f4..9c144fa0c3 100644
--- a/examples/cfd/external_aerodynamics/domino/src/utils.py
+++ b/examples/cfd/external_aerodynamics/domino/src/utils.py
@@ -431,10 +431,10 @@ def metrics_fn_volume(
     l2 = l2_num / l2_denom
 
     metrics = {
-        "l2_vol_pressure": torch.mean(l2[:, 0]),
-        "l2_velocity_x": torch.mean(l2[:, 1]),
-        "l2_velocity_y": torch.mean(l2[:, 2]),
-        "l2_velocity_z": torch.mean(l2[:, 3]),
+        "l2_vol_pressure": torch.mean(l2[:, 3]),
+        "l2_velocity_x": torch.mean(l2[:, 0]),
+        "l2_velocity_y": torch.mean(l2[:, 1]),
+        "l2_velocity_z": torch.mean(l2[:, 2]),
         "l2_nut": torch.mean(l2[:, 4]),
     }
 
diff --git a/physicsnemo/datapipes/cae/cae_dataset.py b/physicsnemo/datapipes/cae/cae_dataset.py
index 21fb8c4783..b41e217635 100644
--- a/physicsnemo/datapipes/cae/cae_dataset.py
+++ b/physicsnemo/datapipes/cae/cae_dataset.py
@@ -1186,7 +1186,10 @@ def compute_mean_std_min_max(
 
     global_start = time.perf_counter()
     start = time.perf_counter()
-    for i, data in enumerate(dataset):
+    data_list = np.arange(len(dataset))
+    np.random.shuffle(data_list)
+    for i, j in enumerate(data_list):
+        data = dataset[j]
         if i >= max_samples:
             break
 
@@ -1210,7 +1213,7 @@ def compute_mean_std_min_max(
 
         end = time.perf_counter()
         iteration_time = end - start
-        print(f"on iteration {i} of {max_samples}, time: {iteration_time:.2f} seconds")
+        print(f"on iteration {i} of {max_samples}, time: {iteration_time:.2f} seconds for file: {j}")
         start = time.perf_counter()
 
     var = {}
@@ -1222,7 +1225,8 @@ def compute_mean_std_min_max(
         std[field_key] = torch.sqrt(var[field_key])
 
     start = time.perf_counter()
-    for i, data in enumerate(dataset):
+    for i, j in enumerate(data_list):
+        data = dataset[j]
         if i >= max_samples:
             break
 
@@ -1235,15 +1239,11 @@ def compute_mean_std_min_max(
 
             mean_sample = mean[field_key]
             std_sample = std[field_key]
-            # import pdb; pdb.set_trace()
             mask = torch.ones_like(field_data, dtype=torch.bool)
             for v in range(field_data.shape[-1]):
-                idx = (field_data[:, v] < mean_sample[v] - 12 * std_sample[v]) | (
-                    field_data[:, v] > mean_sample[v] + 12 * std_sample[v]
-                )
-                idx = torch.where(idx)
-                mask[idx] = False
-
+                outliers = (field_data[:, v] < mean_sample[v] - 9.0 * std_sample[v]) | (field_data[:, v] > mean_sample[v] + 9.0 * std_sample[v])
+                mask[:, v] = ~outliers
+            
             batch_min = []
             batch_max = []
             for v in range(field_data.shape[-1]):
@@ -1258,7 +1258,7 @@ def compute_mean_std_min_max(
 
         end = time.perf_counter()
         iteration_time = end - start
-        print(f"on iteration {i} of {max_samples}, time: {iteration_time:.2f} seconds")
+        print(f"on iteration {i} of {max_samples}, time: {iteration_time:.2f} seconds for file: {j}")
         start = time.perf_counter()
 
     global_end = time.perf_counter()
diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py
index fb9a920708..eb7b004d18 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe.py
@@ -851,8 +851,8 @@ def unscale_model_outputs(
                 vol_std = self.config.volume_factors[1]
                 volume_fields = unstandardize(volume_fields, vol_mean, vol_std)
             elif self.config.scaling_type == "min_max_scaling":
-                vol_min = self.config.volume_factors[0]
-                vol_max = self.config.volume_factors[1]
+                vol_min = self.config.volume_factors[1]
+                vol_max = self.config.volume_factors[0]
                 volume_fields = unnormalize(volume_fields, vol_max, vol_min)
         if surface_fields is not None:
             if self.config.scaling_type == "mean_std_scaling":
@@ -860,8 +860,8 @@ def unscale_model_outputs(
                 surf_std = self.config.surface_factors[1]
                 surface_fields = unstandardize(surface_fields, surf_mean, surf_std)
             elif self.config.scaling_type == "min_max_scaling":
-                surf_min = self.config.surface_factors[0]
-                surf_max = self.config.surface_factors[1]
+                surf_min = self.config.surface_factors[1]
+                surf_max = self.config.surface_factors[0]
                 surface_fields = unnormalize(surface_fields, surf_max, surf_min)
 
         return volume_fields, surface_fields

From 6706558fa012b4cd84491ab544e6d4e1842fe709 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Fri, 17 Oct 2025 14:14:05 -0500
Subject: [PATCH 90/98] Update README.md for domino

---
 docs/img/domino/combined-training-curve.png   | Bin 0 -> 88946 bytes
 docs/img/domino/drag-r2.jpg                   | Bin 0 -> 31447 bytes
 docs/img/domino/lift-r2.jpg                   | Bin 0 -> 31237 bytes
 docs/img/domino/surface-training-curve.png    | Bin 0 -> 71152 bytes
 .../external_aerodynamics/domino/README.md    |  33 ++++++++++++++++++
 5 files changed, 33 insertions(+)
 create mode 100644 docs/img/domino/combined-training-curve.png
 create mode 100644 docs/img/domino/drag-r2.jpg
 create mode 100644 docs/img/domino/lift-r2.jpg
 create mode 100644 docs/img/domino/surface-training-curve.png

diff --git a/docs/img/domino/combined-training-curve.png b/docs/img/domino/combined-training-curve.png
new file mode 100644
index 0000000000000000000000000000000000000000..9a56f9d76d9227e3acd9560253467bf00742baf5
GIT binary patch
literal 88946
zcmeFZWmuHk`vwXK3L-6F5z<H_-6;*i(2aC=4k0ZK(v3(+!_X}tIdnHlHxdI91Lx(p
zw_EnU{vXe`bM%rKhczpoTK985Z^&DD35<u&9wHzhU`R=dDIp--vqC^XaznijeDYLV
z-wb#`bX1ZMK`0%3u?74mz*t?%L{1Lj74RMv0R<5k;ofZ%;4cE=GX#|Xy+=ThMtuIy
zdnH7gzuO=oAOxEuAphM)6L`J-6Ak<WzWw*>y)48(J!T>OZhg-x>)!A8NN%@Zn;CG)
z0$$MUBsCln5C|x5{~<~#QT#+e5JHd=6IOOZ+-*kle9@WQdLs=nqSt_U8a_9Cv|dgm
zj~9tefZHDb)ubLme<dYCiiXVb(g2UN)kwpoJ|Tdd0vVNFf}=c57fD9;{hB~{#NoTI
z_vhXphD2Zy)fj7hgk{=7*|)d0M*1aRf9B4(@YEdB(b4G!vAg(elz2eH5fD*@5RkCn
zBOsIh_10Y~OSXDDJfNwVDBka%Pyg58?JDx@++4}0Pi=jDQ0w8WZ8J;D!Uys35@lSz
z|89xkn}+v<S^vG7$wrN}y)pUpax^yLzege!L?xX*`b_c#<@d;cO~RK83AKicLhRqO
z!tTKWKEB#~VDaB>A`$!;Fwt=Ctl;+Tkw5UU;DxW|f4ccV{$4*!R0R1yV_uQI*M~-V
zXA%6Tn`K~*|NqqgU)XhKQpO<EyOJh`>iM(n!RdzJ&CH_D4Ltm4+On*9cSrB0VXuB8
z<LN+RKk3_W<nnTS!?IDTf4&%vh%|$3S(t#FbwmZ6Y%iL=fz05C$G=Z)^(CSyTycWc
z8nI8z(O7s?_*Sfk36YFF>R#hsd_)@(`7P?c+IYVPU0Y&z$iOO#l!@9(<)E81n!zek
z6KSKeAY3ebZr^IlDG!F?(x)%1&E;TOtjJoAA5i}u=<!2DxqP}fT4!8p!ONMUhbY!j
zut%Y*jr$FBpSQsM;i=fDFTZ;$4>Q2a-}WTziTOpaTfHPwc>G)J&gHV+e$(^@<+xze
zcp}|gAKdJtv74eywqjfu2#b`eBUw&|=2DwK_^W+Z02)Q*$?CI+CEPV#3P+s>eLI3k
z*VhLH1if9>c}H6Xn1P;U`6M<@W$N^E>eCP1<CI)~ZyD<wAtpEeW(1RBQdU5nSw*sx
zTgp&t!XFW`{?^&7XtX+KW)DRAyaMIGdFpW@q+V1rFSG2ovI*A6TG_gb#7iS11`!bv
zQcAG>(+k)R_>5FxjonwJNOh*PaetfWY{6AeTZhwT-b}03CL&CMa+$OBT#oHvaXm(w
zJ7n}EcR%Eaf*3F|LIT*sJWmL(HW(unP=&I}Q6G;bJcHK+-5z<R3{!K}6o1*q1FU@)
z_nDn=54X1>dr5|o6+uDw8-D~n-?h}c)Wj+nYlQ9D`poHiY*CH@(sL}Bf5u!Oinf^;
zXX|~Oy4Y+O#Xotyw{*?Gro(&`NT~Z^&|YAtxN?`|=8(i+iq!>vu~Rn3y^&^k?*ZmX
z-okFv@!(S^@7z=TcMF6JnntD>?s>uM5`q_sCwz0oJtRojH1tSQI<7j}f%t0lJ9^8{
zB*X^MD71<azKDcm$T(YTWqXEE>Soi&P-3(t`0CnNTeEL8KsaZt=W0qgqjF*WA1P&a
zD~AM*Gm?2-ZMu+Ci(PD6&hruviZv=n3-Lnm@S-)EN=pX?E;h*(9>JFhTYm;9E>kz`
zU!D2fxH{2$uhHpAB{V6VZ49KEYSmaBV#?L`Qn#Ic!&69VNrYe$+kHi}-|OFK=vHw7
zi)m?TRoQl?-<on#Ng)tvhwiV(`QXlzQ9>);DQp@iB=CNHzSQ`3V_}C}41LXkb*siS
zrob=##+&O)(e4m>Yz78~JhlrJKKk+T@c?mCHk(YZ(?f3^t%`Sn>0YNZPoa(3+3lsR
z*B+NkH*menF3y&i3eVo2-8hhZg&urcEw2C!N$VY*1Pyb61ZiY5_-1%Lq?e|^`ROhx
zXXNjTJ0xD%i|JKDH;*AFeTAM~PfY_J)_9hJ-v~!PMJ9c#gnaT&&5FsDa>AF500{@F
z^O}F3vyH=QnwfVsT2$mgGIB)w&mSFld1zkD9Wll+UY$d{2ThMQX5-TX#8_Ek;{C$-
z##%2MTaUk$)0ab@P#V%#)VLp;MD_JjXQnyxJUfG2;lHC<Z6GAG-G20sM-t1TNsp|m
zt)<@6k&EwB?NME{P%||(ot=;ml$cFE{egJ6CC95o>(HOXq8M{?bvz`|sh(|0x&8Qr
zT=-RMxk-^N-b7iMSK`ja#oQ->eM@+Ja31ysXfv!bu_YYd5|^rQ^#vVR-YoLZuNt#(
zg&nX&))Ku{bCJz1sI29pus*Q8cFDW3m)5W8$h>5r<#OK=OYq9>#CLt&r15x=SI%-|
z(d$gd(!v5l<T^icx(K$tJdE<O0`FExu$AzRJmZxcQsx3CwpX_j=~&yZcba*9LT&qM
zW`J|a>IM)bQxflApD>7c#s$vmhh~O+u8)VCT5sS7bNQLx7J669gnRjdaC0_2IVs1;
z*Q;^HH}EsgHD1Sc*;GNF7K|s%U#a9$j0AtRVetjij6AqUjZVl|wFKXH)%8*>)i%eY
zlKq*a?RnT);{mg1Tn?pcqQ2f~y@nPR72Q+l*1d+6`BX36T%YOY9K8_q_O$ZbQ`<(j
zk1=MelT?Mp1a|;ypIa5T4<>T?IcXcR<=T3=kmHQXz$~1TC*E>8Yq?l8Yntgbd3|bo
z!@-u@^*&GT)z!e#RYb%SmUiB=o2`MmLqQTgHFwTMx6g!LP=?xRdEULpvhN(?UJgDh
zN{K9LnX_$$E9qWcFNZPB-dqjcxGmi)*}$`0dx@T%R;aA1`Ua|;+2b4KmlyMj4>k@u
z9ppW5+YmhORmcW$f(jashLtxYNW8`c&R6jitoDHO;c(tozFa(OG3CKRcQ}DKZ7K2o
z2*7JP%sQy(O79BGE0Is*8o?XCSmZ+YR2P^DO?_<|$(dKZ?zGV0OlA3bHpl*W@54$~
z#`gGJ1(Q8B=DYL5ZW8c|H&MiU14FIf{m`vutITIC7|(BA7Aj>HyU6wTN%Zp-zYS@;
znm5czk(Pjlv-tWESmAQ+cOGH+{PJG_A2cgMBU0D%@?<t8zi$Q(lTa8rZRTFjT9&~k
z1<wz50(;lW64`uyCYG2xA4%33bOf2<*6$_~N}@3|f4!g1wMj@dLA~iDGKs8~r#oF%
zzy3@?zabn(*N9w@XK~$nb9R&MH@YyDKkGOmPS~lQc+#FP%5;n?RT{2bOjy|X<kg<u
z^%3Eij3aZ;$7~mAWbU@DQ5xNy&sUM}PH-9T^El}(^YpayXYS|c311&2ww}x~p=*-0
zot}&-q*e+^t)ht9cFQr7+*EgcFeF(=7R}xa7Q8Z0=G)TXZXNm5GRD&=CJa9Yf`r)<
zou69FoYu3eu*})x9`YzRB>mwmVO<VrB2(04_gYcKPEA^LurlGt2_3J(mZ>O$>#UnE
z&V1*qv5QI0y3gU}6c!Z9l>HRAB;Z|^8|#CN*}Q{qfNJ45GR+Y_JI0Ok&Rae{f--@F
zyGG<V7mOhL_kp>!v`A>arlGpdB5`*zOOr%6>l`Z;)Ej*Hwiwyr2O9;U@v7xb&kxZC
z;U?*}OOv0$J5Jn`GMRf<vGJJ-XE4SsZ`t9?H!P*5bSbnkXY-CxrV~7e-|+T;an8JR
zr}RhB^L0HbB2QSqfN_;PevAk{G3<_n1$<cc_hX!B`$8z|P92vk%amKY=uxzxsMOjZ
zqc8R7lxh=2MfIV^xe;3Iml5=eKBefj0VZ60>ag%{1;~~vD)BlFy_BcFR-(YcgT|jg
zx?2G;<PshULj0c-r^-p8DY~rqS0Hps`ERQ4<m9zitvWSC*5*ZEG1T&C!^+<b)o0qc
zG#*6>Ub``AdNJ4&5quLj=!t$waDtf}W{+1KOCN>X0a5CGMYU6moPUio8dgtl2zk-)
zoe(<|PfH^O-{)d0KeYvKnF$J^S9^F7yt`dgb~*@?FjVyu4W6SCol~o;`y9#hDy1eL
z50)+#@h#;-d5J-zXB7h`c)cRXR}_t07*N&ZK=p7Nulg(}R{p3(;<ZWtPx|lj{X5+_
zg8pLXMz3@U*u7dil~v_g;;ZGB9MN`;jvP|?!8NA!kK}K5q0@wdA?zie<JsjzwDu-l
zd*Rs(2BjX@qzJxtNGY%%y36e#BPvznx*37wUKT+|MP6{n*o}Nv!q&~pW_>);Xj-IM
zPQ?}JV85<{8h)BQd(~kA4j+lAXY74Dq;gM>9yAYcCi;X>%Q20zNfJ`v#_5hBQ`)hy
z%Z{avV)U)}J|O;iG+GJq>*@PUXw$iqtCB6;PMV&@WBF>?Aee~2OLi>F=<p+o+!*Xc
zWXrYq(}{=ucvyxmBRo=vO&kQOJ7Fy|9Ygc*O$d}=`UL4i&Oh2((-sCDutkiabnY1I
z!bS-D;LoT{f{8u8a5j&6jS$eQf0rY79y(nEjoq-t%M!3nyxfcc5rEJnP(q%DX!-PX
z)*UA1JHpbp>2R{lYrKn!^Y;6SRsom3BoSC~<Ia*H;MG7#`O<SOUhR#o&pA|&g9y^U
zErC`&u&0=R5P^xpS)#C_JZ3Y7wk^t8t%Rhw(jUm=wI!3$ttQtU7bwfb@vJf=Z&@1k
zyt%`o-|VUY<Jla9)aM7fp52V;9tcTOl^(vCT~Vm_y4okhFOP2rT-qS@)_i-5<mQ*e
zfgq~dr^QGwICzaOF*Nqt%i)LC(nSJiHEi>9R1Lf3q+3Fd>^|03-w!;bKv^Ls^9xSe
zL}8{-58-jF&-*v^+6kF^kSl^mYN~~*j}B4g!<27wE}H0~6i@?jHn${@7w8BqZAR3m
z*_@eaCdVr@p;Sfw-@^2Y>*c3TgwB?KErJi~H>(7Uh0*EeZblodW*}8Z%I`|{j>6az
z+kOO-?8^^imWkzS)#fLHTh$%0HYqDPQyI2VM(jUMbdXRG7{w??C%*Bq1MitX)SZrF
z$yM|608#sJw>egO&DYuMRyooyE0D{4+AhIfCt@XB>Cz*S)AHQ>6x>9kae4J_Aa~5i
z!@~dzeO6G8Ox92biHyF^JQG4J?PU7ui<N0L|4|RQ;kOhzQNs-gl<}CcI!H+{BLF4D
z3z&=f$vxlGV9;g><@{Rwc0RrDphdCI&vf-;K_;0}yeNoUBTFt@GtuRRefL`BoK53-
z*zusHrR84c^;TxnLHJE~OkIGOP!=-UCJMvUyMZVT<FgZ_ErryB4P0}c6LKxR<ohL7
z7L8o><VzZFTdyeJ(l6B-1%&mr6HKaW#5++u#Kpzc(p*|gW2BHyr4i`Y2pqL|^$FeP
zo{ejIpJ}!yY^SC8rANW>MiHR~Cj;FsiG*5SUU5AsRPB{;8ikE<XUud=l9MojSnWEk
zdTCmr<OfOFzHRcvQyc0048lvRx_Y%nqxhI(%o6f(Xk})8Dkd!s_UUM9nElGHEf7d=
z!J~mx)Q3l7N&DwnI&zGdvw}gQj!Vd5f>6IQwq9Z;{rix1Kp!_X_6eADUsN)VW$I#D
z;-YwER!t5FuH)qnh&9o>KfEwjM-ou{73}ltfo((fxAYeO6pxbOxr#E91d8$1CP_iI
zt>DbtAd00k(__1UWlH-NX6Rw_H6mlcdk1Z{+KFY?NZxN7?O`%&hSko>#hi5GZ@ZtR
zf<M)ec6vYyv-Duqi(;U3CoayJeR1y2VkP-?pHG;joYLH}$Y?r?*Pif6j*<bWQ#8Uy
z{n!nGl6Xe~dUt%~VVM%W$29Vj*@ltaU6o(Wi6V=2JkH}N-%N%u7o}!E#iG|_{p8wJ
zRfXkxLD8R+0Z#$t#i}fI=d8Kuov7<IS~4%p7lVBB@2a2Vwn+Px-tud1dde{Zg>YW;
zwJXup_^)XFF)WI$pmLT>8{(}T*FEW6IYzx|C|XTRxK|0R@n<`FGf51EpUMM`Ntn(@
z@gX@q=d~j=L{ul7VJuw_Ul}S6NN16HzL-fE;kgKDDze7~B46s73HiZ(=1ye{IAitv
zyFZo(2z@P1Sf1v@q1c(nqhj<&E!Ub<JXi&zb?v29s7V<sj6cVjk5P|$lmdDFqWZg&
zB>$_8rw;AeFi*%cvx|AUvZiH?IV0a;Hh#|ZQ8nc^@|2kK<07Vtpe_gZG7qRGB~A*>
zs>6KGaVC0XMWZ|n1lpnyS_@QcRh3o`tIfl+NV{Zc@&1%)aUC+sy;Ha!EXR8)xUv16
z@XPh(xzqk^yhW<A>y6U@uxO15HcF7?w9$fU%SD?M=pD>InD1M|R?7Le@CG4L5>&H`
zx?*-*oBD^ZWQ++`E~>Rza$caG1PLi*7w3bUX^R9iX+7GgeDi2sHG7z%Tp`%K&Gp(2
z(CuvUO`*?%yg5w9jfn<pBXe8n{P9e_E845PwlTOUxX&*-m)yx7t-pl!0GDISA->~q
z>cLep;jxvt%(S;X=THpJd2CO5@~J^Ts<4z|xra7#sgw1Tke_QF^S|<uPV)|o#2-tz
zn->=pR1}pq^i`7eeg0HunRco3mDf^*iu4ji4$^Qr<!qe2YqkF4e+wq89UPk|Vlvbd
zwA<{)l;chieFm?4`G5s-VKI>9O*&E~g@jCsEiYuL;CR@c#~RvMA<QN9?qzf<@;nyX
zeq$+#f>Y0|j_jw+d@&1*54tARy?fTR3##LwkgImkvQb|g?NgE>52<khb=>=}Iy9!(
zbf8Q;xcKx3{yeKsL?fA2AL?c+fCwq?kHO@VzPuo0!Z|OyxhPvaQPY^Dm`!(lx)IQk
z3-ejOUEqRwpGm`u@jM{n@38c%@&5_Ngw@hL-xz%Q;Yc-;YF1t6Y=KRbI_Pm?nXszV
z1ZOh)aJAIlW-rg|T*bj;lm$jXUUlDbv3J1eqUY?*UTrJfx`ix97kkqcX`hlY6~9;f
z=}fUtcGH*+h`@<BVgEQ}Tk*@`%zDh;6?#NtOK|6fj<Vk4=0Lf3Lc-wpl$vKTCDr`8
zZQNI%35_@6L@xU&A9^KYZfGadVQE;;kN1>sMW;Y0Wuh=OM*W15#%|C*6l*49I@m3r
zs>d1+XiOQQZAp!<ddy3fa~CDnC`fYt;w-{+%+9`oun$sB&(}F32^3!gc9j5ucPP|g
z>xj|@0A0=smy3Hsw%3XWj>u@C6(V|FEj2pBuIsnxi+0R%vGw!YtNlSn9-(QG-pWI2
zq`z@qU+<T1Py7bzmlL{cnK#HF4ZWh7B6<O`ttYx!Z>>FdPmM;2jjo*)84D)sxz7$m
zcjQtR8kqKKDP2PN9!t}k^JH+yPZFyb7)0@(et2VQ$?&N%r}=WfQPK7276MqwD-)-~
z<?e;m6_k}l*_@VCJI)%?JsEf1RwKOf%7-a2N=iQP?`n_WTznFN3GQ_TkK{ITNIOrh
zpFAuClZD02FBWOT?UYbk{mh$_l6trm$><XDDEb_VvPclvh*UlSBD#YKOOMfy#b=<e
zJBUC|!-E=ZeenMqgU<Si_PAN!alae}p&PQC{iRWQu*Pr{N>}>xl+b=5X+NwG%$O;#
zrXin@!k|@?r0rvAXf1~}A@dzTAWb&X9OX05zK@QJH&gHkZ@wT8u(wmC=WZL;b+_Tk
z-p*+m_EvgWSXue3&T~~o>A5a7E|^gVqQGru#|U3|_NumqVWbkyVpC;M#bt($3`?J6
z42iu^^2*=B3OQ963)<fAgx}uPsCU^iCM1?=Bq+RtpBNY}xWkn-_9eoZ4G6nr;a^A|
zu?<ozvhk1c7qP2pF8!G2w0yy}qoS@m6FO$Hg)78Q<Xu5ql%ML5&2D&c)?p8t)AfR>
z*pfTKV%DLToOEi>J$O&qt!p8W(T<<+1D_?lfKqL45k;Q{I6=p$W<P=8QQ+^*@<DwJ
zf5zbs8bkl0J87gb`bqB>>|<`0xx0tdWLU~1CcejTSUCsHq)f%EBM{1iGoHcxXk~n7
z8)UwLvvoDW&#SH_nL?Kfu{nrF*Wj_4SKq1>Ax&%+d#RY*^3ixn0D_U&5B`MZG1YLf
zUBv$YJ>?_S)cW8yz86g=p6N);gotoE1r4UZ2CTa-W<qSOZ~@l@w+NC}-vZ)Yg^gNE
zeJ`tgp=_p;MK7$W^=mBin$0AQuH-@%oJUI|$1JIDW|1q_me_JaIV63uLUP0P!(`!l
zquTp0LCc0EH_`bmI9BQ_#&Zt(H#FPcK6@{zEE^&-{AxOe*+dDex|Ej4BlO(On*j8S
zpYRg*+U9_pyL3boqk4)2*uk|nRZAM!3N%|hOjbnKtIWR0CqK<t<;KO_iwY45d-FLW
zJZaVY?Zb8D-JN%Sduz{-0wsl>H#HuVC+I$2Ijh-Zu)=+C^k~a_Z&D~ueT+>&E`IC+
z51w7}{j}tW@!_v1KGwd*`2aM$Ce<Dh@`}72-~vD82;Wc1Cv00p=44RHpapA2QN&Po
zqvDdyhHEC=YaZsU$g2auj0Gbp)FARGzR@tA-SQ3(hDq%V@353@#vikw%`$uPu*eJf
zY)xUW{DLM;YATt}^f@8z;)FUzQ@UX5@sv&PxVO5vkB`_kXG@xA{e(dX@C<=;73w6$
zk?I#iW;}T<mMOS22cd(7`4!amFA<d`g)s%$yvrK*dCIW9DNIU)u*?162X3zXFA3Mh
zashSJGV3X6<q(}xSJn&7{g$$wW1sCarDT5sl58rVL%#5Tue)zViFLmdzx`xSzqvk0
zr~qfUB@EZXCcWuf>UCyZG+n~?=IS@PNIYS=!u-{upn+RB^k7Yo>1#7S9e;d`?OByM
zq}Uwtb(2z47}RLQ5DgO@+jn?MA#j2+dcw~wRZvp0SK$_NU+sVx&*ad=C*t{(D<tK9
z2`4|hnx3h~FdLE5ine9{C>rELUpw_D(lC&1D>dZ4R2r0(;vhtxJ`a@WDN|HYZudh(
zfzABt{=%;_<t3-?7D1*dYTO_mk2&gVKJH+lPk45q1RdUs?Re&;W5>PzsUg{8=#)#I
zW*KRe*bwFO+T=cJ#q&VorPx4i!G_1P3D0e|<elZtiqliJg^k4pDMtmdVGfjG@)ZwS
zd!F(*ec7@TW;z};H1sJZ{}{-SF0g4^L1hDHxAF7ZxL}^i4~Pj!h+IStDax21%TLKi
zj);8s@EQ8$UaekPO5FbR8R5qQfy0enZoo0i2<e8wWSae!{Ozq?l>&ELPqOcMBI}@Z
zh+<-PCT=o@*?C(2<lGC#ewMuEy7ou;z<#dSq1>>Ic-r7O|7|>tQr}N1HdZsWu}{n}
zre3{7@Lj|~OIZ~##(IKT8shzl%<=6yBM0DT#l_QtIF<0NgbX!dDV>{$Fp2AHwI
zHl-MO>1v%5G?3u*SF*%6oBC@?b@su#XX+xzp<9>X`g9Oqx_lhXl_P(WtFNgSn|Xp4
zspo4<J~NKO`}1Ef59~4j!ewcYkuHoEZqPjnExjIovL3Ja3E=Lj2^Q$D;}r_m@HKft
zSZ`YIU0;w-U$Jow@ghAScFW?2pNwUoyP^S@@4CA@?k#G0BtV_Ghx|Jq;vkxpr2E2A
zG+c=^`Wd2}UcwjWL5catz3(mu<Kq&0#v3w}e<yiXc@f=TxX6nfHJ9e<Q=(~d$$T0(
z7uf%z_XFpvjL{F+Q}&?0jVLLi?{}shw2v??r;6**C0ildCrFgC2<Td9t^#twe>Xxv
zN+$iDmqW`WCqY+3Yo4qxMvN`_257`8GXJLy_9i}JtI%=g*{j#YK)&W3s=h&3Tibtg
zr1N-^Mh1o}yYpu#l>g+k{@OLk5FqnODNFI6+-)=%$e+%?&KLW=5r3z9X>o+ga|1%e
zelPm(e5r#1LR$iOJr4CBi2Q%;NhyI|;+SJl?`DjFyv{xv>9oU88t%(~r;~HUfL?H^
zaD{)T-TvyWhaVAHYd;x8_B$Q+x1_`Y*;b`?F|z+mv7Q=8&hk4-2mG^<|7#zO3-ltL
zB#Hc=DV`x?Th*J&hx}(nN@VZ#Wzg_K{$BU*->zl=+jZ7Yi}RlqNdvvm8j#`qXNvQ9
zLgi~2-2WfmbYDc;Tr$NtAyd<QAjLJ%2Zfdt7t=pGkator$6avUVvUc2mVN%*!CUg>
zi=85^m%!D}ju9tNzI+8OD9>Yxh%UuyE_8jfx!UyF^+cQca^CZUEBr(lJbT??X}G`E
zA>NTYqzV0e5Ugta?H;wpT9y5$GYC$-#hSEMy+wQk(d(|RaH7-*GHHP!ZpU@EM|pCo
zsz>w?#P=*|7=;z?tU-qEMi-$hY3A97=PW@h$wsU}ENN0HKith9ZR~)N{TP~taJ!_R
zRM*yO85`#YhLA5z#cZ}fxj^SMttDEu9%M?!rySuSAJEn+0bjrrMl=hx4#$vAS%9oT
zOH6b;$XLwto=P$o2J<B_Y1vp;r+P@@co#Vc9hJ#w8CfBVT3QwnR=X_lST5GFxHaaK
z#<IMK4zPzNFz_EatkiakQF>Oo{QOR=m1*Y58U+59Fp}EI7Ze-O+Tth6<{i&uIBhkI
z$$Nrk5ApHQS%<R+#lmlvx2N<n4Gtb$H&7J$$jF2xdUgb1)KK=`1YUbCxI>O~cm$4P
zg^oCfuZmq;C!`73&^R=PanU#y6i%0u3p^<xPG6eJFLEAQ01!FOr}}}E)W%|qIUdUE
z1{AHeah%E1#aT0#F#^t+;a=xcmlc!egk_4^TNgVy)NQx9v$@f<%TEUvg{?_(BK0;`
z2_jnHv96XQgGXmU9v(v{)ls8#47CNvpR9BFf*d;q)z(&BPSyco7%T|gxGG?0-=g!K
zZ>>J9?t))6(HmM@TcdTxH0r6SOc=`PG>DNw7Y=Ai;O}S-UmmK1Kx2mPy*7+d>U}PA
z)|>|)^1MvFPMp5#R`~5@iOad{<nUak+LK%NF_M4YI$Fz_(3Mk<Kg3W`<u;aXzf5ka
zIISGAl3RVk?vNAl;YC0{UJ{VLO5BQjaca4)71PSi8dSN&8UACJ)#Q<BVwr;}@RP-m
ztI-o~$aP7XtB2&rAZMZp&ePk(l@p9h8iTh<<q^Z!NkXcD9JB)0<itc^X}(Opd7-1b
zdvfpSx=1ST){o<x6wP-TTQAM!ZJp&r9x*{2gpB2Pc8g0cPL46kh$_5ZA}qBG%UsbL
za<Qb!E*U&BO)*pR1}0&L0gY#Q@h11cKKw_^Yx>m}KXH>y4ti5^X`^sW41^DD`i!Q%
z2&WP~DM}~BCq>OQjlI0QL@YDuifS|0RxIQs)RRsP4Aq?$zg4#`@q4|teI+h_*SbYh
z0R}SV>j`?W*i0Ap1`h7ql<cHX{T*w8SC?Z7g0Q|P99HV1)ehzyOjPcjbv$FA-g3K(
z^*8$yxRf}jwzSN}8p$Ft-t7^=R{*eoxgl>wWP<rl*x1=S`ATR*A>QUZ@0ZqE^91iR
zI5hW4h`hBNU?Xz)_5p{(?)%rIqe^#0_T*}{nN^v^-&Xh5%b=2feOAN#N^03?=3+Hg
z!KyP9KO<MJU20#I^82co*w|yZu(`4OoOyn_?9PC;Bxh!>OrnQ~Vx%MmrgH$wTB9h*
z!dcMT^L1JW`EQ3k^UeeA^@LjL&mT|mfO9b}bh8`3iQaKLq&|Ri@fabTn3&)&Xh#MT
z>B(LMhu8U}VJXYqQ6#p)Ly?g3UY_;ZhOK0$<CsmyMRHDS>87Zp7a2xM<?$F#&C(-E
z2-azp{HzCqB2Xx&zh&I@b-p41gJ{Q?Ny_S$%aHLNNCaritV&YQ%K~XYM{Had!F12t
z)N7!z!i#^ol(#v^?f7qEI5?gUj6Rku(Ll#DygvH4pukSzlL7b2ILIKoOjW45f^0se
z#-@=)3jfi*%W1J7_nu{bz^}5M;_DN}esAd85*(|iNE-b!aTxov$b2#RJu^G+(#uwP
z!x9J+Ho*7=BzMQ;Lqg@aFVcsM>mJ(AIdwgk0-ojIU0eT%eb)uT<q_H@4|LwD<45O9
z`XtrVl^0uHH#q(sn*EP1=IeRun9qp&{4e4soDa^Z6k@LJhRKjqGbMg;;|+DxV{36C
zKj{b@QyXVXLonn@L$3%Feb1uJ<rROPLN?7hR7K&~<ak*4(TX$TcO-RtVnQ51Gy##S
zMLuFwR*127RE-ll7`_v?_=*7I(wz|Z3&Ww7jrn3!M~{>JE#Cf%(<Kei!b(;fnrt!T
zS80zR;_Ag0ZxZtFh9}rSghMbCki>@eot9NjpW;pk_FgFT*8BUtf*Bc$dXACZD}|@$
z_tfvC920=-c>P=^%h#;+6j}a+DZVy9SY$(xmMclRIlIKqVcPM=wU7Glo|6L1CDfWn
zLiB<3efQPkMP8ee-~IJbC=i8|HGF1%g?I9JOZ#$2x0`A<aKwKc_s^MRy#*xrVQ&7>
zmkeQrm<J5InvEP@Ue~1tP;yUYlt1kdkZ5m}G(0rGyPG`3Y_(`5=6gfq<P04iyRX}y
zIU}O3VgUAG(MjA}EvMKf%bBC*4IOP+SgN2Pm50}L_}U_Oq(rvlznFbOnZU_hgqO3i
z(v<DF?358e->L9}RjLkch_u-JVr`Jp(-$_<gtM^Gn1j$OroL2WA{b69_#cbh%IP;<
zsCqger=l8HPvfJCA~+!HtGy$R>2=OlEHUm*I8Je+M<R*h%u$7sDtC<zwghlc!2ut{
zkl7eUN$#gZ5^qM?7+CfA#A}*sIr40TC1_bV>;DiK0RhUu1)?<b?p4ETzcI4sMRY5L
zA^AQ3;S^>~0cYI&MlA*FB40d`-$v;X@`f7))kpNZTO{;%t8RY-<U9r&h^O-&iK^c6
z-RVUS0sWlM^SZ0o8tqJDYK|M7Z_ny!;rtSvOT)jG8N4fV(XK#S_bE!{|HjXRh{(^$
zUos#GcaygTSCeS9xP?BNi~v&ne=OMJXTWKFf~jXraMraZpBh5d6k=v~=SXIgakskL
zfD7Pnb0kE3yLyBCRd9S?kkdhT*R{#{qSiRp9|-{nUw|alpB?4w3!4(Vh4#lretLGt
zG@}5jq54xN>GFOYMrkN58`lJLCck)>a@xQ4W|bS5P=+h^#Bbc~uenMm0-EP^*)qj|
zwcknL#P_i*gUlahmEH`QdgS&m6H|Xnx<pwcV<O*lqW}gVa911V`?F+UGJvZ|2f-W=
zcl@~5na9&}hzfu%cg`8WUy!wyejT2;Ao<JB&;<p)h?=i=Q;~f^c(>JiArD~Fw+_i*
zE=c&X66$NhE5)AL)*@7uQ)`Lw3|!8YYk_NMJA)X1Mg~;l9?-SGi3sB4OD+c1&0*0<
ziM%q~u0GZGUNZ4_PV^`k&|9C2Y*Nc*zBl(v)ExM-?Pjv^QDzulVcfZv_<G(hHCUHs
zl!vREJg_CfTFbHMJc?|OF$LvMv*_Sk>6oSsRHnQc?_usHw~!GiF)m8uA-iuomzKfs
z9*CWb4clfnM`p&K9rOKbl|8q<o@47{t%Q)}%zEig3K<&i-31AeIs^7+TqA}R%{OZ$
z=GfE;Y>-xIVE8c~eZ^&?t0YiS(E`Zg{Fb-3kAbO(k3_)dK0sdHYkr7K6!pt89%>%L
zFQ@gQTx~b5=ad3O2DkqH;_ivmQvs`FKO|#fs(T}=S`!;Q;lX$v*0jZTNA?hqp5A&5
zuF+9wxs{zt)rc^9f{htQsz~W%MRjZIAl(ogEmV?l=wH_JC>_}5CGeN8JNXz@Iz8Ra
zRwb2c4cmuzW{lw51c(c?k&$-y;Y1oAcOIh6y}hI!@1|+b@6%P%-Mx~Ub&`JwU!>6n
zNcULNNM_<meIX4W#$xaXCg189!%|n8X?Sd7D#<Np2GiN7Y|*SYrwKr{pYX3TaP9@%
zk$-^T3~1R4_d|lsZ<-5Cn6+4usYZq^ZnabD)YF5D&4at&`7iN>pj8#Bdqu)fbZCk9
zCG6v*b&Y{mt=XK<FhUV5cf#D1{JH6aXuCV>h!#gm!iJ#MDnl5{_8fK&X0_Pe12AUh
zO@vKJ9xzz+C9mM95enQ9LDIK9z*z;(gWZ+Sj_fOHzBI^L6Rfkak`P6$;=CzD`g5oV
zzE1!dc=AHqDD-_Q@z&7L0m~>!X4tXXT+M8mCdaI$bxF<namCK&Y>jQkUM6M%4||pC
zdbUN}UCOm@05BTbh#ff*68;L=&KS}sR#x^=l2sPK$<kdZa&!Bzla0z-YMOG|*wpY&
zl@3WIKmOMoWU+<Hjs3{n18~2j@UgO84|W!btT^Iux7x>A^N>zf`&_!9YcclaKl(=)
z0mGIf3IGY145O!#LOauaLEm^t(RhfHYw8NR`5f$zV{(fe6(1S@7T#M!n00HFz1~RP
z_eK(~Jp;_@(c=R17`VCd@l-A6#$1a1-qLd$zI%T*Ct3hdpo@MtGDP2UXDhUir%=rr
zG3S@Jtkr*DhjVFQ7G?aPEZD<_Gb|qBLvZLW6WtuW_4hq=Kp-zl-zoSLbWAsD-^Dvf
z8-bb3tZ5z9KOl<g0p*~lJmYrNIjaskh#JBmnp+OF-b8iXSsEtJ<k&YL`^Sm{gZ>>d
zP=wha708C2@Dg*IOg`Wyw05NG)STATRbN~{82ndz31B4m@u!_i_;O)R!Lw|ean0{i
zsJh7+)A))f)7MWNCyC)88~Nu_26q<q+t~YHM8^*h6^xe(7F)G>R1Zjdi5`f7DE-|X
z{$<Hp0OtaWCmo`u#CGCy0Kx<gW66lK`4gCq<@CDMbP`*pELBqFTy)aztO=zHA8stO
z14~XW!;?<-KOPDJ86pDgB!02cHZ{;oHuwQ!AC@WkwMU-E;xO@kIK{+<)Pp}qgW#(S
z_$wt!@dg1L)KbHu<k$xBA_Wv`H?Gau)+^Uayc~*WcaI4U1PqftyE56Ufb9~U<95G{
zlXL|!UOPVZ%OLCMTP(*ogBxi7lP&_>Ru2(y`1|UKPEu{oRuTd~g<FqNJwl7+JGKdH
zY_(cBKRmo6TaRxWk2srqbI@23rTPeROz=#lngNHaJhZ=Lo^n3pIR5a@bg~3*L#9}7
zv^X7(x>vJ(-+4H0ysf0;&=q2WA8nQ@RJ}u!-{$;*!HW1S`;JySi0KPO_I-MSx{Bi4
z@#?{B|7nS?&Z^t$)zgKEi(lvBE**qH;^c!wEaRt<x}6z4pw;2fX)XZu{j+V+=1A((
zta5QzxVXTsjd@#LGlO+vZy-|}a{^cN>l!!X-J-7=8LR;1c)(y)a~|=$F)C<)<=pOH
z>aKlj=2G0y5(#S}_tvL4#6ryQ_IL@)LUm17R+%&P(V>EMrGvTWD$qCJ>JlicTDP)P
za3H|${vEma(gQshKea0`)l((hne^E#iwqTpWZ8rIof#P!iItx-P;JtVwwCTMbd?S?
zQ;yrr&s*iQmsG|+pPGp?&YZqs)WRF^$NSSEBC1gs;10BmdO731CL~u}mZ0Q|TL{)z
zY0X^WA1frDbnncUI5!HAO+GgAvIP5-xDJxs{r0LK@NL34nY!?!SzlEa_kengDogo{
z7ZT(&vw0XqFA}Nna0W$JJ0}V@Zg%}`=SaMNejK>`A=`Mle(rJ&f;JTzSv2cV&59G>
z(V|WnAY^LtO66qOQ7@n5d9L*thSqSF$(KXrp+H^BFnH~42C7M<1#U>pdmV3Xa+_Pc
z7E;3E3~_;!`kym%5C_zPZj@46{=9CV-LU<8EyViz>Iv_WFILl55N1qTlo79|!B<RO
z9D@w9*!YrynEEe<5Dz<mrT*JWW&Hx=EwnZ2vN;fcrJ+RK#v-Q@qB699^l;1d^wYy3
zokX1xj@%ZWmxQhPJz1S{SSULw%;Yxz9M`SwjJma*$UQ3sICiQWH{+5wv4C1Qa#)=#
z4gNYGf4Q(T4JcA9{<@fUjkxjuJ^XQYpf_FWtJO?`tyi11R`qpOUYLLr%a0`4GbTBC
zoZRoXK3QR`rEF5Pw~GEpX5VrFLjxB^6g`lsu_K247}-L#9?jY)`ez`v^SF{PeMV4q
zDsKBr-hr0UtJC4(oo~JazDYUx<9t?|o5UW3q0-&L1vArYWzRLqsDYXR;dx2#q2${f
zlrsOP(%S1v%J-ogcdSMh8|ovP2$Ru1BV?BaXEjUnnRkxs{Za{43TI~(M4yK&HH`pF
z5<KM~;PCcDeyZ2MY^Xv?#kc1%V>btTU>;AEg}(OHCDB|Hm&WuJncPOG;~0rQmbA9O
z@O&CUS$vUdGN#BWsuB3gq-;Gf<~0B}!M{BjWHjuub2)$I3jBQfI;axLspr_;e(Pa!
zU>D_{=_QOyW~qH0CHhO+W0YCrSAo_u{%>mw2DZtA9S;sNaSN3Er`l2^d-zj3FHoQ>
z|5^@E8ACVV9c2jvXv;=_qpw{H&^Kb;p;U7<*)y;5Bg<>fCf|g8NyeLD17CVTSPZ*o
ztE3FG&y@Jhz1Q(&Cc0N!HRBiR`3+m+2W+inluJkB^t|Kt1o>Pt>-8^G=+rzCsw)h;
zl4iwv_z=&+_N&atSTnh~p8<>W2hdoZE&$^?QrZ6pZu5C$+M07B^R+!e;B`h1sV^cp
zLNaO2I82qIDKX@^C>a4qpy>URcZ<eUuY~UE7PjQAw?M|OtrX2FX7uJymOkPo9&xTY
z<tbWfn%E{Qh2t}pmR5&7n3z>4Y&<god6qMV9zR&<u5Qgl-S|f{fc(pCe9ESHBoiX!
zZk?!)zTZvYbQFUw?4)k2B36Y_)7M)u*`lw*<`w%#C4oGeHm*L#oXhNM)4?k0OwUn(
zT&b_3qQc>RWOQ5pU;tE9m}<f+00#JN5e1;H=aVlE$-Znv30#=*Zh!eKa%^sHUeS2a
zGPmfyL8R5<S>LPmdGbe^`+Bmfq9V>ke;8AXSy|IjX!H4cYRyU{ug6v^ti)Es*x1<Y
z!ZU*YIqNWBPcBP5AP^X(<nRKl%b`-YxN=9Pp6{;a@Bqb9;ujYg;QflqK_2LS<AJ~>
zP<OK1fhBk%HV#xjSoy$r^gzTIl_mp_Sb3h_>z~1b(?G`A6axd}bXXK?2B6*R02NF-
z%SYz`A$zZCOuka@=3?q}REBA0vP8?eEBxsx)6Ef+%jt~i900p7XVe2TZU(z9I44?9
zm$v~!b2kDx{=IO3HwRP#>1bbgM$$Z=uXdp~t3R@pE^)R>P9ZoCq_CVQq>7RarK_89
z-FZHvW13hcLh;*v0d9lF0*R7oTD4f+KuyOv9)p-~I!__f2BO=1*!A26Aor_-!Hw{%
z4Z&J~y+J{QoSvTUvXz_YLAKXNS9a1w$q$r4+%n3+s^tK|abSHQl`wV);Bc>MEK@vM
z^1ifIc+moz)N<*08SDl?<&r*bSBKqlL$WDsy}E8cP!5uKT}!=yqMHitn2l0^0`GI=
z#|;E~BYET3c5}-gHYERLvZbSideJ#TvLhogc>(T-RV(~74AXZdN^r5|w7Se)l^!kE
z=*L>3UJa#8!t|$LVhhtW`!{U$-zI=`{HhN0<A#W#;b+>Vl$2+&U!OB)S-KPktW;0w
z?oeI=WCpXj8s`I<(_6;e;YOxUO9YAFB5zq~DTl^F5GIiYKtRlt1F@o0Du)+^sZQ9|
ze{8&ho-ML_92|G->;i#x_S6g?n&#z|F-l<WK4AXx=-*#Mxb7ld?@{BL^izqFyS~`<
zvvj*<UJEkHtCkrFkG;g7e|>$T*^&2sNWyZ`sGb){<5=GoXi(RtyDrY0P8(urJ)LQ_
z97thb<VWM=|LklnLF{fM>%m`Ekc6876t!6NH;dGv@(?SgE1ZwQ*Yn<3Xc|#zUnNRR
z_QTi%xT$&r4dYrSQ#bBJJr84n&8(T2Lm5fx$?Ixk_Bb(PlC~W9h(cZ5eiQ?fiIi0>
z(<K_{N{^B&kAb9x&=hM<7;B7fbVWSpho&5%2Zi8Kdnh_+E6KLC`Z_bxYZ$1q2@WYw
z)}N348@X)3rg3khq9%uEPE1aA**mq<5Zx9TmDhPb{?w$YrPWNE(4boXu=LSE`&oWV
z%VMKLyF8N3_piREfFe!LR$F?}Hp&NMk?f~)`4T3QDv+<zDhaD{$c4>&OfIhHQ7(EN
zd&cTJW;3YBsCEZec$(}j^1~oZPtu$RCWh9kce4MB53)*US^iLdQ%RrJ&ldQWOnS1c
z{QLYiogAbbNWd^jxx3fh$6Q|<ZTzKw4G{h4w6^kOlixY89*KPW(NmD0@yttXjFQCd
zJK_P4qW8!*H2J>1z9!ylc1N_2CL9fUqgIWUJ|Q(JlKZJ-&oyhCrx+R<ILa4$SEng+
zFPy2qyvzQD+>0j;eYjD!G^s&nHCF+qklvrr;sllYw5Qd@gbPWpkZVxQN0Db$6n%XX
zDH?l-+GW=j<M9+osC2N?t998uiErs<3A!9Ck9MRnZ&NiRa=j%B`Q0+>O61acCg$jD
zezXTJav;qT2)(qDHhHx-9@m$Uz@U3wkTay5GLr6ZU4;?ZE8kwSHgfja-sav;rbLrC
z@o?Pyc~4J-96^k)y#cK218VUmM67FJp7<r5ri@=|1iK2s`mdX6=K&5@vYgmJm6(ER
z9@H7~ED|>0=hr8L#9E234X0yz2B{6Xc(vj6tU+)#R-eHqgM4EmJry}9sm0PfAK}m9
zdXP2_^i4G!Ti^dWI3Lo{W%PnT;Hd_dLmYbtR0lCf`m|9WHK+2nVs8m<drUs(GG!>K
z+YAJyy4O6W$4ypRiN;LoL3u<MCF&^z?mLIPxyr4Qbbd4LQ<smzztsjWLgd!07b8-L
zPu~?;jI_9k($j1kA<{EtVrSS>s9F)}nfA&iB|UE@``#i^Pu$vEXoV;M1PI>wEI?S0
z1US={zVzQN6&CelH!;o|jgn#d#37&Ebk<H2^mvVIi6kX8wHb;-{^r+b(IJV*T}X3}
zj*t3V-IUVJ$-LZ;y64EyYk+34<(@rW)dLiCOrI|K_*`C0weD-o#I@8CaICRAZ^!ji
zK@0k*Obq~p%9(yM<9hQUraZ6HVa+kc9AGIuIyT`wQlfj*ICL8Rf_=P8US|uU&{3X-
z3;u46?1s8<i*A<)|Ev`p4FtixPr5~9ivntI(z?P!ibY?<e(epX^B-EoNdju2&azst
zfc%{PBhw6XW{EDlXi1Z@rya>_PZORSa)nE;FF)x>HcY0W<_yAHI2ii%k-!Rxi75it
zipS4yz}?$vxiCM{VUajqfw{-46*;v@o(Ek>VGGto2Oj^T$TA~!;sul%{KRw>ZM87h
zLY(jc=IK0w0^iCc;I@p?96J0!Z{@)EQMR1Y!p(h8Xa}`kLx4jSq3=AzM>-E|G=|8l
zxt9&wXI-ykf`#^exP$v3qW(e+7$Q2Rq&Xd1aG4vhTn1Q!1XJUjOP*t4vBp#ycIN?m
z{_960RI`kACuObI4V*fIojahh7lOaO-&1&>`;O%jAhE6H3Iau`xE|>z8<`3lxQgr2
zj!VCv1rXwQ+_gDb&TShbok>f011W_mKk!-T+rF<U>KsqFw!OZec|Onjp9BNzo2*ro
zxn$Fft+uAKMKJ%T)t<h+Yk(oPEN95K76;O<I;%J6xIaX;5yb}-sucqjry1_+PqnJd
z6i-wEhq_?jO|)2@7F3rtP=Up}%WRujeOvG*4$i!F@3jRr932UCHvs>b>o$y-n^4o+
zD>1#?cWq6jF&FL%-U~U+w0vg08qUctqgC^4ZT{>m&Zpdl$*Z9ve|7#YLM&8<8g~`f
zV?LHQgCp_6fT`tIwp0>Od1XI2IccK6cNoCQpr}e$8&ZwR$<FRBA|oQ3@pOS-PJ-{C
z>3Fwcr}Q=2>k_)Li#os`E*>}0BwlqBgRNw#b=-aga)3{*8h0U3tM&e5Dw)M5_hTy{
zW?9s%1<G%EshS(>Zp-Qpmpf~WdU4B2f#@h|!EGf%*PT(0#S=&Z6!H%qei$?@vxGQO
zciOs?g82iF>~+RScsXsV2YXzQuD7Crl&oDuiePKrw)Ac6Nz!=-cLz|7+cQY*=|ggv
zP*IyX*&35$JjSoWfprq<S>Z}tXTe8|q+-7~5|b==p>gn2A36>3pSg|TI#6f0W$QQ&
zu&<x0I&NVBlwB}T8o&2ZhOq)zTmNx+4!gyJ7k2M;eBdw+1ChE7VYJS}m2Pkb*_zRh
z@YjPs>3tcOL@vjx#6qZjP{YQ&sAmDNx@HcJmE)$%!dNX=<qzE0K&1W|WVKCgaXpV4
z-y7YDip>E)PF!C~QpfvzZLz{?hBcPX%aj`(5{EFx!(Ok3@sK$;e#eVwXR7S1*goZA
z^F;*m@o05gz-^smMq(d6ovIrd0}ZnOQ13%E+5I2T`)ZwN98>FAb`Cq!tMA2jrj_i4
z9DbE+rebrCqAgbSd^pAl?At57hPzZ};RA_sr4j`m7%9V_d$5o>fEb`$=p_^x39FXh
z#v;x+&nS>V_BPp9p;E<^%PV4OdE&3CT3aXQ66J`=@ozIT+>QC`EJte}0Onk-iN-N&
z)Kv^AdMGPe%_mWE%ZF0{KJe>jJ*6DZn3YF6vjE|{JGXAqVY#f;^hl@A-|6*G&3JP7
zSQ|(!6X(RV66iH%27L;B@<_c|%STn9M)2d{Q*OGe#I(Da<$s_?FUl-u<M;;=$C05Q
zxMIn%TZ_Cj8n(Ud-DFvCZDee{$*D^7!-lXF2HL4Al4u^4(9~XpLd0tKV21_qG0gY=
zbq6OE0+L<`?Z!8_(Qh5!4Y)fgM--BYFq(}>=jV+DVaijxmyHrL!{NI8-J*BS5?k_?
zarNQRA&T(#OIy$AF^@?snw>>HUNSQHCm_%9pM2D?DF6rykW=_;<Je(JDrclcnK8_5
zUJ^{Fhp8DiKg5_&(S4diP|!U6*iT-nN%HzGN<iw54Dc0%lLFJ?PZKO{4}Nl>52d<@
zIgJbp8K<TClo)^jcEacMMzcWc*{a@a1KFu(5J-u&Z+?H5bCuZaBrYLI_?LxQgOHZT
zmXpOZ8BH=xD47#5)1emA^vm$@`rHQE%}U;=BnoY#4+x&Gky)cL47m^}!V-b|<X7WI
zrooo#rQtuAUwK6tJ~~S`c)1b1xlk27C$<0<*8le519exA$@ke{#!L~FbLOI%Nrq@6
zV@vlca7hxLjasQyIa3X%hf0|(UG26IxsvtN=3pDgZ2MGWJ>Q#0&xm@Pq30hs#$AZ%
zRaVDjrlxg8Ymuc78mz1=Scw95aqPU}nsKFhO}p$)FEf98<v$l)+m`i6edtt1uYGe0
znaI28@=q~_Zjmzk^>u?0I0oa!vSCz@LMh}Tu2#o&tgQ$1Q<XsGTGmo*CGwcX1rWv0
zZ#utGHhe*&RWMH_JCA%e8ZXgZ^I^-@uraaoee*k%ImMm15{9=4-uPpRAkYd?NFP<p
z8S*?+L&2%zfy}D`>Zctu2me3{YW26k!v7+OYnC+l$5AH4wgiF7Y|nRlv(+3PCp0p~
zD?hc7e$`cX2-r-&!@y}{GZ-+L<A#_!zojS;@R_NaxIQ}c&~ocXtPyzICVcNf-h62!
zzFbWScNRAJ`GmDaWlZMItg;t8sAs%yqD`scp%>UOkm&ier`JSZe1`sW++-IXlig{d
zy}Ev6#Ix*Z+Z9ecYiKav6JVMszmbpsW19KoTd!9_Uf@NmgMJw)Pl-uvc#0)(pxBb{
zeTbL)5#fCJ4%#@<EeI*EAAVvlw44@g2D0^;KI&ox+pJ6`b@*gD-bMX>XAsuY-p_Ad
z`UiZBY|!&$G1*HJOKtCmJ%{%;ABzT0bam!FAE{e-os$7d-{!Q1K>!%o$#Q#iAoJv|
zmZ1WvCiKrD+KbrH&*AU~7(XLJ;_)+Nyh3toP%KxrxekI~?+ph-Dw(vr#B1%8(-R`U
zgwQRC_uGCTjMyj6yxW!JZ3dA@GiQ$w30IPb3bXh_LYaAt!(mw#|BUwZhTw)4Y<Jpg
z29spMYbjUeL_?L6`6MAvnChX8MP8$znY`6`DxLbP?dhL)b4bJhLhaEcxiEx0&%>e8
z5mKWSwc~LA;}?sA{n2Vct?bL*<pmFRp9Y=#RM*>{-ilMFNY^L-Hfo&6xUJ5oRGb;U
zLXl{aRdBQ3geP=b<qt2h6c*YEJXNPe>7ev9!&e}(jf`?j+4BQ-7wb5p1Va1&b8)Un
zqR^G8sAjKa8<+h+(B3YId`Q7kN%w)#%~@?@*EiUy2#6IgkV#oJd1<C62Fp`lvpa9)
zhk`1#NBF-u3lx-Jms9pWm<*<wD+rwDFsPS*Ei5dwI$G^Ljj;9XIGuMK%E5-CB{1q(
zoGtmZ4rB^0rI25`Z=HXR5}1wT*{1P<Y`Oy!x&<I^y)E*eK3aoX1CRHpzKutK=*b)(
zf5Eaw*L@8id@>^Oc?jlobewrJ|0eRq4Ctd@iyBFV8%jr1k>y^Ol*ui1apAo2%xcg^
zRMyXW|NSdofuy?N9ROCI#HrKc;PQs2@QGU#=Df)20U6o7K470}UX3p+_Gac$V)++{
z;2{iHdtVnI9GF&7(=jF@B3ir;1t>}^XmtRo`KwLckNdp9jT<DwYdJ~J3*Z~p00EH0
zY4dGSm{r^rfZe!vyKkhM-ad-v-Cy7hun(|r3t7VcR=|TdAWvVLOyeo=84L+r7B6`0
zl&FHn)*^laAjRpjAuFo3)h&;coPY+c<{ieR8dkYA5wCi8p^ZW~p&C|A#ZCi}SI_Kv
zdneAzsVVIB4`KwhSmdSG>EN61CX2HsLVk)r{JSDJD$3%&yZ_NR-;Wg0ZW(GJ-_?O9
zhfq)6$0D{G%U7r+)bku!bX&n_Tuaa@Zf;({6U4Ra6#=-UIv!iOlf%RMYP#;Z1Ie$u
zGkmVy!N3*Lx|_(g>K`0u;1<UTsg6|;m{|mnoJ-(Q02a2*ummE9o|jZ%v%sMe%K1!c
zn9eMBhT#zd<w;42stHZ+YDx=YAbtSpq&l=GZ{k9^&h2d-Gw)IPt`uc!?m^A_6}*sY
z^RU#Wh7z;cU9Y1Df8cZkq%GWAs6dedI(z%jm|Nz3+!FAp23x($*(pBD8DJBMBNUvD
zfExa}+b6vMhf)1{kc&LUYBWd61$az?`xIoget3G?tWOO<o0<^sHH4?*MgoNxC5gLH
zK3AW6EfS=Ghj#Eb8Cgis287x>XV`)MAHLo?s;X`M9|i;gk(Q8d1O%j|yGy!DTG(_*
zmw+HC-3=n$-6$p94brW2^Ui(FJ@<IOWBlHK*aODcthLu(YtHBS)T6TjWAZEak}gvv
zh-!(UgjNE}&s+F~A3pA_3bpCRi7hi3%DnKWMtGf>s_y^c8gZb=nVDoh3q4fkZ{Mt&
zTCCsapD#*ZZ+^Q|6C~e}5jj=d=iqv|8a$82yT77n1JYBf4nUn*>i`6{Rc7)(TFO6}
z4#dwwgB<1oI(U#7fc8XIx!PWy7mc&wIzw4-CRJ#>AY$BwWU<Ia_9>6>mtoa_slmX&
z2ErH}i>`D^v_G#Nbz2bR%te^p>_=k#=qq=pX_U69WNH)z&A^SEGiwKvN_XeF9wAo&
zm;ITMVvcYJfDzQbo20juF}27(hz5~8512Jr=dq<TM`^Vu5tyq}5VaV|KI@>Bx!Me|
zVr#}$xB{;)cQY=Pw4g}$DUNWB{VCf|_irr;122MXauLp1-Fq8)3*b;b+pwCN-&UDa
z)z8K-t-(`-0?9mn2Z5aBYtsAmtJ#^p$7>ZlJ3Cv7!**%aVJUF1JO0&$p7J3(-!5MJ
zSJV8(ry5;5ke_i(PieJ_75GgDd+~oc;t=H6>Ogs`X{v0KyC*0^C9PvrC@Ea7-0@Kf
z-^($64CJ}~?l50St_9UdT1xHX%h5>hXH}>|ui8weu{=uv$~u}Y-uHQP`Hlr!WRHpo
zfa7%$K?137O}w1FCRB14Zf*j6Y-<A{W-8NVb>S&9s`JAy#<vi-z1dL7QLY+%f~Npt
zAjHKf``zwkq?{g05Suuwn?Ts=dYF5RKg$B)yKg6Q(kQheM-nA|F@{B@rL{l`+M+l(
zS_|i3H#VW_8^QY1yvqIM`Uczl!SmU|vN!cB8r17_4nw&-DQz|5=N1u254|jO_6}uB
zw>$iP))%fvR_a8GyMY}o?9U`W9hc#N0__|bs;Z6S1wX?;IkJ&b){szb$iTqBM%>58
zLQ~m(#XG4rwZ7C=uyX9M6BDPWrIqxGS3Ynew%69@W_~H%78yI=PSk3{W!4lxT+;P5
z^now08T&esPV%J$A9sBbegxS+Y(mUobLTgNh<zwINI$|~5YkDfpq@z&*PKa*Y*%9k
zKdoGICCwP+iAoY@>}oP^Xp@aQyLR4_o2prjrd#d`LT6Tcss*X5l6e(W6_CV|N?dxc
z`@GAWAW0AigQ2hlS5Uz3D?7b0fB9@_tDz}zeF=7PrFeQD3mcnrCdn|VX3oAmSC5Ro
zzhRd9|G+F%^g83*8`ffwD8oIgkyGJ~yunr!MpbK@x{tdNMd$5)k4kI|tuww;?;49q
zI>tb%Y&vikgghJ!t^8;SboNoKIA<GTuo4k&G&7WanlXB8$xmnV!}SO!J(dgMZn?5C
z@SAv8gNQ?Zc(Vqv4OA5@;CJzl?%eTzw>z4}>55rJDGkjsrK|05vy8PsdAxNs8d9|4
z_28^{WwDzLpVFOBReTn!f7Mti8BValOyhn(p`tiDmM1H(^VovWR~hXL=l2xH?=Pen
zBpFufHchZ`9=n&LoRIAmT@AnEKiZ}zu~4uUih=Akg5`{{6lK}-M4p)`j*gxwL&Ly}
z!$@FGY#t%u{%qQyZiT%+JldcNih&IO#H*F}J+8kypKmE1;=j;MFqQV{4{UQ>6RfE0
zY~`!(_Xo~=*caP#g4=Y*ndm9|$6hX<4lyr!D<@rk6r)3d?Mfx$<3H>mFIb`L-Voo3
zT$$`q%Y}Z-8wX*V3sSdrOk09Ax`Nc7*)57CgecnU3a?k;ZFyGNhWW7d6mz;AJOS$A
z`hNI|IG~r_WyUDj?WUOjdOHVXEp35hvt!Olft}nKIh81XVTz+g*X0^;&clHDz-`uZ
zn8rp_pShoW)GQkQP+vl6{<Po9_(n3F0x_*pqxyDtw#d30VmlYhQ~XZpMd}}oQ?6WQ
z?sR#`XZYd&7yR+NxN0bLwqAQFlB$}j8w8!@ZS$A-fQ9-m{Nsd=(;FZ#7gIe@qJz`*
zP&b9uG?W-5L9q}!t_I@G?ax*nfLw^GBsTNpm%s)!SOvEK_g0HTfDcMMJ?zw)Zaqmm
zs(&F+hdtvn;+v{D<qpThkmB>`AiA#2)J3;ACwg`-S+i#fTkP<U&E-A3jDC5D|4TzC
zDI=dDu;_g=zwB-pB*~Yh2a=y$IuP*QPaj5}OOE?%hwyCu9$kzv9Cw5!GA%j|^XpfC
zSO}p_x0u#-3~y+e)V&Ts7nmXO{3YD9{#kY(>^~>XYW*yIu!uy4(*`~*@`)Hn74{we
z2<dN#xNK61eQtj<F@O{9{l?yFQXX(q@fR)rik0ISbK#0sw8!g@!UF~rzwjSFOaRj5
zkQn$b*c!%`!PorgOCu9?d$of6-jF|>932RVVn5O(wfAGRO3fGGk)$+w4k)u$8bpd?
z<%YN4o+#!b1ZF-o^u^H50~~BU@@I1fa_nC!aLkMQ)=bN2#>m#AZx-6?8k9YGT%=g?
zEM8t!&LtvY688TicY>x_!hHU{W?WE;VB?Wc=}(O^$0Q{ARaoQ|&K6DKrexzi^&7|I
z^)H5d%gJwe{@uBR$gy9z6;K8)xJ~d2BE?3Q)EwK_sAFn1ZK8w6iWFcT#|Co?&V#jC
zl->{ZiDG|6Q2d8Qi)9DbK-yI>d>9_qEwmV4!(2Er8u~6Y?Pm-#_rELy1|gPpTbwp$
z5s^BMTZ>}`%zx@skAZ}f(dQ;8IS$`1C}If9ie>O0yD*^VfM=~EJ`Cf6JnAtlH<>+o
zeaQJ0TYQtw($?+wL$8OoHvcOiIRL!;Qv@WF5TTJ4pYp`3@Ra>fH`3YLpzx&%G6(rT
zct2m?3>g1G#Q;YnlSHAB-@RPZ)>SG!krD9}4D&cgp6kTSOn2-A!^r<$BN%u9cYJ^C
zEA$wAHxY}(yxZ^B+rNdMgOwtS`kgK3{}kD`;6%I$QboxI;3GU&9;LA6(&Q=MGCkaF
z(ecau;W_im24YQyP@7!LD;Xg|3&bui!974*uB_g4hj1MX=9Bqtx?Z*A#@)^N6E*y+
zTh9|R6ryAQ%t=H0Mo$kyj2S&LG7`ndLWhO*WubENEv<MmDvv3%mD}mvHd}_lgavlw
z++tI@GS|$yXLq26MoCQw4)M9AR-Tsl`V0w(S*7#}(2(w-9Hm|3!c1!YW;<K?Z9pjF
z)gj@4iDDPm^Q@&s^QRXPI=1O=fe%Of*c`vB?>Ua+6L=k#3mXE+Jh<Z_ds8LnAg(6=
z_cQ4L_;)-E5H3QJu!sf{w;P?>^X5m!KNr3Qo$VcTO`X+R7U&JW19l+L8hO><QYM5A
z!^7jZA-9{naK8&e%@2s;2iAlA3bi5d^AAq{52%CFcFClsrY1UvsfQX*!~W5H|BvLi
z^SPGM-Y6-D5%mXyZ$@kFbOO_q46I5XJ=SbuPB~8|C@L$v_-40DyW%|>7@Z-rIdnEq
z8bRzb)4|J{z{brD7iJ_$!J7e&X?M2OkAFVid&_P7`a~{(yOD(`Y&h8JFOe?E#Ok=B
z#yp4x1aT8C0|9f-TyW;hN<LZ%*=gs>zN%@Z`N=V+m&89gX7liTCqR+Y<{)ChTVlDJ
zJVfFLXf=3)m7YmUD5{s}S~6&qR2qo}&m~qEvn6vSIb#BT!j%e?2Lt~vfC3rmR4UU`
zw`Au9A?vH;QitP@+YJirjLJqbcZNqXf`4|y|3M9i;ID-;-|D9A;Fp`%NfjmYjrpfo
zSZU{qj0`@gSP|U3;fegi()kP<HeU=^#da*dnI==ddVWcVuq3CdGR@`Mr0HjxjX5*h
zv~4w}4sZ6(LSF(Rx?_RoA8!WsO?aIUC-%X8AGo2b-p%323a~LXKas8u_wkOF=e22e
z<u_MO<8KrASwrP;xp+75#C0>#olx6PsrkX<Gr@niEx!d|6Wxg5ppGMhEivA+whpiL
zoMbSck0KSsX_@L1MoFRUxwTk;HuOBV9n4F&zyCxNo^io?mogvT=Q+Dc8IHqN=%4#r
z&6^$Qn{N$kl4zVzqb?F!_bgKV6Hft^AiaR<SNd)8l&)k9Z_QIm?rmLaODKy<Hrnvs
zTg}}9!0D*THln7sq4vjY`RF;R{vOlSp9KNbZGp9Il04%B5BwJ2<U)#<7i!wH)w&0L
zCfB<ld;T?3nktrRKFQ`~wKeYJlzNZ?uNSt_iz<|V<Nx9Ib^qu8$%VZ1Q^*gwlgtEW
z2}miMj$SKEH5Qgl28loif$i%d0$Uy?n((PwAdt2fs^ph5Sl(CopMQY~J*6Fs<XJ)Z
zek<ykerxAh3B`H@t5KjkM#V&#r_K3#P#)^X2~}-=CaV6TQ@-<k!TDn?zR<r*yB`JA
z`f`kx?{+0>xYvHzDD{O-HH?rbd}5;45Q&F$l;_ViPiO}~`*8{e<73`5BLd|=mXhAs
zGAll}JlbdN=ADj}#xt=T*N1f2omdfOD{1#g{IM3lAO>eUwqvE8j0lg0uV-u!@tR_k
z#lAapB)mLeoC1&CrRl_+Sck-kLhavoaZ~vw^d6L(|Nnq9)s4nGV|gwhUUL5S>ZGKh
zVb<dw8tMg2fMd^ykH>8SIU)x@VNkI#kT4Hwuy^D6@>QT-8>gqI|CtUA58AJtxB2wR
z6%(6Qp_&wkSPnlA^U=JjrntYrl8<|;5H?XQ^&m0VmV8rjXEI^8w@^R%c%PdhQb%g?
zkK*+jNO^I|-~WNQaC+aqpRY9iB;C&ctNoEn7b@2bC`(eXwzR6uX0Ns~YzaB7%d<p-
zwe|GU_)kYfm+XO@XLi{8yeiRC;O<m~7X<q8+jCirkU=eLP~r(1(K-XDCo5=P-{izZ
zH{Z$q&Cz_u!|hh$>xXaEkI;n5ul%0PxT!ej9S|kq&K_^yuyC}UxE{@>E(X0PeD|=t
zeE4qu-g~(>9wtNUYDPQ8IL2^hq+*a@j<>{HD9QZWxO1Y=Q-Pj8w6#Lsm;kS+g;p@d
zQ0YvS(*a1=)JK<ot>-i<ehvk*B3}PQng_vi?)wcxLkdl&W3tdl@6zUD8Q!%pw&v4M
zc+9oXEZuaqeE>{Ufz(&p<i7+oe*-V2E$7;VAln<VP(Rvw(Q-5k(wN?YG@1-o5-&1`
zN{|40bnK79goN{1*l)`x{-l!}R7>8YG<Xj@&(1w=DRl_BpY`k1&1D`DuczHSytG_2
zBdmq6S?z_UX=-6&THinWpS1(d?~+?9@Xg-SDWz<w_!`F{?w(XlmGd4_pJuNED}$3_
zQ)o8f7?y$em8njpNeHp~c9v2_elW29uOkbl{iCokKz@nL7s7#nEE&QTD0u%nvR0mX
zv2_K09(*OX88-`CKhfkvsGi~aWo`U65alK<y!p1O&Rw%b7YUTz+Q7=AA0TLR@Jef}
zdAPmaa(g>0pt7W4a!yclj~G&=uG#a1D7jmh;@>6gl+Evw>zjA!o6=7i|Hx~)aSnbi
z0r;)nqXS7kSk-R$^2iD`#b1!LL~DFv00_1ChTQ8w+Uq=t$G#DRMrk#Oy1T3j)t+%#
zu<uEH&lDQqyIj-8IZ0BkkznZ9IoK9IX!ZM7NA3CP!?}W>&0cu}<3YY+;1w#rEo?c&
zv<g_d+=DM*eEzuak0bu8uwgO&$zy)HJ*uDjKZwlFV&ndSv`AuJPIfEWUiA`f9LHt;
z0tXYX8}OW3kLTMk`?G{Jym$@gaBze^G)A*ZQ<1+0#dB%w$7wWfH^VJ-@A5gDexF?&
zgt`hGi_Lds6xRxR67fc4hj`ICycPL(y};;YhW2fsFJT)dgc^f&hovm&aw%tmPziAp
zA}L}ufSH=>oANZhMn~ML-sSzo^t6YDLS}=G0{%P5BFJ&GFu3a1Xm+-T>eS>(sE`9C
zGg~Qs;#09Qj>(7X@=#2ThP{Mf*-Xw~SfsEOGJUto)z-alnytO~ERphS*39EFhJ&-J
zMb=Zgn!nPiX3%zjc252M#21$TmY6S4;iwOs#QrayV|)HNHn>=FEZk6^spS*;=FwC&
z!hnlE)cbh?KmSE3$NGGKcBvM|=fV}l?0=dEz+}KIbxUb+F*~=yD7(Cf88S#4_ISR_
z7FK56WL*2;G^UA*X@K*4Z!orln=(keSwMZBSy2A~nX}>MBDi0$1=2-{DH!%Xv3S8I
z-0@;d^4vjZacr?pR8^r{gz)O)WBJIZMF#A`%nAf`T;dx$x*4-ilS+L!_q3(lXbHOl
zC*nQJ7=Q$ZABalOrpp1$VWfIZ(u04<+a)rTm5ZR0<(vvXdF1opaRZV<4}g5GZg=-x
z8S>nKJPBh45DGMHCR^=;^;o9KQg8lw^&Ef(6V9vnq6qvrFpIuMsEV|FD^t?ttenGT
z(!=x89j62_H+Of#`<rQqvsd#f+vyQrHUizB8g|mu&SQ2vmagI%2}{g$Zqad)4j<%s
z58fTLme4(BzU!seuH*{j8b6Ed-0Y@|8p409b%gXP$fJjGvH-uU@bj)04;1IfJEk&z
zjU)3dAs?JO0FQQ%+}}QRail;=6-=utmR3D`ru&zM$MniU*!gZOp>>^@Ng-pdOa7*}
z1!UOK*v5g|ZPRBcgLAYYU1{+#TG;Y9!zf#KfrL<7=uXt+gqA{#K$6gSszCnIy(8rE
z^g9sC&Q=2;xy75jz;UZM*Skum0GD}6wZLRvgQ~=pOgKle*bQ8?5=sYa;@2;dE5Fu!
zdq>a|XmKh*KzoR{C^sz_PXEk#Oh0@zo96S?cYw?dPh?O6e<UUE7PP{32I3^*kbSR4
zmedw3_Y6mAAS<LP;RZs=G|SNJDoyUx!9l$n{c!xzjl$t?!CnzW++&~Oo9@dqo7wM1
zOXOv#TaKmktP^ZN;9-UMCtN6|11*SAiX=Mgp^)xGp^9~XY$5WvU5+cfx_Gl!Y1r$?
z=Qz(Pjj>)QA#c9KyO~#a3Kv|o`clfuO*QK?@}kMmICv4up5muiqCsjy%iQf1cgMMn
zaaD`9o>iInRNbA^n}oc<427*2*YszNn69t%zD^fCijZSJtohb1v0VFTZTInfciK<J
z(8BmECm^W){aG=s|583!=c}a$Tae0Nijy922+tBD|8HAS_8+M*v;iKH%@NaiRe$zY
zK^NiZrp%*XGE~oB{^x+!l)Frsr{1h~&ZdZpN=%=XNwxt@YP-|U)k|GCQ|Ct|peR=(
zVIQKWP>@Xi+Vrx-Vg7-O*ZJ;s<`nm6{)v`{lh(8o$x{$};1(hEk73E6=7Y|T|8Sm1
ziPy@ejm-tx0ixs|>(w@)-)B7QA*?fysQfM3^JI{#rCv4leZ41$-<tezWq}RB)4c}x
z#iHZ+>d&(V`!$K-wVapU_YgP|fJO0wq^5mn#440*bY1qEJ~5+XNI$oye8%bbmldIO
z$m+uo_0sL?#BV(vg%=7o_caO!3n&Hda0=7F;W&`7fTkTgmp_vx-$%UPZ9-J9Io3GH
zDOPOGv*2xaPh~{-KKR>JM)WVy+p7?}f1KJvbkKspWi!d#{mu)3W9}k-&aDfFvSiXQ
zX%78#OmjC2<wgNq;bWw3oyfGBpt{+*6dlLX8%bT`J*@9)ip{~mqxwqKYQh3##Pq5!
z=nBXyv(uc$q*<)`tNUuFD)=u~0!AUQ3jt8R#q}%uPN{*T+pnZHc)(_s17cJkVdMUx
zT2ErWp=$$stk~gZbB-tBoBVVfW>Ko(Wq~}szIyHb6D-F*|6-?M@qCrts9)pcd&K{m
zJ!1*{KI?bo52bTLl8NoRG0#DYw|ghFI5|mm;R88TOV!;_&GQ1yluVgr2$nY1TjrKi
z<<s<HqpdX8bUmP8N=KAD_>_NUKP>prJebDIRmo}n7iQK^LBQ1fOJtb(!8Ae73`o4$
zwT^*e*c_TLJk<zIydSOR83hq)4>ur?qHZ=?l4OS%tbKI}!Tr%E(LVQEJ`rv~pMH{)
zjpnbsRE`N&pTGS$-E(BUtb4cG$ONk)V3MhZ8sh5BW-29bB3Nr<O9QZk9BM{&4!2wm
z{kuv_$HO@vz88-ZtE?L;jctN-sj!Xv4uegk|2r!J<Ao)3mz=n%^z9q?2~<UYX)3wM
zf+UOa0G(E$XV-zi@<>eo-bC6_ALovXdggQiMC(~V1EyVNMGip~db)6c$PW+TwvP0g
zeV{910rAQ#%D63$&|bGgv;0$D8Tku(9Nj`wV68zy?Rm=b?RRV!ELZnUz-OJ!k@Zl1
z_(@H+O0^f$sbCzWtzwe1-$mcV=Ru-AyKz<Fxox;tyznkgMN)I#!z$-h#Crg*kXE!e
zygF-4nyN0JAIq=CIpftl$TK+o{9Xl<GM*8E7R&WH{_IK<#lou$X%x-Gax06Yu-F5&
z;D1}@OcP*ur_f_0cfQ(4@Ca_EekYT){B@HQ70Xb&fac;(7&hMtI#$57*r08x>bvD`
zjwAce;hwZ3fa9G5P}@zPUZYrZHH3ZZdO7d64?VN|+C-LR3Xr?gM1N<O63qK4u-xYE
zTbDneLy60ht4CB(sb?Jk+4)FNG{0chMTsW+%!K6xh4Y|;li{bQ$xv=-jlFQ>XRP<c
zs5;H&)UCUSv9lu)$J@#5i(hObyU$`Qls#H_s4>Fyzg_+7EDQ|;VA@F5oDh#zlsJRd
zh?5j?;bY_u)zp;BlbDNsRX)w@jMe6PBv00_Rwt+5KMp)F)jd2a=A*FSp%@aajwtyy
zy$~<CT-W3^-Jue~OZ>>Sq$<c(9>tje%k#SrV=!gm2Zl|rZTZ1e)?6zqKW5aTn!5{v
zg#Mh|f989@z)=H7oVtUaLck|K-+T#o+*&E)REroktOqCY68SxOi_6KER+kTGQkP$f
zZIjdMPv#EK>|}K7IDQg4m?Zc*fl&l~?_A$Sz64$JAs<g;gpGI3ymsdJ{WpLZDXK6e
z;XeP?_g`2JWFvK`QxMCueTr<q4|x2(Q~D?43y12+6miZav{GZ)IFWBh=1FO;h87MX
zG$G$Q2(?SRPo@spDXkiZ>oTNsuS9>%`9%I9-tc=5x4QJ`=Qt*~sO!&RwZ>B~B+_n5
ztmc~JjYgzn_($W5cd=aVUmc&<6}u0~%zH@ZgnQW+x)yn>=uBLFq<Gt{z23;=uuQti
z^%^Bvkn?`ws6pA48c9!z13e-AU#r?1Yv{Zgoyai~pz%}{pQ_-{n?0Z#b7k0cIz(=o
z+oIT;zQt`o-nx(MBi)GVWw{&5IY4BKpLH}Kvchg_4wi045Pj3$GL6A&%9<z;x5uz_
z)muF!uY=Xg<J!^9a?Wg7>^d%B^CX#xemKT}WMM;^Ev_<7f`ogIp+CDXO(;Ub?r-S$
zIrU2ZVw0Rb=Z4fTt$uJ)aSli0=h-xGaAzaIN2c?jhHULG25T-dx8kp<jXjLaZV4DW
zXgh32W}HY~yccMr<3LGaO=>Fj@PMrg^woLDud(q;li8_7Mp7U?sDO=5aaR}@T#SCn
zfAN?2KGPO{q1xl(sMKdleA!{s66qNez>QYVi8+FWWZ>T^r0?g&*cE|>C>=*WzFLRU
zqjGRAXX>We-m&V=EJ0Yw9xs(*hxv>1xdS0qc{wDxwY`?<@`rlyf}zjDF<;Z$B}t_%
zibE}}zGqC!9vZyPw-$H@UPH4O%!i#~bBv^Ap>9AJ=k!fW(aru}r}!xz0KU%g-1-BB
z;*n#;^B<tyOkP<1QLcrhS`7NdP!X4HH9~Zij!t8-4i?1V_3sDsBl-5+ucbAs2&Y2m
zPFzlG!&SayA0Jt~Df_-d5c4{72|s3-L8)0<?$EqcI_zQV%!j>i5<fhIg<$RdptBG=
zfnYd|eLJi$5t;8wJ0I7`;;E40Y2C8dJGFx<tBkKIlatsrQzxkcTl+2Nc1t{JUQ1O|
z`c{0zMLDvybXM~z$v^b&eok5F5VfmeP1QHnuxQ9?E=P&1^ew07IvZn>ofY0Yu7&-s
zmS~VxU}T_3Xj;#D6y9xhgjJ%~^e|3bZP+=Y+2cTkgaqgBOGXBLX*UY<)to_FL)T!E
z$B_B}gJsh;xsS=St*0{*xnyHWQS2cq<I8|^<&R_YUC;4!X}?G-nnQj}Xky|!>)zGy
z*{XG1=KQnTQomy1$6J>^?&qYhrAodIPeC~5Q0<*->h8nmle`8Gk2Y-C26%qXm}WV~
zId{*D75shlzfU5}_n|OE)|45XkCtD1P2gSjv|CeUbw|I8jwoZAK}$kB{V^^TjWnCH
zVYrUlptAd~QjW&=!fxDt&3YUz_~fV^GrG(>p5pd?vtdhLPK!N*Rq6fRtj<T&H?)x}
zD5WT1`koI=UCljC>un4sGs)G(z<`H`6&3<7?pSalGwx#r)Q&jt%0fb0Z)LN6-rA(s
z7AL(UgVeT<#Y?SFSR!&YJa855B+D9P_S6w)k6&;q^$lqgEKX!g&xbsc@m%yHdW%XV
zi=pMzpzT2DG29`$@4$`s_aU}hk$P=iFYc^O%=ChNF=|V@^d)fNd7i=Qqnn|1MpF*5
z)=SKl26GzuH<?h3xtzo*&*`ozrg|;7M_zUiB-|{MjvlIzWPN|QdXdaKl=|qUBL`QT
zG?KnQs!1e*W7U?qM4U;>E02oCKw)X8|9mL$lF3U>CM?-FUO?`M3D1KMizf0GovkMK
z_0V0Evrs%|+_SNukn+&*%-IYz>|T5dxypq$SLPNkk>~v0%n6qlOnCj=+-`FRSSI0*
zQp%U??>9|`(Uqvp<9fc=O>C(Cd;Q@3x*o%lTSrGa0MPBU6NPyMSXbznnInOFL<CRI
z2~$3uk2??D1CWHLJ%abqQ^i`Mh-gG32a1AsPCUQ?mU_p|&OQRTI;kS{l57x<Hdb%H
zW&wIzC*ZhJ&Jb8~CpGVqGSbl@Tyz*<tk&Rv<#SeKkO%ZR+|i8_$Z+rsDbfpHpPX3S
zCn{@Kn<iir7Yp*XW`%Fcy~x6OmvUzrLSb!X@$GW#%Oq+p+TNgH;G-+JBx976S<6Yt
zz?YT&H47(WHlw?R<&4W0GE;K=L^-6V!M_vs&k$KEob|R+JqFbh3R&UFuQId5X*VS@
zcBr<{*zkB~q#p!+5@9~(8pVW%_e+oTt(Q8#9ez`qd545#q=GL1@Vn&G>e|||-e}65
zm5u;-xC_wsjzfJNJSRWcM9Eu#4jP(QYx!`0H__(fLr+JS1<ZfSwqWp0?p|rr(Dd+7
z8m}c_D3cd(o;@4+h)KB(WZt<zm7abNEEQw?7Ym}|;@(_r28g@s07%RMLM|aZ#S8&G
zq!iRQ@fq{$Pq)nKecHzb>k=f88%F)5bexq+_4k6>smLG+25f3a&-KyJ;QYUp#$+_>
zf17NT`mDC>9=|=shP<~GvEUh%oTGgh02^_EJAmW2w>@%SSg+_p#ciV8yM21Jm~xHx
z#K@S>1p3hawU!VaBAMLz+=0YC%Sv@!T{(d9<%32g4LVh*5kRhmd3k}*N>F~~^CD8Y
z)7`eDVD{Iq^o_vaX9<G2PQlQQu>s~bInkh}S<tyZ)j%#}z~#E+RzPC~KC^%q=nYO2
z+HM2EFgLGcHghznnK3!+ia?W=*JVEw#CShc%79z*Bl(Djrr<7rGwGR9)^$?xw|B;p
zWUwbaPa|Y4P{SJ@g%-+S)^bO~P-_mK=j8NY=@JCMJ4?+BN;k>31Wnf}AGR=jIUX}t
zwNKI7;5FmwTZ`LUtCr${$q@0!o~MWt;>i1|Dy1t!O#9|Alm%O^U{TGd3_{^(<r+NZ
z_s^ncmq;0P_5l@_UURCn{^yfqZb!4zHK<u~C)IIibgB{>WU5W#R1RDMbM^L=pj}9S
zdilgBY2*@xpks{AxL>-x1VLgSyp4n(bqw_V4of485PY~Au9*Xd3z1+n;-573H-t9z
zek;muz*g5iDeXrF1CQ}OclZ}p%W#rzRwO1r);g+KgtV>}>2QA#XL&V0i|ymZ*Ivy_
zZyqP*i|kSgA#-Z>;3mX-;7H&5SCjiwQAk%#+^>py%{03+<5?Ezol-gcy7MB*O3~gN
zZmE55;7KLh1Tjw~wK4h1_9}+u-^GTE1HLl7oKeFjED9=2L*r%&m}E$5C&uc8$axhT
z1ti`X4ie}xgYjaLUN`3)la|0i!)`f7HP`CJd$80puhBzG=25=lwMMv{36unO75fN&
zxk$d<EN*N3T>f#Wgz(a(72$&yo>*2-9JLr5CDS<)60a8DppONa%nQSD$<%9C@>1^+
zpUlEOY0@vMA=?!AA-0<zmocuA;v!x*q>u7j*b60z&_KiEoOs83Hr-8H=dacOuaB^e
z4?X*XP>gJVy?-a?dtZLB5?W){_aP*%h_u*Tlq^QC>8&vF<@=d(qoLAO31q+d97at_
z*Kv6+%(?p&M8Wc+4P6YW=de+%3h&Y!)~HYo`vQEc;}%{qe@){p5g|2x=xA8W-k`Be
z`Mpi>u723#`R5naKE_vuUx_&)<TQFY)N1H4WEdIeni(2P)cllK>H3h!dgB^Q9rn{_
z60WXJyRQaP5&qt<;QdO4$o2vvfCNKQ(s+NGSwQUdM_ekLPENyin60gktwLk3$&kMX
z{=#?3sjOtx>+T*ZM${4&1PDDZ4lZu8n!`&ZlV`BSkO0HwBC1GEot6v%3$n#Fs?Dk1
zNxP&E?HgmUd^PbA*wz8<9F|2ogyHyKTdnA2sCx;@^wwOstAh8sGDq&oU=bM}TM~;|
z%Tev&_~m5fg{ZQs)2>q)zmbvrr?L=Ir;M8XfNC{OKuB0~HsFkBr|Gp#6?1*<UNXje
z>}$0*?^Pj?Fn`^gKS*)|45{N*%TTQj53myLn^|UPsH(nkbYvrE^pi5f{8DyO%%fR1
zQw%rjq%j&1`G{|TX&Ccjd9(jOH7Z7yP&W3)c=<BpDca63WJn!W2JIPX_U-1<#LS$K
z>Y@<+DYC{5T(oaHc}aV9r2xGM^^OqDfNE=9k-_PN$KZ3RxCA~?91UW5r>R~G($_HQ
zDO?^bG&dx>#MF@D>PGsd<ovRp`kuW>XNbl$M`mmMb?y7*lTO^SVZFPFv(4*8p|eCp
zoSjcnIig!@2fRpLnYM6dLGWH})uma%U1BFlyFI%U#sx`@!73e8#xX}m#LnmD^Ia>_
z9*y8{<LN@U9M(tt)(@q}4_Pn$N8Ab>(0Psek~=R8%8DCK(_OQicRL<v1+S1huFrk$
zhadX35y%jPWn~#EHkfnLWX%cuc*J)roXuf>`>nr#8!yO;C}*-(G_j3AFTo?vX;fNQ
zbW~x3SbQvQQN6AH!l;4CDQNFU|501ZE5;@;lLE<!)pn4?eQ(?QbTt8e?8lZsL2X_-
zb>sFOF}u$yOKbI`qr&ysf;Yr!is>Z4k*lMEO{uP1Wj`!1H;#o;uFUl|wRElkt`Nh`
zPhMuaWXx3ls(;r8le5{va&d1ES>_D4LtIN<jbjqQ5cl-XUzSz}c~o+_e^C|hc@IZq
zTFu$@iJy(RjK`5t%g_i#g>a(Nrn7TF1#4h%Lw=R`;_{V;G1JxJjytah(?LrxuSYAt
zee?NJ*SW1eK;`&S@VWdKx*p?eIVDbhDA6d>|G=n3C9!~_=@TzEQyKK@KIFsA+dC8E
zEdul~q}50cC}2q#hY0T>#qN^b*xxVHdCfab&@6V4%ft;=-fzgj$T+EUjJqh5gH#$O
zSJ`}bxG6Cgm!cRjNuoT>8~l#g4Q?xvR3azzF1|26jd5bE4sV0VIwv(qw7QNjW9li>
z=-rG@HSfkZNu@jr>z9E*mF{q__Po=Z$LytAmGVdaxrIFcYoT`@7g@WvnweNO5t)Kf
zsBT_;`JR_g5iPCxAdz!!J-4naE5fRtU(_ya>qs*<Kk?{ru3zOCZk{q1C`H@ro~*F!
zd-Lq_sa(8zh5kueQcu?PAfW`y^P$Eyy}sbxP4{Pfv;&Rp=8HLA1x3q@LXY|aU0aPG
zY$_M!cPYW`)=Ag;P6$j%1S*HCa^%k!(3iV92=p{!z~h>WY&CR#xG4MH(WVTM*KWo4
z-eiR-QSLDs(HFH}fey&m{x_6m@+dix-tYH;3s7;(l#t)O8jaJxd+8-nq6y$qc~&og
zD%@h>^{_)bPv1EQ2M4|7CDKnf>hXl@Ur5z2G*lMw#NV|TDSpRueQ#vU5YBi$<?%Kg
zcPea<6vr_~w*=1X89f%4U1p8Kj~jO};Q<W@U7vf^7l-vsyKgQ{AE#>0$-UE*)u)S3
zzExn+o@_cUotQ`EY>u>zqCDwzo^iZW$5~Zs8DuH?Lf7W|`23=@%~=OETs@vCUj9+b
zS?*;7WhM1>U0L*AJ?hax#jk8L!$QPly<kq^J3F7GCG%Yp1%wzm15wwbNYZDQ_f|)q
zYlkU6l?m`eF6Wk7J>K5PEq3w$p5wI|0uS$j$Mh}#ezDa~#9A-^!tabH%U#5R%Da>%
z97d0HKUO1k1`X0!6J4?V5PTDH{?Xf7_|hALQ!LG7K%2s=daFXXl?<K#6r;l(Q5l*r
zu*-Tg3~U54Gfa)!*%nM(I+8;Oe&=NE<oBb9!Sj;aKDcR*$+*tdGX}a2<+-HcX)WJh
zuUU+DZp`n9)I>m{pD1)zIvFsgscH&+L~<MdI($Y@-{5w2s-W_#aUA`eBvXt2`Go}7
z58BAGJt2G%Rz4Q<u*cT+`j8qK4`28-ivO(LNuol^NhWBrJB3>mppb#wUjF`&Y5@iu
zazcf8t%_LcjJ@3<*j^i@*r$)&_DbruLFIp1XeTI@J=l2-WP}kkZ|w$6Kw^HL^WKyt
zTpv>?lt0@6keMmYG70xARG@zi#`DS1ra57SKWF={!A$EfBuVbO9epl1UYR=6Ld0%^
zFY;_+>g6&5L;U9g)ot(Iv3Q!6ofuz1hKV^Y3{H<F*j-CJN2dB$#JN0pr;CDK`b1_j
z#Mv=VXoy*1cmCU>Z2KXd9{M>BZO6`Pp8=&c_D3uf+(9PoYN;j`eE@9g7GEG0+Ukt8
zp5`%Q!nrmd_cNRV3%=Nyj;d-qv=+O*1$bsid>82rz&R(Z#?rm7cPzNkNPrhUP4xy0
zcg?x=WPW*_%I&xf#&){e$iYVo5%NfqmQd`5Ai<^i3Tb7mufL45OzdlLRD+jRdAxKZ
zFgQ4zGn+9b_9@zUaG*{dk8xlf_S%dkvSGhulZL0wo!RJNB7c!bxGoAyLh@WMMCdCs
zY@<UdYb}`}1=&SuNDvVU90{D3tq~YO$C>7N+;<9Q&SHO@)iO`Z>zWq?vduZh-$0^C
zsqhxHw|ojk3`OjzI%8=7??z`J%FdLIEj30}ndb(ByVpWdS*r}G=Py`bp^t;=TY--`
z?Z`7Yp4mVFN@AbQv1uNI37pbU8?Anh0{}{IPB+9bwr!9stdgIsk79o2`li!m+9;8J
zKdd;q^RE24>tG04-&Bo-@aKRH536neihvw#xo(U?tR7=W*_yK_=%{v8>>C>~=DXvf
zh;Nw^%6A9LLh40|7M-z!lH|vDI7u~stX1>GNc>Yt_>zs}VD`Fq1}UQBmh0Fw1cJ$0
za&2Jd=NV$?i!i7%*b68{&%N4vBOl8WIK<m`ORB>di8cXtL9~opl=~x*L;l^qxgh0G
z$@gnO>+?Y;OPqF*-bbZ-oof-7VEIWKl8Y)^!{cLO!Z)(PB{@KzHhvH785&n(B-Kzp
z+3o*A@Y#y8kCW;}y_D)Kk7n}`%pITdV}G-lN=JpXJgcPMq`_(R>Cxg&O`*8F{z|g}
zp14$x{mjG&J=+Xfe+VHa_Yo5OS8Z&`SBD>2@)Z9rKK*`>k$sR~MWS$C3+LK}03j*;
znX=7<gIU*8xFF|uNHA})Uu_R%2pZVloF9x&y{;E<$i!@7P9=4oPy&()=A9RD^R<w1
zszDn6QWvvXFs?~xMhsdY8mjwUuzf~8Rxj~)dHH-@Ybq)xoO%l>Zf23px=K)#T|Kf~
zICzmtL++O{!pd#E;yYEi!xH*}`PbLq@enPEOMNsDNcvPdWOSz*+90#?C^Szuvb=82
zLc>N!@Sl4yeE!*95WA9FP|7cBC#_8VPXmYqM;zvh!HKd0hCgak|1K!^r^e1jX~$I;
zg71RoLY~jqMutlZ6r%Y^FgzJnIa9=*G53{!x-0vn;F3auq;5&Jq2r*`+Y)8g)Y?xJ
z?l3Mo)dqB_aYA$1qttQVUgE5h(T2=fmndc3tCW{h?=pN*D5e(fm1_8J({)K6btWu{
z))uK+DuwZy53@II6I}#u@A)VBsW)g03zgD$n&L-B&H>GhWfEl?<Fr$x#r6TkrJ=Nr
zR)$7w@O#t#R4)tr^U0`<Sdcqa`^P1PrbL3eYy$c@ENxb`L*oBB5b-Af;zWGYa0mDf
zfWM=#+MlIHs&Oy8-&6ZRSHaI%FW%TqWn;!KmHMey7;$e&_b}>97eO7FFmExpRgvXH
zG`XI;+@-E_VQk-At?CGto-i33lC_2UhryY3kHqJ?d3fOFupouIr_Db<qb~WY?3W^h
z%|#M=YqPW;fw8$<UFr7)pmU04Dpk$_L-Gy@FEsmPn~E&u!<Uo4dr_=%){;q!UFt%x
zw9@=ZaM5Y4q&c<l^)h@UV-Xg)h*{!K>5z1cM(O{_0{E+aANdVXi@o<@rtskCy)bY{
zC$>7XxC>X_vE7rTgecECyCW_N(IPo)gpcDL3O+38lUj$Mykpq-_X#CK$_@2}Czi%x
z8HHY%F!FYr=yLp>R2zxm78%vCOGt>c(#fvWJ=<jI?w;gG6dUaueX3QLaqV1Kp%*8S
zUYImSrfa@b_=oq)#rYmX1kc0iLx^m}QJ3b}YkQNfi{f<FXZtG?qy*k5y(r8ii<Z%L
zmg4aJy%ia;5iMIpY551H$qz-O_&0MthdhA`#)GSRqVjN6kt5DZA)g>$@VziEwyp^e
zPdr4Yio(WB#`lXq&&cY-nRXq==v$1}y=&1@v1C?cHZ!k{E(BG7MVrMD+SHB>s9Eiq
z@jo;~!tbI@WTdFOhtw6s<L)>+x(=nbtBra^c~I&i2)|9r&FaV=Y;tO{{Mx4SsY@~|
z9BoP(5oylw$%3wUBMEH1j2YlyrVXNhK(R-7zkMOHGur(9cJWeW@OvY^-_qT2-^0w2
z{FfDf%2VM|hiL2fW+g5$3-*&NVyHKwgEK!?gPdS$$=-xK*ZW*zJ<|1iFX7oJh3SU2
zn?~4rmd$K8RvIfz55xKNSVxeV_!!@`%u)`Mns9Z$xDH?TcZgE0kA+s9=jCHMp^?Wb
zAIg&~dMm(f4ZgZ&Ybo&62`*q@bh^}gWjpZuw;M&k6kES^Q`!7YzA-VatY*X=H+ds^
zFt5%XK?Ggj5x>Lk;fr5K_38LgnAeh-3U%#g&3!hT{(Yq_erksE9n03y4oPz~eXcoW
z5(&3qkLKy8^d90DqCdw8T@Yw4BgT(t5QL|?ZCLq}g8q4kM;I}qGAS32jiaTJ;rwAR
zwsWR(&PbLNkD{e`rCzVPJS-&2QaKepxhsUN=kHC1=mn^S>yc=oA4cWZpl*;sQisFO
z%7Zr|C>8H7V<1C(HhU#}HVY(lj|~IY!u3d%jzZNfryNQ7-O%y+(RX#S39)B0twdT{
z5O!qTp2CLoKJOBV_!||FM*0|7LhGf?sv%4^fZ?k{Wu!8uz90pe3|D{;<?>PRBG~=n
zKJtd1#Yt!d!v)?Z7FE%Q2y(ORGcP_E&GGJq*oQvmbZ&j(nvsoZxycV<kHUZ4t2XdN
zl#PA}TcqZ%>$h;dZm;cc_!!?N{<F$Cv|XNw5lvO3wGILMZ8Xax91kG`Wx~X%y2^O{
z{zbOm-GUYtA@pSL<sU@c9`F&w(7;({_E7pJC|7P)_xOZ8Z-ODWejcFAg9YxZAt9J}
z&Y1d!Vhi0VK|>KUC+RP-k;<^L6Up^faXK3Z*b@ZR$SetxL>W;jl09?-1l=@djjxUo
z<9TfcrZ}s-C4B=2Csx-=IL?rf36EPA2l^K+mY=Jq4&hVcNEs2cQ>XdznB8O=^R*{U
zM`W@e3*mi;k41swM}d7$khtg5vw1Fk{qw82Ue=FVexzQ8zVl342w2H7$1%3qn#GzV
zl1)$px<ViC5tul-VKOH1M5%|<oz+I0!uy+-dW?roWn3FKpS^W_#&UDfc^P%b9r~l=
zI9^V-^s^ZuZy~3kXH)Ou6fQG&T~Wl1wU&l-Fj{>{MDaNGdA@TAz)hWItC0ydZEc91
zQR@wU3H9i_fpcvw<9SCiRz^0WgtU_P;Ul=+x+%#%Fp^-?LE&4QP$3=pu+(44wff&G
zoZp#x-CE837(kNA*0bwJnmZegv6$Ru<#C#@n^V@e;2iIvU0)xi8xQU&G5@5ZilDg`
zGyzjy8;A){WZ$<)=u#i*WooNVk{SM<!OB_yfx8bMj)q)T;?+hLn{m0Uq`%M<8O*dD
zrZ5>2tbNE+5s7~2rI{y(VIchECdb>r{yrn;IFFo^@FU@tDMN!1$)r4i5Scq+sDty`
zmGNWAu5E+Y4&|Xdvmz-scw;!>-(riTbh@r*v|p2i->kp*{6f&?jvy@s)l?Hb7?tWU
z+{b#FDL&ak{h2-9&|McHmch(Cqr7VNeV>MS=e8q-gSmN{>dbR=;xrZs+K$P>E+m_@
zCv{J0kg8E&uUczSTDwXsFlxIFDdSME;L&X1J;eqaRf~6?%X&F=T9(4G_!CkuP|R{v
zUKhwxOhQy$ojfmZ2@4_%3QfIPYh-E?3$9-UBxGVo)r{?JDQ)Bpi(Q8uB(@tx3Q=o5
zoUeqfve#uMYS)ouf(iBP4vJpIe$J;~wF_KcOHq7^X)p^YQzKEuN1k=1fh)avc9_=(
zx8k%(WcpG#la}1~D0ZhYJRwIfEuo5ci0+3u{RhUUGTD@%AmN9Dg*PlGExn~nZ5ep+
z+BWnFt8&(K-D(D+?i2ZnzGSVQ9&0s**Tb^w5Q%ItdKli<$QcH1JvXWi?#ws_EHM^`
zQX)2-AD@?h=#h%IyGhHJXsmBddR+qBw0qYwmGS9NwF?q^^S!m;evP$EIO0FxTq7;q
z`_+zzgjZda0@9riE@P&NjugAki@H9PJC6;LcY&=|=t3CAgppYIRJvZNajwffp~yc*
z_f3_CWatOAa7OD*O`@#Ib97SW9siu2l1+)0P1Fw&j;$s524D4F47$9Q5Q^p9HCC_7
z>!)T7n~8rE@zUaf4W!}e;xTLOq#R?Pm|YHybN_xgkuM1DhFITXS-CE2e@C7Us7CrI
zGCgVA)WodK%Jx_J*@zC9uf}0>ayIA_pdjJ<D84=JbMj(<u^ztQwn&%wR6X^sLgA4t
z0|A;CGTu~aNwb+{Y*<AeD&+dsg`1f7i{b=<`4ndOijB%#Q*U5WRWXC8I3j{(1@lPv
zeCZz3R(n#2t!t7>qFdrcaO+VQI~FlG@U|1Yd*_j1LBQv4kDeF}{6sfk(!=O|4DTNI
zrF(w<tvcOFvU3`Jd%#7pKhc3z#&(8@yiw^B5v-xFf%dPj&Vvm|BpH%hTxjS9Je+Wd
z^A>0!EY3f8#ELv6A^D~U5YsF|h2PPPgD#u)l4a<!A2PVn1s;c_Xyc}mrJnup`f8jA
zVe_)aBCE2UAFO3_%%{E%lRu8Ap(Z7IH+mruqo^C1gqz8>1^G$a<L?{dTTIP98ntFE
zCWNHJ>AmYP2<n{`G4OYYnj<~pGkql%Uz_Ehv>)5}t;AwYW^FzTuZJ~BhQe&O>s79P
z^kbLBY}*W>9)(}DEjqa>E<WGvs#PM|jVcq2bqI=8FXHel#HxLYx|p@-%bxU}0zaaq
zwNjK8Bik`O<LcE@te%bSY~CT>?!1uDzqUsO__-6ou~{QpoF_(~U|K*Nq&slqS%8#{
z*n|X0m3+CZPfU#;X%y2_$0RF2sM|Hj_mcw2Cs~gW(YDVEGQ4@xT;?o?ya@;hwmT3>
zbJdE}M?vbMC6ME=gIuFV&{Gsf8rz!zt_^TYj;GUY_i>yy3sTh7)b3#nrS<+nAdZem
z^a+BWwImGp{Qige_FPv%Q?E}*FQ3}sbqAamcM=hQgr7pVtv`xfcPNqAevq-jY<`&h
z5i+6d5p{3s^GQ2rN{sgFhlQ^5!x7Xd-<#cpTCqj40QXfgD!J`beM~N#uVN+PpK7k#
z+Ae9cM+2N;i`9$~<G<6#5K&ilQ(4ka>iPP5>g&NYKBa^99YS0}LP#ms7$ol7<c+}a
z|D~oT4kRf^BchW~Vbp_GFV!1N79Rt#3(t3Rv}>)O>j4>Ewa<h1?s8ijjS2{Opfb{_
zw<CuNxLUnk4i}_Ra9@tBmu6=AN`&DHgPxZH>VZs-f;tRURiB|SE=^2`B^Y>fw*VD&
zV}?;z(CGEq-Wm7i_5AXEyaM;evt@h-3M3O=I5Jbnw{V3{Gh^e&h98L;YH`b32W=ux
z2*h`TP8rCQ=Qjn;RFzdFid3&scU$KAYu}1UD^~`2N8IE}*uGYvO1VEr=4UNOw-@UJ
zEoPk5qB+0xm&A^KrzMNWC737<!ar7=JmfU;sf&B$v}x$U#mK@$x+Z?QD9oi2o1n1c
zL&i@TZ+bnQP`yzLnP=BxU}DPu_6cu%udGessP%>k8c#7XHYNh~Lekjv@%V4-NeW(L
zg4of~L9S&v2KC}h;INI;2by;RRud#Z$hGCfG2D%g9yXsS$WILkK6nBfYveW{G*MbW
zDk;195Rp5OEABv@mZ%;;$!6)`0+^{0U{5G=hN1~v3w8nNYhW~~=+~kXkq<Wn{UOk8
zm>i}^=WFNc33g49`rd*jCo25}`NgPB_sD(Y`v~oT?^g3Go4wKGZo`m(Y&nOP2J_0G
z%F^55#4x#%Qilft{u)cX4xW$vwwq^X)oV^j?-ZLP99u6J1_z5{jQ?8b;Qh}3V&ig=
z;yLP{4W%NRQz`<dsx=P?)2yy}%Ji|%r0+BJoVfkrLc?iInHS?%0WiMj`u?J=Y}lyJ
zxUjTzsx-Vj({E*C{rz_Zj2@`FLi+&7^Z;E%(W?1%UD4+{hUsDHqE!;OV6)0sv5<@A
zN3ujMhtv7(bEA4<TF9aXk*H*97q#ktkghc_QWzp35$aOnd739L*?A6YQF^JcATOjB
znceLEz@}Q~+-OBj-^<gA@2`+=L?gDyRo-~a(+Yu6vxejVSw*2!f-0T~_Pl(NK!JFQ
z>kgwOr4mxOP-1Uth?I9Wj~v_u3S81JGnzhBercs=U}^8d@-t~84c-uU9T7L>H_|Cx
zz$b@Bgc5b@J}%N=_2Z@bFrRp>+4Y#^rX!i7UVkWsEAsmM!0PKbP?=kRjEQQm8<)8j
zkGfJT8=I-zV<-`kX4i=<aHrn_#vQf(LeP8sDeLc&YQ*EcU}wC?f?dmooPr=8p?h%P
zkZGQ*Nnn#*uH@VZNon-U*PO#jLsrvvd|$^PNg{fZ16c=%FD?uTaftiJMQgOA8|HcM
z32>a?Q|QQSC=MiHOi;xx0hLFJ8$WPPNx={Hb`(GBs;P@&#R5n36aY?Ye8A`sF&$Xx
zSn9$7J_BqT^-}NhbW}e%mxECL55HKZvKl~j9!1RCT({BC(BK4;(=5jTb~6Ax08XRO
zpbd>aD2VU^-q2LGXY6~2l+!(HTnuC~aqPwF9{)=LDj$mS+5XKjzoJ~(WTq4ak21dR
zYIy^4Z-e0=70T$lFYQ=TTPu<!jtV0WXG7zM2wrIX!X;st^_G{n*6(x5Si-nzk<0(&
z-kSfaQ7>y8$2K`56vJQv`)$j9s3QMDV!i7iqb|cJv~af;fs%Xe;TWU%H)o%Ge!-3U
z=Zefceu}@+>@$KVMSw`vQIA2=WuvODp8qLvX7<c8LMr;<MDRfr=x);^#6mLZpcrnH
z7W`XSsoQQCSb2AbPs9#35DS~U%Et9kY24U^@jH(T9@<3S68~h4o31ytx-QR@2$ebz
zB)2&mR30l24$7t;C?Q$l6w~KhbM&r6=iO~LDzWUP7vi1c+k7_rAa67M+%|yM_-QN2
zQqy-aEmxHB;ggLPNP}3z4n>I5lh=awLf01o;qpApu<u)E=CAgC9Hke!))SrBA{r=a
z(9~{m_v&Nx(!kH%q)cjQnsOL*&DDJWE&;i7fE_YA%ZicP-P@~?u|0ddMD1cwJiOsT
zs6W0*KnTG+Z>z11hk7{{PYoW<40eE^q+(Hi+Yv(yUJF@NLDZE#lmOOK0b?J*098Er
zZs(;ZB*g<#8u}#3AoEceRa3of#P$)Dwd2Wg;Yu^zyAyW{&|UKg`2QO4cY@0q%D_69
zqj5=vBl~JdoETD@KwnvJXdw8`8bVGV;nPuFvkSw)$tC9T!jea<;uXFAZ@&YN!Yuat
zx(It;HCt^<O!6Lr_2r=0H(sPocx7doZG!hEDfdIVtlbZ+;XUQ^2lI8qO*BmukG4u#
z&Mw@19!;JpJxHM3en51-nY=T6l6`wv{5!_JDm@Z=u}j`-@^$ptP{`>;(eNadS@D2}
z=8_z%>*|*dC*y{Ts~O`JBbw1v-5IK}EpGS&*CMN8w$k5k2EFM!TjmsyPk4z=wGLG?
zs${EMm7<6E>{~X?`!C`~A(6b+!&_~%$2y@m2?j4<eEkr-UTiyV+ruEp&D<Xf4>pr_
zE_A`_xoTB-$s73~@;Y=T-4E{eHY9tQa~QGtZCmz`LFBGqxz;O8O-#UR?|;@KeHkz~
z$Vg?i@|ir&+bg_(AXq+k4P*Q3O<hgq$7@{NJrpaY9!3csw3~Mq9Ft=l;w{^yJf@s%
zCfd@N8}h%eh3uDN-A3mG&b&1`Qz>ubU+2P<Pq&K`@}e!xs7teH<0K^$Db7R*R$AKr
zAg*YhbR6bv@*67c{e)E2T;chu#M0ty;V3M0VB!oeL3P9TT7G_N7u})Ln6GA5XL;{z
zR-!JhFx3t9zQ(RF0xvg`MYvbqDAg%VeCCb$-r9yv;tgB0!Wt6J>;xHGVx)?JSLw%I
zZnbE$m_%XH`dL#m!K@x3N@^zSRLA3qnERTZ<)#AGoD8e%<<-pLv0H&(2rIk>yzhuI
zlp{h%bgLmMr+oW<UM-#mfske5v$?aYqN6z#-gg$G@iVp<`Y(tU#|AE%CWy<LS{%PR
z-jW>Ia*N5UTl<S$*m-P4?VO+A5udxh8b5h;GjM7fKkF(<0xo-(_A6^MDha%q|I26^
zA0#sd1_tsV!1N1Vtz&}}hlFk5pP~40DM=5@=9;p|xlOc#1YC`$xVe-YJLJA~Z)Y%I
z|3A+Df+@}}+QI;XTkrtEo!}5OxCD0#?hcK+Yp@X9J;9yE-QB%$2ri9lf@R)(_uiVB
zs;Q}|`3Zgcyyxt**ILhhmnCz6{foD)<DT<|x^Tu!UWj{vN4?YKGqYo5LFXSyjg@Bh
zRk4e?S1!|rM@60b46LtTm)uEp%FofIov;g0=vB@=CC-{TE@{d!@ABuisYl6;<|+n-
zbaO$KNgG2|rcW^5g9{acuZEjyrTVSV9U+{ivOjSe-36@qSWxa)<iaK+XOT>iPdxZT
z+MV`!CrnbNVhWdYWj_`jW1=rUcH;PjBUKfDS69iYiradXc(6+M^@={}??Th}-*FxE
z$HRB5bZj>XZ;tiN*|i7-y$$~yc;JV#@?ox7V^`@6G_i0<bl|ysr?d0KKkzIpVY&W;
zd8k%ErPEmU`p=Mobzx7JVOLGaZM&;?y6>lz=PN>$s=tGDUC_bMh{wRRME3<as6qGe
zfj`~>*||fS{npmOo!g?m!Itam<c@Dk#X+j|dL^A5h2n23uvRN_uPy?W`L_W1a#T_C
z#8H5}(umr3*ADOB`;xF!95yR0GCGv|$(hQepPK^x%^X^`J4C0NfrdF$U=}D5tbbo_
zqudp8xWjvtTIc=#$)XCSi9m$UknPm6F`?=asoS`CWhtO*?5a|$ax^Q`Vl$CSOPj5+
zJn%(nlGSlcPDW@k_*v~3=90B~&}y)|p6aOCG$-U?5^~dMTeiWAq&XumP@eA)T^X?P
z)r6~~Pw#Vdoh9^)b<72i*x97B-EbBK$pl(57q@rF1XV$XiP~?59%3Fze~8n(VTi;2
z_`R1VtsU+o1NkSNDu!s!dGnn6)3a$Tf0GSPC)9FI5!a(#S5K30ewlRN_e+M3g+5{e
zT6s_$iEFMX*46qRZpYNVl|=}*6fjYVuQs5r(?4eaM0j%G|NfvZ(oK=qzf>V#?KF?y
zYA5=4Ra#w~LHD1=AW)P;lk;u!rQm7fNguXlBIwT3Mw0V3RHn((|6$^ZZ9*v9?)y}A
z^M&J$55F&~U@p{ms@vHQv);jT)pH`S`P?l`_MlIdNX*M_dn8Vw`qbw}P*ntZ3$qXL
zdN8$7;_)bLn2gIho(wUd8;CzGwn8SH|D@m41%m;}4tS(X8_S4vM;pw1x57V&C-tKA
z{PvetZTp(k?XROIE~L(cbC5X{qe*eRNp(x4Lwff@n<$2CL=}G0Ivkm`Pp#RDg}dr5
zgw{C&bRiep>6T{`?bLWZX3W4zZNs|h88oHc#M&a3x>#8ENx0HIhEl&)WZi1uF{iOP
zG}8^zlEbaUm}g@(Stbx`_R8u${AV;9J~&p20*bQs`)Zea2+(0YNFb3N$iWCq8BQVq
zolG;E@gbCEmAUdS0Gx|IlR^Paw7EkY2IJ=8fl6Uc_Ct;Yfg$iKw%2angWrGq_Y<c-
z|Il)FSps#H3*s^0s8VD5!K($Hs^m1E^|nK>c;XvUmY9nwjMn9}x*H_Zwn-S?fW;2?
zMXHK2roCrr6cZml?2{cPhL?zF&N6H*e4%ila0PQ5(R)TFmhnW|zs!Z5Os~qk!4aOm
z4Jgp9-^ZD_g0n#A>#8s0Bxe=aZR0n4`)c>KhaIPm7yIaF3)8jo+>LoF;{4Xe?MxrA
zV^OYB?Ezp3hy&=^nd0;{UoyIZco%^{zh}@VZsxg3@?75N5hsPdIS-(@N!Bp!B1U%n
ze%ktLm}lrRN~;f){&`+IQT*B?0D6j)Ww;j(Q}9QUZ64qzROO}8-V@zyvRe`V2e<cs
zr11KuY_bAc;jW)M@N`3lHHa3~iLE7L4pnDR0~SUS2Nn$|M$NIXcm$M*q0W5F4wZ3j
zq(iIh*`e7cW-i*^$DElh+^^Hp@KB^V!pqd7ZPUDOV<6N@8(t;ylIV`zD=s4h0;f?V
z-!pEOt3A7_Z<Z*gmPwow35SD)bKFuqDKL(DSSji>g0K<o`)q<Z`TyRwIl-cuH70O9
z^;XlxfZH|0F!044_GABJ&B1}C&+DJVM!g*%P;{V2A1m{nLYKnSvExdr0Ry^)&#Es2
z#KD<@)BlYan1$|Q!S4a~+8<@=CI8;R$4?5~0G#JYloENk8)zZekLtC$6ww)JLp}bJ
zb_1`Xjj#>?RPfRakkBUHAP<S{xL*Zj{__84yY;>?P1yH~0rX!$u7HP8>WJwz@rLm}
zs)0;%ME#OY*}tU|DY~-#w53xs=l4xncS9bRVQRxN;%PcZv--MiaJ@54@kE+wD9x5?
zc$dQ&uC@K}6dRs*z?VA3o;zmYa$?xIbz5bl#jjl#nK!Flwgm~Ll0McG#^OO1tFg@D
zBMyu>_THS;?hqW?iWaU8t_)YW4=9>^7d;2_`<+x%G{Zvz%M*p&eCz9vY3iJ=!k7nQ
z%1;B-U^2GE@YEqK8v0%|qG!b9J5_A+#Q=qXi|>^Y3had%D6pjAq7Ymj3N*B3mgaTw
zf8N!9h6SQimy~tj*Vh#NV}=P|CXl12a>esYw0}hSihc&{7z`i^9^r-;xeml~<^RDE
zHMlHA*8e|C%(1|<jy3@AWCJ9kEg!f5tdh_5=!fGHk(V;MV1RwRbSKjl(+Z^Gw{DNe
z|2>V}FyR$K-aK%`w{%zZV96vBrxa_y3ANTq3<Vr|=-Yp@Aew6Rbs^k=FZ8y$9pTqx
z21R;6XC2`oo125^HSpeQQ5Z@*6(x1#A>!{(YQS5lPtHlW@L?zqu(=Xh)qW-*e1noL
zypA^UZl0#oa8kcMoIif0jF$0d{kC}2-4}~NLzPmAKZ6rp&ZktkV8qP4h>aol*oe+t
zs+R6PQ*+$iCy@=YU8+1^<<`w}F6CDT&k{b18~sazh|@T)&%2VbkrXpY0n?Mp<I@xq
znauo%32nf9jXT7&&tK;bvGRD{QW$HOgu7euz=WOysD``n(rEBd3=uaYk@dhi+MzV5
zdosp&9DT}&0c@fru2?<k2#G#7_6WRd7%i)U%DZ|@K78QGRQ)-KdNaqiMrZtDw*h>m
zP&>r3*>|w$Gf)-6QvttLD5E!c#IfJ>o;=7Ja%E-{h6;w4CmJDyG}iXag*V!b9C}E<
zz&ysC>=cSgVLU$?<cAGA5u9yKGa|pH4PY9bkx94*@m`cAHfapGV@oz%IAGrLd^GH8
zrlbB^)ox(Lm}g*h0{bDs33<7Cbz7HoQ@17X7hh1CK6AZBJYzyXZt!x6pO>N0m=?Fo
z-iT=3P);xE0ToC;PACKrZ3;FI-y6}?Zwez~OE@J-m`~cxH#-_tv6)XwNX7zlxzJ!*
z#OiJd4aHlB(MzhN(SMeUcf04g$Jg7>{}5m0bL0YCrpoQ|nt~o=CjEbo)nB$pg?5Ij
zqT+*yVRt<ALS4k<s3b3*mZoUyP6aIxI5WO=xMG-yLrdo*Po->@vbIlZyXa^278K$|
z&hsAb{e1db?Ki&CF3{E)j)#3qcBg|Cd$6>c`dKj~Cv|!i64_jxX}0VfYG@f|pEv0v
zzFhZbJ`MCw?<I>6U8bM|x1|`8>&<_2jb!=-9~$ItM*%_>zjkDEn1wF2U_6c2WZ34a
zc_(bo9aqTyg`fR!c<_Y6z$}uHYKJM>_W*N2xK+Z_TJRO<@+jX^*qn_0(<c92R*!%>
zEl)0B^FKYm66!Z-8ZmW+v^d%;JX!-GUOiO9hfbkpwJdh@hmi@$yBE6M;XSm>+QXVH
z;t1p@ln0p&<|lvawz_kt{o2WrdEUH)+8OIv0ZP4B@fsF$Ft{$*X<4tXmDJ(x;KkU8
z*h$WuyYUV%^^U;9ign|8G-&%W`=w#i;<K5732a1QdvKst-tu>nY1#{`e+iv1cTUG-
zfG6!gMVBn(O@0oo-V5v;;<k_x(fTiZ`zRe%(N$v|G-~awaQY+?9UObqFAXD)EB$Ua
z6rPe=Tk|A8_r;`=4ALstSU+Doj+{P|5b2XpSJJ~-I-ZsO%+mB%A}B9SK77ku6cdxn
zgndGZ3_CB<V{MI-w)l<2kk6xZsWjr%B5lEjk^eBCr8fE({E2>__Eqa?D0UFkdgf)<
zmbSW>RU>2^%g<WV`!p<}3Pn(8NwmG#b=ut)d%t)jq_K5Wzf-5V{ld2n&C7F`XXSaI
zFZC^D*?$?9A4yp3OO+-g8i_Wbmw;Ms$4||pL}L^LvETOvTy<6iSuWU4wox+_rtRhv
zhQ%DWl4AId_aAJ6bOYb^`ZVi_Y$!RFX}mE(kci`@3pZJ_6lcSG=w~0uK)G3{OCMb1
zo}TK^T;d*s%ZI8b)Bz2U$YrDyD_ouxw<=SU@nU7Y>My{`Vu0Y6U%t*Gz#}aEGRFNY
za#vLMNLSw8Bj1DW^a~ZmrOD1TY6w-68~Nfc>Ix}XnvwcY?EG7}&)ny|fY6E2(Sjlt
zMOA<8mc)eU(mHpi`LSuY(FfsYi3_nuFpHX%@<f)2jc<w5i^;PBl!&S3%b69PkM~bj
z!#cGpbO}aSHWhi5Jg*+PLD+t?E3z9e+ZzYrMJV>L(tAj?a6u03CoivlrN?RNXBZGp
z>BBN30pH$5vcD>;0pQ?Mm!`n3)*f#&7N2giD~XM4z=9Wq{|X+>`QQ9i(3?tE3R`43
z*au?!D(icr_a_-&gBli3tCzl39X&Is4ti$d$*SpLiNPhbKMt0*xkTMcfBU`&PaeEZ
zLv8fo;NS_Fcpb~W_9oJ~lQ5Tz<^ySkem;`)ce4-I7IoqLS^6B?#1-r8>Ufo;Cc*m?
zBoXw&BKphC=N~ZWqYlc@`VxlGQOn3Yn9rr}LqvVCdUPSaYx80s8qul#<a`tic)%oh
z7@z8i9zH^8iIS%Y)UErUU6DmOQ!#L{lWylT!jgKF$DPzD@#1&2Q_v8Gj?b!hX-qD$
zXJ_CLSDJK%qs%q&gW~GZP*&ge4W1|hgn2FB0+9h`meWA=tSdv|ry!wh{TgD9nS%D6
zv3EbBmK?$MTU+ubL1ygzJKOadQo_V>gD3O09?W3FiOvdghG^*?_ip$7{($GVm^F##
z&mq107=B(q*|g>j;RpI4hVVt5{~a8J;y)h10Smr$1OepRXTEx%IW<DWfk~w)`?fzn
zYiv`jeL3-FOXa%nPF6z#+<ZU#DX~tU)x=^k%)J%7?wco{qYmwO2&Ayq>fV#AZBeG(
z4WDQM#coFYnYYwVR`B;Q;~K|er_NXgf$cyzks2Hzqx_G*hzO<F<Dkh;rqakFryq5i
z4b%_ghzhHn_1%uR$JDI8<%q6T&bL&Ag!y8^Bl`6;_IADtdEtYza&O5hCR2DdCxwlC
zmqC9i;|X8bSv@1Rv`6(U)|SDTMu3_j&T$t2e3<-}bK?IUHW*?(G}Z^TLh0kz_;jKu
z%uXF$=>D1veU?ew$-wmRHv)M<M-${{2uxyyGV0$d0o%SS-Ou_0l_@P%qNLGm{qC^o
z3)G|iB+jT*dz6sD;fCBRufK)*FzR?(SaDkF$l8e~USf|t*r8;V#pGNKM=?=)D!#0F
zM3ik{+wfYud{V00l@KH;0-h$rrtFfZH<J%GGeTJ`zqk35I^Q^NM67&`(+AyCQQ$Y7
zzMSs=lesC-u;18l8c-+XO-imV$6$dr?4lK@mSdl2HW6^XPJZVYoh8D(bIq4rw%Z{8
zP^;PNGL(STdiDqIah9erVCtQAgdo(Q3gaPgx*@^ReQwvks>+$4B8}B^5HPBd@>|0)
zS1u;_`C7P7X6g^47ouLqJK|`iDX9>ukp`Vcxsk#;<#1o%6{`NgzG72oa|q|C`&VZn
z{o6I9!U!3m|81Kyi5?4gv=hKzvm8gRCDj?hq@~cfPsW?A8N*t@0*8ho5mO-BM-^IC
zS`-HKQ>Ce8T>es+Km4`i?j>~rsxJN0wJgv|tjPQB@W~RAojorm@+2mhhRy@~pH$xJ
zB!9N-prql-2ajgL=4<ol<!h@r<CEsiwbPjPBBFy#9y1->zSfV$j6|I!o0_wSXF9~g
zAfLXLRDL9_FxHAZCTDLIUj_nrHL*7^VIedIXjUqL$H@_mE+zAx={(Ol7zIrHSI7<c
zG@kWwj*EP>;>-L<R)muOZ5W48z|jL-Lqh+>gtl7(b+$x77{G-h7~*bIy!e(izQbd3
z{`#*ZX(VY#!dq`MGZGjE==}JWXpZwQoOamMI&+S?x?H(r3r4)!rFLgM0*3824Sm>)
z&3M6lI<ntxI~xbLGWLv^RZY>%MoEBtE4ES;$I*qakLf7ZHaaE%6?+e(F~PFZv8sOI
z!|=<;dnWfIMst&;=+FMM)sX5BDHm=k<Z7EY*jQ;Jxqk`A@POMNXlH+fw@Z^f|5T!c
zP(CtU!tjC?b$`AH;qtZ5Y;}lP+gGaiaj=-;qey%rqEQ{oJ?*dLuEh{N+h?ZD(&nh&
zmXf)osdk=>`q-SZb}U>3LSo@-5)2?Es+5TC(EiX?BXF`CT1=r016#8kES(=(5vmbF
zB+C0m79z44zbp1ClR`fIE`FL~oD_Ps=B~$<Fis<UA#u0v$@o;oeNs=lSLN!Kkocp^
z>(!%^4vd<lgTRMU@MHCCj;~o(VBJ=k1>-P3eXQ|%{dD@<xWxsfaiCUzN9FPH@^NVe
za`?L|#Pkx#hRf9OR2L6{&cHY>hSS2z&FH}u@TE`Yb?LVQd-7HW9tVMDbO2$ycllk5
z_yLz!0)q1CZV{1_&dLOj-`hINF$KvU3b+gZJjQ^BTpc_isq6F1tSuTyW7KwJVqY@(
zdj(L=xPKzj#nBJM<NG-HL)K&F=<hZwGqX~7z$lMn>%QQR%a3ifv-QHJe)xidb$96d
zewDJh<)Kc5&Bs$uu|xF&M!xB-84o7!Uvf)#t17B!Piv2}<6_Zd4-tY3h#7!g1Xu_u
z$&(e-|J!R4$mWRy+h+_lu5jMOUyLOG_<E|EL`xGXT*Ebw0F~fLHc=(015e*cA{#@7
z7+#E9j{0lic1GOp?P*A_Z9r2?rNn{Y;lVyGtatKhb<ymY0ZyMI_^hi>_*bYOtxH<N
z6T0iUPeD#4DQw)YDnG_Pn#eVHlis@nGFL9RrF3^v=ZW-RFnC#fJe4lU*&*<|vymBN
z2)I7_o4GxVXFAmBZ_#2gG8EX>pl80Eq~jjSy(;HXv40DF7-#RgrOv#v&Hs5J|J8JF
zDS{{{NvsR2ia){$J+{w4v#3Dprzu*%)jL0j?ERTZdu{n{_LTN85|xoZ@SBbVI5z{9
zX)QTP3Yz}|&k79AfbL5@O#Ot|GyWE}F{-a<F>{UmP}=BPHMWQ|mAnKr$=@MlX*l5t
zQc%`%kH?9uND+L$D<K^zT-;SAzMuJPJ6V)1(<MZ<v>w{CdadaiAEroq96Rxs2~OVH
z$@!2a6Vs<<zy~mng|DXl5=7!tk}B25)CL!ClUZ<BdBo0KZarP%lHTF&|9<WgXh|Q+
zlsgmjm}E#{?eBUgQ=~k5<^vzH?$aJ%xnYIa@-SG$aW_r<%58DtFN5~KOQ0n6CRp6~
zOMm$86b{XrR4kWvO}uf|1BF$SF=-|Pg=13@x@OaueBRHtK~Q7)nfU103JZ?onqKqb
z+)sNX>&GEm(heH>5UOVPGwHLk`Bb4ai>ac#GLHC>rV}aqvvG~QbxcU?ig#sS^`F`e
ziD1`wi-Jz`u*0?7&gfESFjhP_a(i;G{q6MHL!(to!>TO@D#&QSo|9QtAe%*IG^k<Y
zZ?Ue;r^LpK<GX4KDS?F-CMv+cr+EH<skvk<(bxuD`EmCw5JTdULc&MwY0(i~;rklB
z-6{7e-l7Pftn{B%_Wrvb%k?Q|69=0E&{puU=9kh$J4~*|-MFl!bIQz*`bnRwCtFXf
zNq@r+3^e3#bi3gCck{)alWK}uRa}M8<cvhEr!{?Jtjn=+-Vc-Kky_qc8GYRLspM?a
z%4w-YKl5*Q(7hYlOPkeW4CXsrhZA!}5#BP>2&h+Xw)<hi8U}AsR*lV7g9$npb$H<Q
z0v-RdTsK0n48s6B3uS*umxE|-TZH_1`O3rri%bh>z`xsQ04V<565$;y0q9jYjBx9Y
zidvyv!_DbpM_7r@#7Kn(Dt#jK|139yD6oiv87)lE5&?TVTKU{Q?Okq#(Mq6yf-U(o
zyWG0|d2)5^xsR`i|JBZ{MiHay-=~7oc3Rv|TEwgZ*WIbAIcB7!`W$hD>7PM}gONI}
zinuvjZb7N%_W4;YaKpkIZ;+iFvk#BrqUA_g6EdAJ9u6Y2J)Ax_BEY*mUeZh~I+*8V
z?QyS!y3ip<D4v{3%mF<zf*4cptHczaCi<Io(P3<tvu%?A#Q_|%sXR9HyP3cFZSKlD
z;=^Qn)3kIbu8T-1E2%Pq3I%K)Fz!OxNO!dRWz@#FUJ`mF0t?ZGnLduk%|~x>QeICB
zi=srMUD7V9seqSy00#%#f1BCo6D-(|66HZA<(9Azby%-d<wHogulb#F+7T0TXF%@s
zHJ;l_iC|YO>W8;9@N_mRvmf+?H$U@9OCXOe80Uu+Zep#442vl%WiG^#{h8vbg5%&i
zD_V4O@?)4(qqSS<C*l`|d}DYHUMp?QJ-6S-|AT->(@*O)(p>g_T{2Dsf8=RVkk1|V
z;08#A`n|M0qUl-E$sO`2+q>yiwg}IXIrv))G9pL_48DDI-X7^?bnmY|%IqxRxro&@
zT*mQOTsHA}Aqw|*yO(IQ3p*r>y6GNbLzi$C|HvYMf>$+muT8KVG621;tlsB(1CvF8
zRO$}h`SFClRYEy5vE~{`xG`daDGnH#FdxC@<^LJD-DqLMswfDz$@|1+j1wvt85g00
zPJ%wo1V6EtVDce_u)(6g<ZN)u6N$TixYqsbcwPRf(c%|D1p=iA>>F+%`U)2luQ%bw
zbJOv)CG27@g~qdFY2z&Nj;juvB8qNvY=&Nn>Ep8}LmvrWZuxn#+rP3Tl)~wl>3a#&
z=T)q${T10y<+pQWrg_U8@OLhM{j-29f1miCkFf<)roWED+{ASa1KL<}$0(Q7{ytPL
zSiroku5mv;)+g%Tg0HiVQM8}c{VW4(-aYkc9=glF+y@~8WrC&zJy+0RKavCPfPcsM
zzmottILMF_Uwm4_L~AUQa_}p18nbso4-JW8;`|$9N7NA_8o8MXIrr1C<6;A(l%lt4
zPGgZr0mE5oP_vuqF;J>4+Ug>k_e!&(i}^{$K+HJc_dW;O<wxii9R_;u+YN66|Ac6{
zsTUH;(9r4B-^IRE@)MPY9e#aDX!c&}bro-y04=abVCP-LAv$F=OI6)PBPS+gC5~kZ
zX-BQzT#oIAB(Kn51Z(_yL=Ij=CpLc}v{lT<sdU_-cdIO6TF6_3NmZ=&Jmb||Z2g9t
z+FB{(k>PK!zRkMNGiuuY-Nl>dY|WLNjP=v@(;iN|pdJ58(`s~KgsAfG+^8^?&S_0u
zz*GGXx$zzdSBL4yhtpBmKP4lW0=*=A=TOPAKnFHPz;F67c(?kd*sD6$&SJ;K*XXv3
z%5A>m_DyL$3+T@<2DKd!0tQuVeTtC8JhRIWCvl}PDGqv{p~<tr#<`C7bKG7fz;5(S
z+uzYeH^AHC{+IqKA2lS=^+(jTvMW-SquSm0B7Y+_HQd@uy}R@Nd~Tne<S%wluNQ+V
zAyH{-rmfoj+)!FI1a=#U4cwTiD%@|^U4L_#!|KjzZ4)^B%U?WYFgooIzeiVL8>O(z
z%COZ?(v;|I)|;5_H$(GK^)}m^F+Q12Q%;2bwBoy!p?4Xl^ba1{jtTNKx|KVo5?lCG
zgha%uwgz}53l}2jA4vWc|E|JJ8D;+!{~NHuK=8JQ+9^;$5-{p6JCZKt`QRN6CNyd=
zdxLu@>1E@CO7Zl)N%qVAIbir;Rr?W_q>{3KvgkE9O{6l@`B;Nc)6Ax6FoyqeI-c;m
zjLX$PViWmc?mo!LeIY#QaU0z_)5FW5Jtnznul~_Wa>D-Vc)dR1iZu)x)gD+{7$QnZ
z#wCK_&`S1<5WMjw^W&+{&mXItO!WKkrKnygcc?~)?q$wnC-Jd92^jRA@IS1IwO4tQ
znQySWpPJ2Q9*k9_o$6v6Bfw}=Xakud)AkjmXl=DgX!FJA`q(%F80R)$ujfS+W^<FO
zH4k%zA?hUG8-c9RfTEg#e{Ht!aSN#bvr8nf=<V!nc=0X#`?@8oqa+IVp6#^{5t&Hf
zp=-<bi4PZw^f8GHbaH|wS*0s{JKC7O+I^x6UZk0d%Rl<};tl9khfI?g<LfDGTG*t<
zw2@L-1<rcEw;y=9erxfntL{8(-FXySCQ5TGnL`Y0K%Y;9q=rXR3~`|#MG`0R<|;2E
zDL|CnF)QZFzU`S|nnJ!nxTjuHtzRid6jG)A8q^Ol6bc<pTW>_+zAO$w{ON2X_~Qj!
z<%_59WF#;jv`^jDe5#KSeV+!!(iv6;*c7r9j(@eAIG8*sa#<0`x+lzm&7usaKU2J>
zXiT>wX8d2Xkyq3haC@J5AXOk0%C)+-!ecIZE%OvDnvgJk>|5gvd?_s*x0rCROGDLH
z!;azk6PhJ-4F^qR=m>}z|5dF?Zfs*#K?J=flNf2Jq%@_?VUMUNc;aQv6qjUb8gA8;
z!mA7|UrV(a&(nG9DWk3cYdsx$rxF{)5MN=mr-ny((^$oIi8Q%@&m`wl==>n4+PFVK
zgLP4E?d~0hz@Oc<g3WAKOh(2YcoS#P;BAa;FE{&Wdk-S(C*@FhWvN$W95x{jQfyXn
zjoEmf4YkS9b@lY4k$zu7|BL?T-)acQt8_2{4@s~_BFd!%j?)>Ut}tIln-bw00y*`*
zHgzg&ArsERX(s{0GS!Se%F7c*@ti&`&V1DIa#ET@EzS>fm3<5ID8}}5G9-aDx$9r#
zjUp#FMjG+U-kxXha8NIWC@IK0^XG)LWF{ES4p&kq`tlWWLbf-hnC&8-J}@tR_^+WG
zj$P`pX9_J!6g7h*6gL;JE0d{7eE#I#qC>U9Gthu~#m~FBhHekdG?+dFFFVKMCI`Q-
z{SxEuHJ!>NJiKZ6A)>CfM6oct>@|pi-s<BhLE0C8sLo0IR5a_cOP-mK?{unUvRQx~
zUc<2Hlt2>vP77BeZcP<G{%`pGWra%nw7#OJ^<eaaih%_aS{-o8j{q+rLzT{obqwt*
zu`{tW-M%^s&`%qSfl!Y1_sx(2z5Y){{iJtfl&f|Afwz=`+dT<#Y3{TN0!Mx-TR0nS
zeT_Kg=-!9ynBV_;TFjn(^L`V3|10v|Bn$-~3O!2pOYGaE0a<(8C<W&BP(-i4<oUp4
zs=Y7o*SNxg-$LKyxBJ0-q|I!nb4fW)``_0W&j4>sxJKX|Jq?<`VmH?`dG5xoWa`T@
z4uDRZPjRPhemnhR4>u-Wq7J(^S31_#Lf0vGQ}FXDY2TSi*UL`dx3ZtO$fdWIqO*q}
zg%s#-!M1f$TG|BdAH`l3xSfBZ+2je$TLwazNZ@MQM*=$a@iTIlrLInoW|9jbFXtav
z+PxU8-#hV!AGy^vwH@NYu);O4j-pty*ElIESyi9tL%aBR_bU23TuV}*9qymbp9Y=Z
zMN39%GQR)rvH_1W@Vfv{IO|+ssRI0J>&a&E@)5xS)Rc186t4R?9t+3SxJkX?74I*z
zF9rIdO|F(lz9%F3eg!pcqK~`1+bV}+`0V@c8K4N(RjXm9aJGhg(>nbD*J|%JKb7O$
zAG&V9uBjvS3*X8F-hxI^noL;T>m^TqYeD{SxgkFJ?zo`J>9^8O80tkvN7hUK7yN;5
zMjc>QH>Zh^M+pe0l4FqfY23P#He6WOWTT2xiiZvyQYqi(Q(rXiQ5+pT0a*F2gQ?YB
z+DJkT_}zeVha3|Wx}FC$*c1<~j0cVKoK`jG!c}SCI|I#bC>6+TZOv1<Def=kT0(*B
z)_?aaU{WLt#@xOx#rXn)EhcL)b|r+iFba;?+ucGsgF9$_z@J??*8>-SE`O%1wBxaX
z3RRLf5<w`n$asg7VWXFIwG<WIOa_fJi;<Dhq_qZg`ICSLzy&mxtQ=DeaU8GJOpJH}
zQ<Yqc-UUJaeMky8npU4D!r&U?40xP022@8***X;Nw$!fHS5OmVXo^lSH2c)B1@qi0
zHp?i5^(4^FD$k>ltG{-W6tUQnsWL5X+^n{$F;&6Fy_|FrWg6-Cl=xt#2_aWHAB0{D
zD8@8MRSk2J&)sX0($HwLv-q|-M~W1jV5&mJ;!Z<UC{z?VnuUB{=Fw~_S3<+y-VDma
zHes~MC`HQ8m{)n8LLiFqr@ndEV}Hvg1^hpWGZ2cQaPmNp2<Q_mVqWxo=T-GhPJv05
z+JvmBW@T}7PS3E2C)QitGp-Qvne5R0&_?c<b{C=bti6~!Q91*T9s`mR+qSZyFCNap
z8O4-~WlnWo4Fb~b2~SG}t1rhFTR%A3wWnC%jjzm(-zJWmHCnN;CY*^Wul~=M6^0z-
zcYNeGRvzBGQ_Z{kc0B!gQ<s-eCsdfM%e#W`9kl|9Dh@yRWV?3-e`?n<ZFe2xQzHH4
zC~%r0Q|_2B?8ZC|EK?Y=2Fsgv$`SX3{7H8ek59^Q9g-?w2yv^0QZd^xJGpXHnd#?R
z2bxxsV<ycYH1@ByeqO`01`)ZXFAUzUS&vmd9Tk3fh6q<WK8>$dt>zE6k+ddEdrwyf
z`Nh7-lMh#?j7y(g=|Nl8*LQ!OsJP>rRN*y&zhbiCanK&5%cm4g&^on;A~0IV5Xn^9
z^h%PuBGE$&ox3+anfcucJIBoGSlG*6!?zvGs%aE134K&`-HzjEC!xVQPi%Ut$>y(y
zH~(eP0c|Hs!T3df$$p-@=}#Zk%Cbl2@mzEo`ZUDbj@*3JkI%FT&1IY0ogeyX#J;|M
zNF;km_9e^dlR>9`^nc}2i0BxwoU~LnmJww~nP;bF@LW3il|GHQNLcH0zr>G$h^0Ww
z!<Q}ZRGsAekClGfYZi&@^_(rdUA)4=GH|y{WCF@X%<oGVC%H|~x;Q6KL;l6%NpEDI
z;b?K$<MHj5@PDf#{pOF#?y<in7vVZ{yVBKO8^Y&qPesYbs5aH$K-m>4$DnVPmJs72
zXN{o?j?iY)dYR{K&^rV#@j~CA1&_sk17owE@gCK5Hj1@>@-UYe_z_`)%d3y0_u*`u
z^8$CFC`q^HeBOO(VC>6b9aZLed^L}yj>=YU*rdk@x84xB6%i&DhpRui-Vqqddm!A5
zH7-xpPnNOms%nF_DeQMm6uRBHQkyD67eU$j=Zsu_Vd}NJ)#vJ__hnx=ggktoaQ|O4
zi4f5U>K=!vvbXRYYF|$8pFI(j#I{U^Oqj9=Z_W1L2pE~7tnl8RWX-=zUouAG<U1a1
zv~*uUMVf;dM}y1*uZ((O@ju_b!F2I_f*A#=f(9wlKgdu|>k*WTH2Hpj{Mr|E{ir4V
zaN0JCz;m3gchPw#($4pX8i83s+Y_lSywQasJY9Nzbz{BLHSSJOo5|*z%~ePR6(l1J
zT{f=Cu4-2pR|gJ!{}#5woWmOswInNF@>Er_G+@ip;o*jxXne84zYG=ajtyEp<eYnB
z*`S?F`*~r^*(m?XyI-(|X3&}Y7?46BgChmUC_h4rN`n}pBHy2YZD32?3~=r<?Sx*1
zobOs8wZe&a9HjaPnx6}6L>K0>Gy|skN4k=*o&J7+?#Yfg{qHdfWN_Yu5E@UuC94d9
z2c=I$c<9v}*BvwZ1x_f=Rx9t5IAnChvTEs!e<QnrO{LI|(oK*Q5O>q8f;3H%f6~@D
z(LH+Y^vz*eCQaO+<@<$<8^r!KQQ&oF&^BiE?bn%mWV}j$9$7Pk&6~$|bDv7b^GU9^
zmaX3F&(1};pF?#@V@(X6w`zP+KQ{^1S$roqH^}3zVfgG>IxJ)o-Pwd8CZaSEbNmz2
zk0)xtAxY-p%RW9K1f4ZSl##AhhAW`>Wa1V>%s<oBw$_jazEQS_Bv=v%F-8HBBYiO|
zBnjcAILOO69+=K2<M21A(T&2rn1*O=0m?9}Y_EY$);s|5HQA(aEU{#F7gg@fuQ<$j
zcGuRLRvG$Ffdhit{P35>0!O{ydPNz70C&Kw-sFs_y<`Y2iubDV7e>KA+jU*ShDCF5
zANeP^eUdwzVb<+;a3nwA`9E2DN;@Qd*Z4w+&23`RZ>bBN!Hm><neDfpasPk|i>aWF
z-y{J`nznpXUu9>RC8RY;r;T)0xYHCCvqs^Bny)Ty{4vzeQ=QpP_{6>zHzsQ-24vay
z%>!KCTmE~XgHRP(v_Vd#bREv$BFn?JhQB&z`H*&+ExW77$gaR|YKHUfc*cXxy-Hrr
z{Ew`ag9kBZXQ;ta#^JqBv$@%ax@5#g1crV~oEeG@iT&g6Ii(^CSJ7w?y)$j-u>0tJ
zu_XIhqtD5Ua=P!d#pK{~d<D2&)-yXPCIO2FKrQ_5>Kx%CJHej+7&Rs(MiOEmCzMw}
zOAJx@D+bG9-6q_6s~?OVqE9+2ObB2ltFw-pAA`XkVT@siUB%f^*^uQX2B+v!iz{d&
ziODWQAgu^hy%m9fpsPet&^IvLQSS~+lX$X4yq1V`oeq)som*qM{s5%L?#_l3O~ZN(
z$5P(4?PVJCqlfvxywDuw7DcKwb|W}<SYCNMh0>NyJYz(MjN-4(C=F}rXGx@-We7TO
zbR%0xVtnhLqD5Au3mygz9pxd8#V7sM2weS_CHy?=C%=>;gdEFmvBP`vMK(MK46vYZ
z_H{{P>BX$b-_8<=!W*2(O+s?O4s|b(=RR}B;5#ryA7f;i$pKpJ4^S-%J@X5=MX0ma
z(H)J1eG+vdqPZc;BGb&1e}2{B|5yM^mOD5r1|ozRY5yJWI<ap?-UA_cJ&!3gJjBe+
zB5$B4gJ~Phtws^`9@lX-!F9@Q7&gPX=sg%Exgt>S&<ZzV9%x_Ve+eW0MGlZMeKnQ?
zkt5f_#`?HK%CQB4)~I;!JsRO_Lz5TLXS{b?av##+D(S$YJWSE=^#wo<hWSfPXY7xm
zhUeILT|qRiDQMfL$g@FNbV-a@V9RTWd+R#=&j?U&?pn~>gSV&h?$Q{G!Md-Gg}H|r
zCu9;a!%|bN+0=+?uPrz^O+O@xuzy2(V1%1lebEi`uSDy>WKNKqJavO@2U0i0V6|8w
z5Y42ndRdLlkU_wlP5bANh0PKtQxjgT>`-H=P_VDFO+%)lW-Tj2fIH<wdWKMCA;l&w
zub<ckdCKaXZgI#<Ive#It$k!B$`T|?bFh*LoNcK)(P2^DsQ=q8cU5B5)E=N9vo_tu
z1jgYsj%wtb%{@6(`E1V|1Rfj5;qjn<m0iE|MRG}|Z_f31T-)8r9yMxvKz*=j&k9F(
z8$82o?!;;u`Ox<NE$L^~_5WwmPg&G3@gGYP0Ly=Q1m>2~i-_b6#gQv-{T|}I&CUsx
z`T5@-5PAor)Ri+O0zC=QCn}LKiccSH)`=MI?MD>xax!4_rIW7aIr8oO`ct?Mv9m^U
zgs2(CC6sHP1YF(9OAelNq4OzNxEAn$!h7$ULBck%EQ+|6GA8|4@$Gt=5}gHF^f;V%
zI8IHRIT3n|zrF5$vTfgauiNQCOeiQ>LAgVBY-eAqX~CEiBrnRLyU_Lte~NqQ@LR@L
zb0k^;TN{2YsryxyTE9|ZxE1JwfhU(DU52)DBnXj=;_LfeJzFyqwXwtd&7tTggpc`=
zCZBN6BSy*MuW<TOF6;v9<qE6l(CYx7PIWUlKfPRutZ01x#Z0$z?yR3qi^A+dH^(F!
zNHX8hW(~<>avtK#6l`J*lrUR1Hnqt;B`@i%l2VLW#Xm8*CHZ@J1UBuDS7Y)k?_Cr6
z^A&a3np|5V#+cx+5RmDClDWNACa9@O^I{o2O_mm2QC+KcaLQEvc?XXGNKe5qneTUi
zcv=vJC9?p`0a6YGGK+n`O5Iag^rQg1JQ*+-(!T+idIQX%&Ib^oiCuS9j<Z$Gn?(PE
z2z`D2a~o~+9P$sUWNT}i4<OqUUV%(d5^MLfOL;}bO{0yS-NMcDzl7R9&udv%S1v&I
z37i22HjDj3Q3^d@_6z&-0XbkGhz_``mL%y^%M9fiA>6l<Fie->-l@7)2QFFgIy;Sa
z%c1G^gz4*&qi%WwpK;eKj4hzdy@HcH41%ml+*x}o&};3jbywqetgZ>%)w@Eg#!M|7
zQi;aFoCxqA5}1f<(moTj&6$4(JBu_1{*G7Bl3rt}1#p|l#0Trw&foo<eNTmUj;cQF
zxhQ$|9FE@KXL75PYgSUAP?Z)3%n(tk4+~>ZAsSwi=n;c(aiuT5J7OE*yo-c`XHth{
zW{<To36(dWk;f!RK<=YccRd{mIK7Z>v!S@mpeP-=BX_*<CRld$tSW18pW{1BGq3g~
zhIHGdR(=5HQ`;B(BMW_tj<&hl8T^M+ng<~7^1#aTHa0ZO024-)kuZnwRgpHBfw>;H
z1>`R>Nex+!T^X8{+CQs|*}9)O&<Q!d20XNH&5oro`A-5AEG7FWxR1|3N|KAu512<~
zGnLJEdywU53v0vkrq&n0W?64`c;5n+?HORekvRCR>H6mrm^Vu>6<Pn%bb$2zCM3G>
zfo)21K|!!K6`gNeH0U2Fq4j$$nGA``usz{Y-XlHe%ilUbqc`v3o18z7xr<^um|fUc
zgc#&a@g*tNvoOaLM-$X6%D?*oUx?<A4W6N$;~eh&xDGRW{;J&BlKI@UMi*G^=Vpx=
z{hnEL*$i!r8fhamFa{8@TU$tP;e8axg_bm)C^wtFi!ITD(^oJjScTdr8s6r|$`akr
z^L~+pYjbKhhcD4+ZKop7UBPTtY>05L+0Nj$-7xjeCd&7$`~KTc#H8P+{g~JU;qEJW
zU8sw;$t12c-#$-Ht@IeYU&Q4~;mseKOtaBy-IIr7;h4}U?|dH1oKCVSjQOq2Kh@TD
zvtQ9PIltpIV7%S(?WzGIZHGzX5J&1+p`24rEJ`luyZ-W>`jV!!0!jUR%mrQGIey`V
zE--kU8iUIl0R%6emcAG;c;P#S$Nh@cJTPSY^V7pc$^Xv~*0!zud@9&~6xI%PF_M2@
z5U5d_%v%EYW43W@obY3*NAUALTcGUz{w5|!Ru;v;dp~8B6bByntI|K8G?40(`0;QV
zFhDGW9Fj<bL-T7&$id4E$|)#()mYJKAA>p<Y}i<5XF>+|I4oTxAy8R!f*dS~{@ytB
zYc1@xV4wb$kS#g^Jbv2`Q97;Hdz#0~@!b}^&eX}KVrSY&l9TinO7e|lod?yyi%l~C
zua;VCjxUskFfQ3=98*u5U@-w1{4*L?<BFFQO;JZRLRT54Al&L_F8U^M5yi!klDk(%
zvY3LxmjZh^0(&CZK-S>#_ghg|u~Tr&s(Q%-t3f-n93n{qjI&TF(#M`$_f|}SvKB>C
zAW0!wsA|`=ybGid&+9rU_k#@sOD~O4*2r(K`i9o;%d9#mYT>HC>PTDOhdEC)+FFu!
zV1c_XcW9aJHY?$5>Wxg(@2Yp%CB~Yjf>k(F`vtjsHGK7cyT19e5UgG|v6BjfJ6m1M
zzGgcQ5#D!Vv-~eqcIN~Mv`k?sqFs+TPFiUJ5M>wdo~{1hkg{j3)Io3WPZs5IIq5)^
zaYAu0Aq@YV{UM?}RB!qQiAS{Q1{wqr?vP0BkhMb^$(cR$JZ8UQNcX_=>GFHS^3Faf
zXu})-fkRxjwZe?ig_*o_R5<d%yt&T%L-$ijbii2142@{yog?x<ur~*#IcY3w)XUDC
zi#Y42g(}}862%d(w-_ybm|rPC%bzGB6f5Nx;W#2=Sl&kr9|%E>I|*UG2E{Y`*~oJp
zp)6(j7Q(XAV$X8i{cdq+61o16_F=7GcVv(Aa5k=iUk3YNLbKQ0-$_VKZ`o8T&wQL1
zYK>Fuq(~=uaUoNjs40XDqvwW`p!&MG99npCZ-pz|-mNipMOZ||BP&}}RTX{lHqcSj
zwp+-9o|0Xb|GJr_y};i&&*ae=!3|}83Ig-T6d2d?$rB|_Ix36G-A>HyxJ0d~|4N)P
z<v!;p@zeAiVS#)9M-NG4Ah5IvYwr3j6bTbjq76z_NY`$CJO*;~80K{#t%;6_;r{T}
zz`bp$)_@nE)mpO%k1`2kY<LM29FH);?XS2Y8vh4gr;vmn+IuJl$Tn?wWbc0e3~sL_
z0Klp21FX`7kR50Ert;KfV)OV?;+vrg-gSY3)Hm@$0V?h>(XVrOEjF<g^tZfm6Ip0{
z)~GVXBXF=PV7%|{U?fTjYah;x7M(b=TjzX((y2dHZ3T?F@M#@mVZ%qREm)bX@f6l}
z8~wi--E!GfP-}QWhA+hqf$j}Uze5i!)}rVTkV;`xEpSj0t*78RUw$1}+(zQD;ArwY
z^i!KoiKU8R6?81|s^48~&A%wCni-Ntjo3)%@4b4(>ASH$U8bHr*|bqvR4l?B+wK?p
znh%@^`gr<tVf*DYrcQNr_3aDLAG{f*4qxl{Nn)ripj~~b^JPhc7Ve$+4s7YP3&~Km
zlX9Z`YPuKJ@wf5kL_Z?-`@8&7<TB;z)(GSL*o4}M$+Vu&@)3O+^-!pPNbW@%j9k4+
zoX0d<vsi1zf8u!Ui8w2Jc>!vm*z4_jL2qCX;Vtxd&=?1}c@)%<L(4Tr&%{z#EHaE=
z?R(xd`=UL<%7!Am9ez*HXLf|Us|<{|5;iln(ikMw)YQb$=a@YJF0R;$G}!;OfZSsB
z_%yvH9J04OojP82zcenjf9$Ov_jY4N@(z8%M7yrq(B{cizRL}*Y8kp?DE5NoRn!y3
zcpy#-mr1ab6n%qJ`GtvOBdz)idIUOKK_=|&PmUu-F)%)cVLlp3py(SmW)1A81s}T9
zzl}S%!VA9eh<smk_GGG1IE$RZsMM*)cVuw#1*p9F3q@Nk$yUkGavYnK-6r-59~@J0
zC6Ngrg)0(h&~wqAT#<mfv5X~&8_@lbWVeJ6bDUU6Ee=W~c3DiG(MP1*2!bfJeL}cV
zl~l#ki)=nhRnOb{2VRwEIU#ltoCS+uA44G*33I+Ms`t$Ix@{phu_|B4pDtOuwOWgF
z_sKEw+t|<R1(ouJkfl_D;w8>j^^oQOvwcyWTJ3tnHK+SgXQ9}N2m=#DOQT`b+~oq%
zJx}VZ2(_@MWaD1j{i%t`)8@d>+vQlN^O^(gY+D$A@SxrF%J58Jc@VXyqjmQe?h(o4
z+8OA{_bpLZ2eDy=uF#X#4gg{593a+G-BiAn_c8{hknvIyyW99FSb;(e38TEG>igaP
zwg2SvhYSi+>cVE9tX-UTHprn%J9>?cJP%JujB$*<qGC)e58*=IMz=r4xUrd8Qq#$b
zyUV69d?wYy^k%2;v8eq)zG<VQW`q?;yGW8FE@-{~qXN_Qby<yEho8z>)-|g2i^oH#
zTDm~OQ@IA^0cQC-miVsUc?-*;KU|>AKK^FR<&i=vwu3Z*Gt-Y}#Eq#NWXI+Z$2*bu
z#dj5HUukX8u&_AdxkniH0|qfqeov*4P87VpJ3nhAwA^UKKa@zhsG6!1$79`6LV&bY
zH|>rc*Yoecn8K@*=^BaR#pRS3d<d=7-o%UdIGa%avpV61Kp{C!mpag@ROdcUXXdmm
zL7ciPe1)9VR9R-0`e+uRwlUj}EGEV%m1fICfh!3?X-BgBzP1bHQB$3zJXxE|3Re?r
zK<lJa)S2umg8Zp9&ec|K2|RIPm1~)<y}q<*mWc*CBjuf`K{|EaYf>L^xq3)BE3-&%
zCy?Wg+N*yWiX_2rWYySj$bKNbmT7}*qe$NCX_Oa!!)AoF*=x7>;@itj6-Ij^P$io&
z*LWx>-O9czfV+U%_8STEs|*w=`~A{6<Kpi;_@n-M^$VXr=4>j<SVQVAFaPgD91-}D
zcTjenr+zS1jOXXjCqBseerp_L4jUn%Stmj6sd>nu|FDPZ$kieT>>K}yys7->t0MpV
zC&isK<)fsUwR58fla2l+C9PZ_MGTeu#mRkN6L81m^K{mH42;j5u|b|YYyR;=6zP-L
z`KN?fu^*~M_C0iS&d4J#YtGC0R;>}iusD*wzJh?Qihy%Y%aC%v;hkzUs(4C0i1^O-
zk__OyKw$fCi-wh6k;F=WKN&akLR(7cxRKP|3P<GXX*?#(S)_|{d*7#91rp(?JCRTx
zjXvkAk*@cs=76o2v$CQV@as{^{H&`O4Vv%%ma9mD&gBQYI_$(qsjX3wAqw&^7R680
zv5LX&EHd$k#`h&aOP0o^HR7c7zAs@xZ6+_gOq7Bmg&Da7lSBVyo>ny)e?)&{_A|r9
z?3c>oC`#mfhRH|H>VnVqUhFa0JV)tYQx0KWbh~%GSKYQ+EqQ01TfqVMPK@fBU+OW}
z5R%<%rRAtzbhI(sDLxkWgb>l$@Kvfi88NGD!QGBO6rbHMm9KyMLuO3s+JSgSMf$*x
z%hKLihQ@p~H(-HRuF9q`5isH5XD97#4&_`ji=$w`P4@rM_+y0GgN(WqrAB1EuuGt6
z5F<u~r=utr1M-kxR<L7q<di)8r(m}#bgk(rN58w&f$}!2MkB!B`_ZQl1Pt(9NP?J`
zrI{~!xK&_`iRs01IcV|)oP+*yIJf^R^i6ILhs=z$e0ZeW%=<HjX<Oj1CtEfylA{sB
z|5T5z2f^ADNpLc%%(FXP*Z+%dbst0G_>Y{5l0fg+c1%~#xW7wkTeY74u>gprePn1(
zL5#u<vOp&e8Ek<aqH$npG{O4S3_^xbCwjy0kxdB}d&EVZZxik3HcmOj?{$E|Do8co
z${PuDsThZ|%fD-G8w>2u@D%J}xskvwN#Sq32btm10+tmvl4H+kF871J#8P<ZoGAV>
z>OK{k#g=gJqw=*#<bg&xXnkI$XlZ($4-%hFbe=uZY>8HBvHa?3;+&k$Ui;B~tHt`8
zx13`$*DT66FVP)lyVWq~80Bu8ZDz(v5s8BFachH`-+vEJ<#e4k-;MG0$-^2}t*ahG
zrLik@4;sw^=xi!%ge8)SmGcd?16#@rYq%w_a51JGag=l4t+N_=J%09)+@7oX8Kt}M
zIN<19wAxQ3gYBt3M83ExR~piFnJJcRvVlJ$@LGe;#I`v5qB4b}Y${X2C?mBR5Br9u
zKtf1aF~9hz)23$svBLE7LGs*ey&D91+(m!BNl03Fpo{BU&yYNM!l40M<Hr)cI~&H>
zgvI%5YDla&d87YjcG_=We3Ug}{US1f+D6SL_~gUuligu?zbDM>C(6fwUrOJFhSD`M
zp{JO($S1JD6=g9SBITGfjR-GC_lOsbGtKX|v$z&~SDa49<k}+cPnx1$Pnj4>WDCap
z8LL4z3=&Eh7-X_|MK6Bce5{>di~GN?9g}0wswvYXS^m%DV;|erzC97Nt|{@><HFhW
zWpWCB?~%m*>!)BcWT=|3d_iK5h;geAgH^pndxUoWt%HZ<yYM3l(Gw7iB!1cZAVLW*
z<^3xdY3JZP^K(8&`crTOh{Y)1LoJOYShu52EwM)55n*f3t5N#%r9Gxkul+t5#t_{+
zceU?(o?IYG=bleNV+l?SfT=dF<~#fuXeNa6YD5W30||sjaeKt`!7qhu+q+q3J?J6%
zV+nAkywTe~h(n>XJNRT5CT=b>KiAKX5QdNpsuFUnQgx+p>{=&#M7|An*LAw_^-qzN
zj|%X;muXvfBHeqfMUpK85cs3op3P-!v+1gEk-`eEqS3<Oj-RIj!|GYOf0w&I{@!N?
zOeoEhjDh5Z>n`#Q0KT>ydK?fYR-ua%oSPs)($}l$G*O7-tNo^yt{Ny(I&=|agDVd0
zBzYA2?lC2m@GGB^bJZ|+(Rbyoi{*sgz|4YFh^ZXKZ{M>88j$Zwx^+Lp+8*yW3wJH9
zA5mK`mHY1BKQ0U96AQfhSs2T5ePcY|UYEPOta9>12{byX`oT9NX+WMVZ8)}8IC$#l
zS>@ubv4ffjOTgc`PV;O3RV{2gGd!Ie8`P&nqLPp6optUB!fcw0qJCt6f~hXEKwQ|n
z7h!w^OSPoz2>ftVR|J>n`Xw&u2vLOk2>f8G5H3#V@co-Yo)lOK|G<s~I+$X2P>}xJ
z3j3>m7^on8Pohl4+ua5({3^>8NKd)*f=R?hS50W$u$8KepQEy*l?rrCpm|8hPE=@X
zB|$U3R_3?s{uuR{;B7O}WT^mOJ6xXF=aD_e+2NsCve`72|HAk#|Aq0br_C^lxJJ&E
zN|1g7oT-X#`jJH*SmD)wD4TzMkO&r7k|213ND3M=`lcy(z9?;QZh?XjfQ8jhJ0a_p
zu<Nq~L`4y87#_xa>;o9Ck+GpvruwZPyTVC{-7@=4#b2zeT*AyNU{AJGL20j(x}3vj
z_xjudtdC@nr?+45De&ne{2b1W+RZs8HLGK=NIQzb&C^x$3*2#ppPRTc-Fa*{p$Z_9
zF~0H-kp14wmg~$a-5i#v4&J9<4X9w6rc#fl@lsjDz^5S}BHvYbA^~>cbU3}A>F<mU
zwi5my?7df1lU)}zsvsa;L3)<}DqW;E6_pMOQX?HH5(1%yE?qj(I|>3SozRgQAR<)|
z2!s|o(n9a>KfGUi%egprXPhzqn~V`-!ruGYd#yRwUTZGDlb~pwLvh6km32{nnV8ml
z12G;Kx0@%FO4dHe%8f=0tueL#N(ctDuM1$PGBsMoBI{R2;ZFr>nZ1@Vcv>P{k0G^v
zK!%^|;uLot`{1dpqMUpYr@CI-6JOW8_2Mr(N`j?7ue}INbbSA&C2SNHRWkFSfVMFC
zj`9=5CiU3PoQ*5*+(m*{c)OmnXtA7hY{qUh_!V4FrfiiG0#A8u${Ia@%|2<kw~=uI
z<0@J7ovHu+hlV@F7&M{ko>0f7lE83}2K8Iq*`+vI`z9dag&&;C6m!?>-_!u;e&l;N
z`jhu&YekI)hv}l5>JrJ;Tko&8ZNBxAIEIn^W_r()YLg;Wb~tvlT#}x0=syB+h~pOs
zEzIJ*E2S7|DLZ;@xPV9OPje0L@y!R%8h;cgeZLpD&#uti>}K^!Kq9uWRbD1_?G%zx
zLt(FOrl&pSoS6xx(7ms{ap4jg9TNJGGrtA6;^RrB-;!JfE^=%YSjJ~O^?i$T0=TL4
zf3H2{$d?0(qRi~szfaXbFJf80*+osM8STI$!&83X;hB*XJ<1PK&$6#pc7YhSt2Nf_
z6G-c(joMb1$4JtFrGBavB+?8;4e8+Fz7x8Cce(aNYMF6pMQ~E=`$y}1l`>_}vFIkh
z2%5s>1uDI;*v|O6%<S#vqLcDt{^h>7y`XhKt$R4TALzfW#;(G5y`kCW=WTmJ#fOyI
zH`1o7Kl!YvUc@vCUPTmr?VFr*!Ji!FoQ&+18M=cP_4fYP&!1-^pXI_H2TOnmO~{%+
z6xq*m*}EckZghN0zQ*t%CxAoc3emNj1nolCAK>*4Jaku}+<obAaWdj(H>u+_?WLhJ
zSpwF%IB)vecwl&8crtNO<h5bso{c!TYX(FlvEjTW*WD*ziqz3WI?daOrJv5(&VI6q
zE})rDQY@mCPl}8H&3|=C#@ddX2&Ynb2k$JRB5((RHl9&;@YI&->jxrY*fJ!AUAeX*
z*}JR4?1bEG_-Zq;E@lc)X=!09G2QiBdV!H)=tj~LttdBvyLRG)R;RVMYSbs>%BOQU
ztny6u-&e9LTSS4G6tx^1*z`IBvo%qeu^A?1QYf`|w-;S#f3^&KiNxXOY>Hc#Ge}`*
zcE?odIOUc4$-dp(6ik&W!)xWIn4YH518LLT9Xncw>^BixT_C9K>2GU4JB}}nt?PN6
zsrN}MyHXN!$yureOr6}PT8bm&x^~?1&iM-on>DY_QLBGX;mhd-)JR6ii+NMDk_+oR
z+%`Pwn5VK;Be1twbUWQ_EgYJ!llxwmfV8evhTf)?+#(>pV~zM$V#J2=tvo01+aBDg
zoOCBiY3$7AKQI$)^3n~NgZhAq>UYcvRcea1eMVPmr^FpkL|BBbzV-vT4uj$l4sJzX
zt4FM4s(H@%->W9^5%Xj?KCb=!wQx`+0*LlIWt2?c5NO;TbOFyY0hcqCFi}l`q<+Pm
z;3sQ9&n)O~N9hFy>x`RndtWpOKvsfYW8H&0FhCaLbuCbBkDTp@K$4mqKCV~ZYXI76
zIiBr}h60j&(yzSMt8A;+^3$OtKsRRt&yjV02M<-!bCylGBK*>kx3PN#ZTf6_t&|c7
z4fsz!BC3+RlZjO3CTxF8e*>R7!vCkr$afN|Fs(0fku9_czgnH}lfI7{>5Pgj&9BUd
zEI=XpnNBseY3W8G7#{w-xz-}4YK7&OxSsd?N*Y-{zvZx`FbC0FAbz{KVm%baTc>_X
z^3R6%Sgl{pH0e9lq$XJrzGn|4>X|tMs)<GE*dGnL4SAKShb`<0KY|Y>v<7QSI0{Hw
z+<!)9*He_}(mwwN^h#;Dulb}EhsCGMv(8OH_lS_neX^&R6+W4DSlKy#b&;}yd63;0
zaK6Pek{5+J87Z>!cP6ZQN-w?M#}()QwT5P}G$xNow?nphmerJ_vm!gQBj#Fm%<%-`
zBYOLz3br`^5DOYhNp<9Qscjs9=0q{H)N}?fl*JO8Pc}Uh*quX_)aMk`Abgr*!-vP*
zpszIwYu3MZ8QeYE<xMu6wkfcfs@hpkoPw(E^7yn7#tGx^1NSg8uK>*w+u^6*%YOd^
z8aU8Q?UlHH#PGr2(u>$C{jQzmOjJUf0@5W*!4BSYOv;f@Hh?}F<k^SqJcCG7dyd@i
z&$(@YG|CM*oIhpIruUMxZIy})I=r{EACU1(NHI7yau;R&{;J-^w#DQN`^w8UU2{bz
z;Ftmr60Jq{34D9bKAm@#YK@;uKnj=5!#tkhLu!U<(X6V28NkxZs?Z%8E7gx<EV9EW
zPd2AL9!VUOj9kNGGQ(`I*#_3&SA!{7#ay!*5f(0Q0BJP@<uq?mleLkESuP8dUuOQI
zhdA4#w%n9(VVz0{r0;7*3T94Fp>QZy>I@ijN#mu)3xvf=WO;Z@@bGDCoE~6qdnne}
zc;o$Kqt0-9jr_XzQC?0#Ox%}ehX7&FP^wRNTmzI=wqFYkMnKNHn>VRr7^MWl{m(Oo
zSCL%KbnWWDz%i8lfq5P7=50y5=$^U+wRxQJqF)c1*?RbFy;bf-5GBa|n`?Y2_0k5*
zR$bd3ELf{&KqH298p34lrAh$v&3bYB>RH^%sI9phv`ZkW<JeKvVx?~6s~pUZ&q_>D
zEA}*Wr7@j$Z{vMb&|X@#Qk&fFN6I{T$isJeyYAz*1x0iqQ@93se<sY?j#LI{a)@`%
z>a|v0nJNnPnSabli`o~-+KO}p^i~RV?rk<-oF{xbdJ>Diynj+3Hh=gpBU=CC3bYE~
zF6!x2`y}AJB`rr%kb~cZbNV;RDelyrX((kab5kZ+*W~GS9Mi^3QV(LYwmH*QJCx?)
zcY+SL<Y$Xyr1=Q!V4-k`KUsfeP>?SG!+F8W$eJo5cAsxNwjq-&Rs&&I`!s=|i_Yp$
z9|sw);LKJd#U3)yfFRb!&n!;`m>ul)P%)ocqx<LSx@f6xf+W^SXrqWlLZ(<#tm@6F
z0z|~!PvEMXEt0MG@(y8F7TUaK18A)X20(nLI}=+5QrwTG_+$%`#^l#|y0eucJZDlB
zAd>z8`<m{yKH0HE%jG;^n}?yNvG1wIzYjT*^eu$I1;kJHdAgWwFRFRgGsk_ooh8mO
zW9(zn$SYs7h%D~^?v<YN4bV<3i;^lea;^nW@Z@SMcQ?3QF>APTs>82zt?I3kPIVA9
z{^dIQ^S55f3hTD#oXJkKlTjR%$YG=O?y4qDp3zmGq6)JFdXETqr8f*GyzsKsc28wt
zjY04^PwZbS*lcjd1ld0|S=}6SrIKlIpas%G2iIe)PVkop$&|T;AI%=`*8UVD-A~@Z
zFr(+gB>GauFzTs}xYP%~R&+a399{2;eQZO(+|*>!-8@gX&y<%yz26wdBB|ZU5%Q-K
zxORD?(F%2ZswQOgIPc`tl67IE2g2iluFw-J;pZyE)O+i{HAD*5A(BAzqS*exF;Mr(
zqkxdJZHOdaR*c=;v<;0H#-h(!>VSKTJKrdvXY|>c&2YvRQ<%I=_jd-d=I2p+80gQN
zp!cO4c9T=x<I%1Sd*Kg86mz#aF3tBJi<4z`g_ww%-|M+susE+GhR6BWoPk*1xbT-;
z3RjWVi2eo0JJmYM0tk)6>hG60W(Ji^P*HsU&yhf6!<><8hFPwk98WSPJ(`|dKWYrL
z)-XI;G0Pe+HuEmez96l7Vu-wp$?B+N;(^FS{iLZ=S_64p|8FxGEvvj0HotK$MMw}D
z>!VAbaTiH8yoh^J?=#T%jKy<hQ$gZmxn`!U?gd|JV^xLGwx!Q6dy8^LZfKcL|Ddbc
zo=#M62TWBkOMIul$E+c!fP5`k&s|2YISIjgc0CUHIOK_>URM2n(_#<Xo*V70(w`-)
zza5@J+>JiSgGW7ld&hO`OHUi@28#s5<4u_S9Ix|`PYOhw<M`}LlSU-b#<D6b$^TxW
ztHNa+3`!!_uxDfh%GXp^bWuSO2-~_gOMbJ+QfAtU%6qS0!E-fQA&D1YNej<f$&H)I
zx@M80y3J2o`M|?kMBSK8r=}r@;maA_-unL8pKS4B`}R>wqt6dBbl;%3u!;?61F_2N
zy(YHM=EFl*+mXKh60@}?k*w4{(&)@C#u(e<i!=3l{#c)1@IF$%j3~n+cfHUBzteTg
z)$G<CUQt7No8+v}9@*8`_U|)3%<Qq>ttur&;i7kZ6SIbpctXn#-&{SU<az>firCPW
zGOj)He&9PV&qW#FTZjCeHSg~vkr*r5$~2GPF&58Cd_sPn?H@FDt7OpjNn$$sUhDKT
z@K`P0jdF#O^1Pr)g|YyT{zngxBhCd+h(P}W0_Gpw>05e6CpGO=Fa)2#Ocoe!lSv6J
zkRD%(fyA$(5Y(s8V-44FP60W+&;ZNdSq0|Oa{GaaymjcSx-{+lFV=tfv3fj&x4PDl
z`~PaxfgHbAsChs?>+-P7B{X$~LCj@+_HIhvaZei0kkgS;O3LRgH>J&((xL9qA<Y`?
zw6mF%Zy)8h_rVB(Y#EoOT|SNkw-9n4kYjE4O$7U?RwM4B5$~(jM`k<_Q^tw{8$vf%
z_T=rS#dYBqWADI<b9NneCv9C(^W=y)2|_++r$q<YwPFk{Br<G#TmC#-XeTo?bcWWY
z?l}Jh31;M0IJZzc)Cj|3N#mya1So`;<F0gVF%Uu7Ms?v89w&veD4o#t2jF0ur_yF$
zql2HSoyOd=6A?+h8wM3e(#Cwzi=e7DmKBi__?P>=fiK(kc2c0FI@Alk+ghK=1~Skl
zf?7aAc~{ELTIQm>`5)m<C+6&!@E}T&f>Ck?(h>rz@)-f2-c*uW5lPyG&bvZS8-%#=
zFtZfxnnvj%NY*#t3R4*nZHkYrUaXQFzW}_V`09ey*oSIm2I|C#<PDQ%mrV}kbeKt@
z0g!uFzovGJ?p1l$Z6b@ufpR9OeUoDyiNhEQh=IAhd!fyc;y3l1#9)gxFmn6q)A4D+
z^SAyx8>%a*2)F*FDrv$#-&xU&6(XkPlc2gQOt&|7U$Ky#iufyJ+`0z8IHuv1Y_L*a
z^QhR~8em;#<LX4(jR=Qd5`;e>q(#re2Fvw2;YSF+NvU6z?um6iWF|9P$-Mf%*lV!v
z6w+z;x$SUW`vxM`d9qcp5@R~dk^;KHm0UGpI|d>l0)bG6yvqGLu{hGm-Z9yc_S@yH
zTa9~@frV=KKdLKV_fJi$SzKsxVLP5|FodokRo?ryQk7J^!%%OHZX5~9%TQ5)f}3#N
zzEEn%nAaEN*=IEAcU$vf@&_J3oNU_p)n(#?8HB!Z|EdeB?lAp;Kdp1R1ZnI!X6RZR
z(}%PRN(2nw_3_x-I6BBhmB%ECyvMnR3(uldiy5l7obp{V{^Q5nTRw}lbV$yit0iNN
ze#RB5-u{A58`kox7WJyW_deq%eqtN?RqT{hnr<?))W<lP9WiCltECd3oWV8@xG^c}
z^rysVuys9dBH83Ke&73L>=92xkEiVjXP3ob=a?9JE=h?SdYI!g3k!WH6BIhg2gXDB
zq1&#Q)MIEs6iOuJIHKraf_^Z|4F|JVw$Sp7lRi$Ziin-y#-6f0JX%f<b7X?!O>&|)
z>9G%kMlGoMF>d)|0!KR{;3>h#q4n54Qme-3i2if`7@cPOo(34LLwfEGZ~C0~bUlW3
z<rQZgn5j<XoCvz<qVQ{Ddtbc=;==)LeG9tFe~<crN+7XwgetIx`8evwx@4uC5N5sq
zMZuU9=rn`BvT0zk+{>f%KomK$YORFg`L{uNNEZ3de|@x=ct}^gi7#J=Z!(8RsVSw%
ztV0$m-12CPjIEnMn3#iB#2Cz<7EgQs(d`ZfAvETxp$+)!8N6k|vJ&<Cy#rW(O6rcg
zDo->+IxKglPP%6>k)-JcXRAVH`X0_L#U44$aj~?V(BiY`$HtJ9UEYo~JYKhYD1=x{
zZgLQgW&!EGBAeF~Y!)=#qTEe=F8sxR@^=JN4b*9FWs=PxGcJ`571T?cEyVHV@#8lR
zftHWotfTi!iIo)q5A3k)v!$zcY!UT|SFh}=#TRyA#kf%Na!HE1kL3HENRux!d-J<)
ziCyZ#l5JL4+s(ST+<=MxvOIex5`DpaM5}&zoa!+rR*=SPjZ01N2e3X7)=yFYF}T3`
zkl@$uIgRDowH?3iZu^0sl~sMWWj*Om9Y1Lq9ycwhyuWc+_D0VSmXYA4F*vgl#fx)T
ztFKw_Z&CY$gk1HK&Z5o|X$!lp+EYL8B+4*ji@hgPA=MTV8zY@+bXGf%*1;n;1Fu5f
zs)D|#Z>T4D0MfpD_g%VD=`mW=KxBD~z`K)|g3N94n)0nUv4(*#O{L`<{qcED+rGE*
zED6)~oC(QD<Vm05Tu;nCzJJyq<Bxg#zG!L(w_6K3Z{rY=Ci0OgxxKPq8d#7pZ#B<6
zG@}0<ByQ0Tes<=Z_Km>hXfV-i)vvB@z4P}F1#|sJNmQrgV`YSw)uu!r^l|9e_&w6i
zjBmcn5biaD8=5!^>+u^V?-aeF-^$CL>u(J&#Qc0PoIJVx5N>Eiy+<3n3UAh!)HdRR
z$hu0o2>th1KCGcfTkM6G^+!kfq{Mz-pb`Vw87pQ?Ur$GM$<DJ*S4vLQx>*#Di5}By
z_&irDgT5nXBC%rD!1xr@zVzH{{B-WIl!@&1>TLX?Wfmtk!1B3lx$8%saF-$G-i66l
z_m+p9VZ6lcQxTVh6VE;qw&XbG@9$HNK3zjdt1;qEF}o7!$XN%YPv2g0+NQgovMTpp
zyJo>kZC#%NKek-^J^Fx(7NnO+5+{1=LTV=7S90j7iP+6ge0rF%$E7AG3#*Ue+P-3l
zO}*~HH%U_=KA_eww`zGP!ixQj)W*wHc&Np^PK4|YYA}_I^m6X-t_gw%#0Su6x}?r2
zz@Unw9tIqS-r4b*5REbq3@b`3pM(>_MjtRm%!SBju$^8pwblyTOf)10mu3*Ng#H%}
z4d@VBLC4Xv*){z6*X(Di6)RtSu*eCK6C<ehS@yvTwH{d`#FiCm2`Ny8)Je!@6*f4`
zEtWZsL`pFEGgNGoB*Qy5rk;;1Mbp%H$oXyMl+9+@Zi%%;VS^XPq~sKn5rwxc;<VfP
zJa<I!svp(b9i<7Kv<~+g)KN9XGkb-JioJkNkc43f%4^W6TdEC$c3E}K=v9xC5XT)O
z<zUN*u;h;0sR+d549QCRY8cZe^>au%J(%<3<P8VA-LRsLO1mX0IuO?otst3%JGg_P
zzt}CTW;bmN;dcHRpH2;Z2ifFd_TqYJ<05ys;hVlJVM*wVG>W*Ds_Ap1j2BT|PHO1q
zDK{z_&Ch<}5TCwW^k*r6+7u~jO>gT>&n1;VZ>xR{AG!0tJr*O}GEWxz8Y82|{W?W-
z2SKDXMUgJHYxm2Mma!|!M^nh_ChqO4D2sYw?iZ-ddY{3WI&r^@k!xLa*$Bk;J$o7a
z8Whql?WC;=(Iu;(D5Dp?%Vg-xO}=+55g+p8wPD8CqgG?jGm+7Pr9Pq6B-JO&Y@*o>
zZR@^rW90}PrI!@Jk6zJG6>3(n8+grUst&Hh;YphY7zm37{5ZU9fB%Bxll{7N?v-b8
z@u~wVVeH2EREFul0@5qrDS3-UFML^LQxbuQIKbJ93@S)FR>V53tUvglt!LRm&Ds~&
zl`5T3m<%7LvZ}B<@vJi9@2<gJCu+zSMvSb;KJ3tBE%9<e`oB59zP{ycv7nlfYfIEQ
z_G|8-rvw*<mV_tQ(i7*86O!nXlEX&mh_m_g;8;8Mk%POyzKe+7{gFBV`y-n82Uw6r
zUWvQItyM?xkex}xNo<hr9f-=0PwtVbh&O&j)_iojTV8#%dPA`4^7L_o`y-Qj0%x3<
zS`MRVycp=Nji&I)CX7G>WL6i@PDIe3(0eumTcMOo`L+PLgXm(k6e%?sQS*)c%!(Ku
zSXlI*OR>pizxo)v`tWv;e8OP<qImlf`uVp~5c46C-)>jhaa2qZ>CUu|EqZQulK^CY
zr%o30JutI4hw^Z~++3H^udZ#>+*PS0_7+enW<K!j_FjIC$$P8CZxlYD{*<e)??*4~
z(|WHYOWMTAV(hOzSGjg|(XIe7xL)b|lVR5Dqi)*>sr4h&zi|?<n|49?D`KeBp~0_F
zN(m`9%wTcQq)z*|Ku-28<t{@t5X>u|UoAZ48#Yx6l<r;KXO5}DhRQif@VN!h+}jFG
zz5gr4pHZU~0lR~1b(5C2brXsX!_fOYszA!F2t|mx{#@G)y++c-6rnD0nhZC8?%;bM
zVlQq#Iz!Dx+7iHUN369SLJPsUpX+6@<<n91GHFgq7ndmeq46nDi`KQD70IU$6&?j3
z_M10V*Vmg}6uxftZ~m-4#$~2UiLGsI>q{2aSas#N7_ZgYqOSTZzDZ~Q7$x7+C;Z6V
z-ahqu-|51AUTF1}T87D8s{ULUr99sInG;ZRWwNntqQ)^}cYT9idb#&&M)1EvBY*6O
z-1aL}4?vH4uL;PNR&$Y3-Q|8G0a#46Us?fTLOYArXYal^6|KjKj&s@~*{r&rW7O-8
z2<poRLHvYv=|Q)zD%`)qrsFWwUVTmfO76>OdCM1Y(?^`btxcvYbziS{M&67Q1u-kR
z3|QSpfdsgeUqoX#Mzdj0x^KwHWzWOhB4;Y;7^L$%o%Zq=*aa_6yBhK>x*Ih>UElI<
zZJceHTH!t*o0u2dEjyE6^E{w^1td9d;RQDS){2Xiitc{ggs$1cix)DKq<V0Y!AxCy
zPjzk6rB_y6FC}lT6UziO%A#epy=Q?>`r(l4Y?QfN^{~$CS^21gUu9|7`bTjS?Oek@
zt%W(e@C^0G@(;lXPl==>5hh|z-TWum_4M=F+7HT!mOU8V6;4ScAJaj8p}7JXHOKDz
zTZc>I-jClyBO+tOI9{|Cby<(dFaVc9^|;_AQQh<QuZgJ5av>RMZaZyjR1kk(rCE%S
z3`VrSrw5k*4)O9^Cgpa6In@|?`K_X)zD3XKLF$m&m9ih4Bzh9<DS^`a<l_ra1?FF3
zh7)hUm1}2>cBU#7rPQAF>@stBJRS_Q9*_Nm(|ruekhr(MLd-nDl>NwZ%tB!mPbOjM
zw|qOSpk!JKV$6`u8OiMLPRnhwd;wkfeb}5RTUbDDC$Rfd0EV<f+O_xG%XUDnVRpUd
zL>$=W<2ZIxN-yR=v9zf#GIYp%t;thCd!C#wFaNP${$-S7>A!PDNv7tJhB1qsmsFH8
zL+dJe%?#z&$C1h-<3~GEpiBv0Zg>XEN&3|plMaT2AzfKUx!v7HWUF*PsWy?7yx#f)
z-U(Uz!nfpz7ZTd-<n8Vz(bsZ?@sk<1*g9?0B|qB&As?-^^RC<|%x;;~{OmhFCzOF%
zAq!&{6Wum_<(tZLrvd`3Hl^I}4t}4BJ`a}&%QA$M>Cnz2<=kKpwTLE@?@4_An>k(q
zo9dqrsX_zB)RKG%vnYGGQd-bkEyU1A6F#Ptjj1rTweF>OVQy1qeAktIg=LAMVT!<Z
zk;#2xd~KohL%rk)8jbS~*%dea&m%^s4IE+j)EzN=s9cE5FxXAbPewS`g65Av&>xJf
zg<kkAKmMSIop4x1jePXH;|x*%%+Jz;4&lacqbEfDnKS|t8?lvNbLDkGOlku{+U;OT
z{^)*t;g8+Dx>`gHq)U8CHOA&_4U{YuniBBQ^}$G1m&=%%?z#+V?@JqTzy0c4qN@iT
zXC2RS9kU-CKE5q7*k)1fo8JR%WTBSo6QWw8z729Q4zb>aY$<0CFO2Vt4OHHRPbiT0
zBe8-@cI()vM3J-g!V&RmDVv(*h>mC-un5GW<6GySrt#3<-vu)_V;)QJQdF4g2FM~K
zDxK}jT>7onqsO6mOc2{xUZwM|6IQm_;O?W3WXd!Sd%1jioT)aUb9R;0DF$up2^k*)
zDem``-HZq-QL%N8;DkoJx^W@K=XJk#8TPbiXfASO%za{d_IyUt2U}F@OJ-FF>FOOE
zv#8Op7XOB2;7D0GzpEI<PxAW~&dZri{H25bcri_x)SYO_8uhDj+@}jxm9Y?6Ub%kj
z1Jiyk1Gq+X&6nf3&K{FQM9>m#0niqz)?EA5Mzew~B&E?#Y9q$2B^p3t7=7VV*nfj}
zKx8VI9v4PNd+%r~esyS#CGH-#i_+rPTT}Td$nju}8TVR(t{6cbYH5dLRe0Gl@xqR-
zgH3fK|FJ77*rEB`c{AUb;6$CT^Z3N+2u&dpD(unfY=UlQB#kdK-Fw#at9;_@yJ<Lb
z>F~2c6o?j8^C5N_eKuCaGCnpxz<1w#s}3~EW2M$|+QWD{vFm5TP;#;`iB%77r=kbB
z*7gp`B@;=K#`WX7HzQI8Zr1E$YC?jiJ&dlq%OpHe`T<|flNE~$1%t~ENrhC<-#@(h
zPf+0Ul==%>v)jKIFsoASsWx*}>C1;%@yjyNiBt9`4!YqfO|4nY<&8CD^xwOmen+=e
z!~lP!x)ANw){=eT@hf1=Y}KEkyJKf|<oZu#sGu}owR!w-o_0v~?l}0mG>bEhY_(ZS
zkEKYLq(_Y-Qm$~YCw01?!oqngKF>s!t4`589CKTCBqoZ$*v8IPNlwqW>H%4PO`zdg
zYbb$Q>;tItE>yYvP3O#g0e$pBVfLAQ9OqA&6;WHv?js`Ei9r#FthbYk&3_T#Tl!^M
zeBTj%aSAFCZd0Xq1HKzA0W^8mSSMe0Bz(U#D3Do1p1QaQjjs1I9v&UNdoxqYeU-zj
zL-+;wB19pCY^ckm0jyBxe>AixW_1My23h4py;tPEbecwi$(VR`W=Ed%gu{|3YvQwZ
z$KfYdALsWYI1@^37kTNlAAIUW+pdP_JrLu&u+QkKBw$M6G*IZZri+24JA%DSD_vDB
z|BCO~2|W|8dUqZ4=jIty*Wz(?S9h~fNTY%!>J6M~u9^1>-SYF+p&A@4KThqKsI6q_
zvd_*a&k_9P=GOb;=RU$M7EAks2EU)>_M^wQ11iqlyOC-5HJa-`k3+Xg4Ziv4!!*AQ
z5Ab`2SrU`J$Bk7tzw58}uURN%^bU#cXWy0-?Q84x#C*h-@1^7IM<DNWOnCaEqQIba
ztJ%t2R#8+3;W%ouLddb3G^SMz1LsXMV{))oL-GDMasWj9d#+fG#C|x6%kqinnke$x
zB*DlWKc}6uN7snR9-7A(R4q=o@UfN`ITMq*=DxsK%-Vmzr86am&Dun&vLRCqGQ{n1
zt1|JThY0)Q1JO8HlYGl8mfbEKjFb^WKS1j>^HIq5@pj0k)z&#f@wHBb+noV3Zn7G=
z9JMH3yA7x9CNsNuQ;nGPPxztokcBb;_5=k}j>AUC5mtQYhv~|j#f5|ik-5GyBf#im
ztkqF`e;PfjDf{Fsz4%s>nf+?9wGKs8jUhK5RVd3zg_SL#or3Q0ZRRdDy@*`&7peT3
zLQD;>zB!f}^vuiMF~A;wQQcl<bcBsATDeQPNYd+d?<#kq5<QH}cUrC=^~iZ=DcnLZ
z+Oy#^=|a;pEwQuG8qF@QXtpq66UBxg5iaM8%@6Y%6SePajBK4(23T&TA&&`+KPZtl
zk(I0tw(o0M2yt!Z<qnqr50w9LSjq4YA3H=$0^ON^cJ8>H&bbk^e~1az(_gdCzI!uD
zBoxW-%Lg$wzL(Lc5Vf!)o#;ik8(d7=tL!&KVbk-xH3XrTxmGO4?^t2umcn=uTQoL&
zmtSD?yxlj7p)`Zyep5*ts}tz8T2B`Giw~8}*#P1FX2))>m?~(z>3VDiR#K`#G|(Ol
z5v(H0txS$qLEZk}a$Y*N`(&msB6SLh^e!;Yt6$7i)V_u6^f1!lbrq-BTBpVKz!rI&
zl~|s-qi96e*OOStt8cLP_8o!QF`(rAvZNli3HFH}m@14cX$rI>QifxzXT1tF)^^v~
zT#HHXD@3o!bEyLNwb2biWGk6X$+x_LNVcc^6re>6CWyOs^MCI3Ej_(7VanP}U4;wg
zRYsI~Dx>v}{oiG>SIcUPDKRAM<;Z|rYB+Z5bh#Hhsj?$X1)9V<qaJFc6}9%1hiVy_
zT?jBYy4)Va9j&{8sk8h%xt3c~ijn7oYCpdhv6K`q$ya&bTK(IZ2y`F{1oxe<GQvi_
zX&4Ovv#i^{-*vZ}Sn5mK5=ys<Nw(*LSU#@$FQW%Y#>0N$x0Y{wy)`HZ(tUE#pF)Xs
zv?`CR<14f<>EP)?w`J0Up7ftVJ=rz(aHtgrEuJG7WId8n^eA*c3-h2UK*6Mj-G4Pv
zOC>=Cg|d{EEv<3!X(<}3oLu@8<YBCqgKR;Bs}08f!cy{TSIpBs-Rl3C>4Q3Bu|^N>
z6U4MXkvrbp%LytT?T?Mt{41!9ZE%IDzCcW#GMF_;&}S(s<W${09}DVZBXT)xfc17R
zmQU}zR8uiGYy18Q`oLH((YjM;--g>8?#m~R2!@T*F!NfC3)vBB*tb6B<KroMMaSxO
z`sMAl(#h5C-i)(3mhNTlVf32UH(D;p++NR0?VtRsP#3*vj!2Rqld}Gb*pCU=dYyU(
zi}%-$k1$Py+^&)hQuli#%oizmMJm{Z(*vHW<$MDZkS1nSSuL->BXZY&N`<nZMhtG5
zy+}x3sunm&(JgWzl!(@Z)Z8((idy&~77=r0Na+eVJ9HwnQ&yMPY_*&^2l<yBvusZY
z85~|w@?YT3%q)|XrAm<1ExHaAoayVAnKqP-MoZ6|Dt8*(>6zeQc+yIi-N+?EY<Ip>
z>i+3`4L4OY4C}_@RbV1Ey!heIhrVl4H@WsU@wS7Jqzk3k#G5cM$5JPQ^ex=ez5%^9
zLfV&q0?B_6&(A$qsM1{1#X@Lcr46a+hT=sfYo7>(dQElE2hjq6Bg*1va6=yBs)I4X
z=?59A!=Ha;$ga(=c{Hrwn5u#~A5Kgb*E}|q2HD9eBaV&jDg6S&jD4Q|@9N_}K9UMQ
z_oyc=?-pqbU*=S;0cC$I2W)QegmyG){}!YY`eTmic*r@JvLmSYrQ76d-)@K@@o0q8
zB-PkogQ9<!rDy(Eds5bE7;6;Uj5Zj`uoQvC*Up8keea`a2{`ZOfjOCotxqJmxx)+A
z>nv)QDJi4og**p!iNTs#v@8jKJ`})#mBd#HTBKaGt74f=YS(IA57s8!<?Wf}hKEGw
z^^FkpyNX!jTCKO1DfcX3u3f0Pd+~g3G1YpTBTD16s9?=9N~Yf(&Q`(qNIwpklnw)2
zWIb{j{?}!0xlT{tYfz~X>QA8LtrfZ@Y0+uXnnajerQh|V7XDo;{?@pK41B7nY_9T)
z8McT4%PBrhLtSlvt(0}EjD?Nrwux<Mq*Cbr`Pe)pBfIa2?kPW0>3`D01C@9Tg2tfK
z%flFCes>ZsSDREtfpBHEQKM_rtsOhZZKt(bpiU$NOh1;H*1MHT{2wg<9m+3-wdJx6
zMn1Mq0)Mrs?GIT9V@(&J>OqtZ=A7{&+~&*4REme~i!<US_K@2eCP<?j!3g)R{+=(r
zy*)n_vBKC?v0Gjf%0hkTcg>irdDKGx<gzcVboB##u`pyLLwtTi_ILtCaE)A2bT_xX
z(v*!!+vxi_$r`u*A7;pH)M%1kY>~)_zA143Pq50U{nvR%Y7;^#62Z4V?N*ptQ#VsV
zC7k$Nv{c50N{p@(oWD0ML8H+WrV?}Z%1m4kaGZS<*Wcy^T-BITE2h_Oji?tc&G3qe
zSrLtG4qrw=vl#w#Dg5uiOd`WWtq}6Y7mgS%604soT$7!3A3v?L)KOJWNzzGC{Tb3;
z&hc5x6=+?%v+Fl|#3ZXrd!%VCzq#QQu|ot|>7&2yipv}NT-3vjebmpLrUlu%K&DNG
z-IKrnp6A=Ea&sg2p;|K6xFAp8eEQd|{vTxSk1KxD+af{}jWg9C9u;&o2*arQ*Tn+P
z%k#jiTtJIQs+WKJ#6JxAuz<YT3!8Y)Y&971!Qvl39U;=9UR7gsj-zF3`2AJ|{q;Uq
z#$QhlkRld+wdh`5Z!YED+NxID2%_K_(Z7?&{&Vu%TP&>~5@Rf5t~C1oV|Gcv*krxl
zxS0O;fB!piLCuXYp-g@M`oJpPpZzN^#t+sE&;RcqBJ}*fOZWfp&5-~0?YTa;^v(Fa
zJrg)KcqMNWXsT>BUTM4HP~r5mc(l=WD8I~OXHm%Y_ph>HU9hWNit@TUGK73(PcvK2
zM{8%Gn68w5gBB6!OM?u2<&v96zd=}7EJuSZ_KtMZ6qXr}q}U6kH?FYkdfq{sz-Q7V
z6D5Xl&~^URlq!<1X1D(<<rf+I`-f9~q#K{Ob5#Xdm6^_8)}!9e)_6^{A<SzVPTLxQ
zS7zRjhVD}H+LP0JAr1X7!RQY`#dQ5Y=$VHG^Hh;niPqTmr!U?L)BBiH9^n)ygNf#*
zHKaG_-QM}#%k6Jb7oSYXV0HS=k-PbdH4ylfM<3`n1W@7Fjcw9R;MJ)Jc-t%%(jc#O
zb~B-DcnR{@(DbX%kA%CAY7(N(S2~Jg?|kmqT;8#h4L9lTT&|))=AvHnXoh1o$jCKI
z=~-DFkmXurl%Z99V&T|6W_8OTT9(xB)_SGtf46tA-nUfMFrQ-=+&_$s<)yKiM--0!
z!*<nLJ?t+d-`OoTY7lz`l>B3XK4IiFlWt%%AR2)Lo*0wyZVny!&>XI(tYQvm^R<FQ
zTf_uwYs!i;JNF19ok6_+FnZB|(L+jb>3^6z+w}8O8NoTOQ%JAf@I6Gbg>(xx%S^!M
zqOkaEhb^=iht-@20TQ}bj*mQPfDY6{iDl$--anrFb!xl!@awsp=&cDw41xCB3h-7^
z<5;V)rjyA{!qfGtQR6SqW~;e=0D0-mO}JpePl-hDCEjQ+^0(G^{>|irFGnU?*SW@M
zHI%QOP77#>90W6nSb-=T)+$sF(I~m3`HOAA3#zxPZjWYbVRN01huF{pSUF#F_&*#&
z=+#LogPm9BK#OfnS|V1sX+6zgnnE>vYt|9P;vNM23;S9%2CB=F1vK%)?POoboXG;s
z##icU=$a2ZzCf?)E}t$d%9At$vI|Jtg3P4-gX76f-vWkel5fO+<A0sh_-ST^9@R+#
zMvT={o~yXZ0$F}Lpk136_Zer5%wUdUfLkYnbr8_sU3|iGwb-m9oCXVY67)R;6jXK`
zJ81MOO{HCRiw$X-4m$)xo<`6H*EFB4-+h4;hG}<b><>o#&*T1)xhW(^KR^cV-KktH
zX~@S=mfyE5>B*LtUr81;nR;0Yt`YE_=VT-1)hi*h&h~MW*lMSarwGGLpY~3FnqN)r
z_qIwAcQ78xH^T24PZPBpoaM$q8|+okjuH7()ow+~|I3p21MqgT;j5tEWFZ<4rlcW=
zUNN(=iq}^od`*LuXnWGIVo&+51Y8Rz5V1L!@^W~n)oDpCH6iCQ0Ye2}oNr!GTxNd5
zCs&OX<ISTwG<^3F*}+S)n;zJAoY6`Wd}0AW-a?s~mE+{ND|WQ`+!Y478-i+C>`jly
zRuHj~QFf%fSYgEvpWbjRq~D+@l`IX%GV^jK-wjdhGt;3A$9`2CM&1os-4gvd5NOcx
z#Ot+@r>xRQ)=tJ!P+@74ls6!ALvfj6jw~`j4BAIu9G6}&IyT%+K3?Jg?m@pBZ^tD+
z|5c<Zfepe9WuHHo@Vhv}eksTzxj4$c82eT3KMz^#NxdrKYI5{Lugwi(PdBw+UGpN-
z3O(ZoDtvN#Y&8bZP&2>Vi=S;tzLv`4+u$|W;oyFxL*|}r0@njxEjk?kc$;m~;dmgq
z(E*ql^0}fJgUV1=&pTYU)>$4_X)uNR!B3ynE`M(eCQZ-U1a?kBS*G9l5jK7L)Ji*=
zP%h&=#B9!PV)4-7V&8$^(<NH!_veH%VUG4g=cPXT*;q&`N=Y}YU6|Enz#sb-p8|5M
zW8p#%ztdM_=4QW?=|&>mDWCY$`fc+vE1(GU|JE4taVh$pjry@b>sRu3Ykbd+%P!7O
zLg!-S<-7r7q2bvRQ0_QlU(lB93L(BC6IbTkCmCwh>)9GWfZfM9Xd*oIZ7hL8V9Lwp
z;X}XX#w0)xee^Xa8B#sQLfd)9j5lmue3{N)Hm=~SeR>fwy%7?8xK7awm}Okr&R6=r
z^7~Gy=J0uD-MtU=B|l!uW|-HqnoyG$2id1s--_^I_LzUa5+i^kW1?k}oOz2ssPaC6
zlfz4>uk3Us``orA2eeL7R<}LJb|E24cW^itQ;^6lOGu2-(KAXskCxp>;}*r6#}t13
z9pRZ~2!9B&zom(;0W^>v1a1!|H&u4<LKEB9=yZc+7E>$?U38@3&=Qx1QP)u;I)H-F
za!4M|1cGTFfB9`9@eg|&16Z|OM-8D>*DkK)ARqyoYm#y0z(#uIQ@mEiejwW^`wC2z
zlQb(BiLfp8Uf@LndEAe-Vi^%`c>(J6fW%MVTN*xqkuOuJ{X3)ne|EEz0XUi=9&Kg9
z`|eFUnU$6DRANQrftJK!$$}?q2u(wS?KHa*VnE=@Siizj+0$yh>10E-KLhLQFqOV$
zU$|=)1HSD#4X3FN`b${g2L4>=rJIn^)Bm{z)M6<Z9&LUIq#(I8q=p<7^BaH`jYry}
z6D*}4y(1|nN%eKKU1=j8^kd~oU7~lwywa~qm|GZ2*Pmh?0HJ9)d<{Txp?(&q$)#;I
z>^=rcdPon!4Q}@Ojh?P8zhgL44d!MFGMrRevj1t)BY+WEILcY~q};?6{LKUd<i-Zo
z^2<$S<)1Xi+;olpd7GHqgq`KzA5Qy2FyMQu@)TA6@ktB%avQAGj(-2UzkcUmO=k1+
zef6#y>jGd^+5lgLx1XTRU_>}l0#0Ynp>59XE<WJ)mF3JG(j$wZh|&3WC#10KC0j0i
zN&lE5|1Bi)bxC8Ud9AEGwUIP|pb+$ULjI@8XJeBre^SA3LU;FpOH!GKT7Mmq5HUUQ
z2)k06ek^n+7i?MY_r76OobF-J26AOTSvZMjd2e?Kxo=DeS@&ghsRO-u&+2P8r^a1Z
z+am32$T4rELj{DG(?pVYmijltFS<5f3QwF}pD7A5ua$lUU@T<e-dFl+NTe@VSmrpx
zGbYkZa5K0G$Rw2@m31Vels|Pu`<?_p60y(qyPqVBd#K+4F=(MH&vYMqy-O5}azv7g
zgJ^FxPZARX_6}ZUOoL2{`su(MqL{6{`9gS&fnl=A9PV<|<6%Lz<YK`;l*4J^L-?TS
z?|$3-Lkj?t@Pw)U?(*0PJJT+o|Nr-2+Bf69p-p8$v^`v?m;6ipsM2<*4d@-5b_Nw=
zeRtw*k1peY01(X#P+Oa1R%!a{5dg|z0?6)|6)NuoC6D*GAQ%WjUA-R}XrZ@H!>bFj
z;Ic!`J=gu~j{BeoP)^;Qp0)bb5_|qw8QJacoVL}4{EoL0n8cxEfN8Kvk@NA~7JL08
z;RWs<-Cw^AWOk4MM&j9)+U~(YC{GsrHl%TH#B5`tE_YdSlcb9g+BgCDHGHe{>hU*)
zGSC0E?SOUX`s3@Bml#wnQ%bZaRfK1FY7cPH+UCLYZvd!fYgJTQS}LiqxV3(4H&R5L
zVwMizix%M8&J05H*If)0?os^lbFXgSL`IGR$0r2-^l|V~Z;C!gm*?*<1#QQBt3!`D
zM67z~&9$l6w~%c-a19C(4;zbDToA5K(BG`}T>PbVb(tzt;fxak*`1F(nYb7BiNvX=
z37e#Rt0z|WXg>ZfOkrC`6x*i8O05t{EPYt?MqWkG0Qz@Bv3&n>U}2^p6a7>C{`ez2
zmbkmisYO?jOZ^{E*~1=eYZf6C#Ozk{wdm_qk1Bm~l+y3*T_R`e6R{vb$E|_nRSF{+
zd+Rd~8384PK;PG|E-_aDEKmzzfzZ(my$;9d#P{8|keo|pUx_*#b@G2>Zz1;%(`;-m
z{lha@#BHnkSjWUTZjiNF*;;HuD=PtUs-V*TXKQntc<IP6P&B=RNnT9qqsTJ=<Lmg7
zijWaHmM;*v(=Xot(gN@%JY~+ecR~~&8b8(UaD18*Laa?j8DGn26^>=JB**_s&q`=Z
z-SUJOb0_l&$T$y7#Ze%T9NSuLRPmIBN<psh83P4d6DG=JflCh1>N5jeTSspb#sU1i
zBUz}Jtms|8fVrE!?<;nzsnerkmAz-dSI=`AMvc9q!sR7xUq2L{_@OB78IFaPNiAy&
z%Y1)YV+gzv#^3<lCeV!Qqu)4J+EY{-l7hMAH#dFZ<Ho}W`h{bkz!^Nr|GHyMpO9s^
zjoWfouzlIuUg(aGNc9OMrQh$(5PrJa5|dys_D|bT-~QGLUH_zL;(w{%|M|_Y_dhUH
ze^n#?8>j`I{_;&i7gfucjO#2R|1d)>A|HZ!MRNWh?2{tkOGY?(9*1)LEwJ~N9tWK4
zoygCE|M&p-O9^Fr-@^vX{(7->3F~EmT_?l&`=4|23<9>ctkZ25`u|Mje>=ASEtOdR
zyW@X+;LmDiY8b;DN?{Snu)MiR37kU;?R+(2?SgxNIGfb@7TbmJ1VBTc#*6gJh^zr+
zVf@Dt0`+%JPnYB=FAH?hW}z%?twe0hQ`<>3K>+`+n&Gx1@(#!abR315VeL1cGL|&%
zkK+L9D1Qpn(UXE58eJupKLobf8SkVywp=B$v>z!kVrl~PKSBZNLdgk$sG4my9#C9X
zo%)@k{FpDzOi}NXTO}sVa;KwyXL^Ud4!+cVawluEs>~XYQCXgjR@BGYOArodhx-0e
zVG2+WCZOp!J*}WIvxfZHZkcIoz#8w@?orDav-Y)1pymi*oDkR5kzk-6QREvyqbLNu
zv7`tfYJjM`75uLBvaAss!UVleDT8~62D+NJ0W(dK%eLyvNUhQ)v$nlU;s<me5B9q_
z@S|+LINKa@znm!VxBaS8e&3P1ArUWiwICC<!ZPQFb1Mz&6LrqWN?QeOHt&m$gBjy<
z3K@b@F;+{zJU#w_px0Mg!{x8-@2(8-3@4Ai&-%3_vm|M@GMM*NeEM`z{F1n*@6H@t
z^6yJAf^><_U{8QlneF`^HUnG-smaOsXd8gkz`LUFX*A`IELECfKV!S`ek{oy)ZudI
z8t<G`Y~{@Y&=!uawAG%aFqvWn(6Eq2XC&^D-hHR5C1Udy@_{w&jex1GMvdIbfydw;
z?vaswxhfw^O9iDzp4`0sf}lCs*wa?O-25rpq4}&0Kzhj8?m}veqt-#jEh5&?@P>_g
z-fw^usCpP<6M8m6peYtR=Q>hk5S9)Q`fm$~!wYs5DBK@`hde=_yD$>ptt`LYpNiB4
zO5<`sukvbKM6@~lI@i;yzLxN_Rg-KgQEym>HI?NZ>GewO+QZohB%n<>VO~lP{g`V(
zBz)Q-=qucT16p+tEscKFDU6%+n%c?2H61iMyrb;E-B5W$e92(Sta{USGfB51HfyUP
zfWyrs0y(9W(XT2;T!JHH`j~j3M~aV#^(SwBE{M|jOKx%etq2FdQ`cg7Zpt%ZkMs%4
zU+}#xy4~}`r$zF<41MPvPd=@zW2VpD{L}`IO#oK7ixPhw7kJQo($K8BEzv#Fu94rl
zB!6*qE+L&H@lN^41xF}z+_?N<D`mxl_ObHw6!bnDFtRh4SZ>%=xjw7>9e@4m1t5;{
zaNA?Vz<Liw>Y^nJF!8feugTRo8Q+_gIO5@Gd6M1c83IA2QtGWx_)~I&1}&cIP-l(t
zDA_ERt<aS7WFY<!Hl*o2OMM0-6k+i_<tUK%y{ftmkgz5;i6kseG}Yn|sIFI%VFixw
zSq-2gl5=x&u3Mm?h@l_07_}>A*Wnl+UQO;z(YTu@sLt$%3uIfnK(d|3;y%NA+xOc1
z<nD%Gw4|PN(aQn2;=~V-9nI_KXvZb$8IN7;7RWdHW&7;g9?9h5cL=3^HklKVo9g<-
z=J$LRwU;t#<@tDGPTKHvtH8SHppz}@WUIzg{Zvj4H`+4+pZVGW2&=cQ1{(IvGw9bj
zy*_4_-utD!bA`uef!8DzH)`VN6G2G4EsSCwVsrSp8iSDIukRWNtBJ}NB;6VvKL<2x
zi!O{dJ@sb4B$S<<O<(v7UJS{f4c+iOBsc=prEGrBLbOz_XYW@Q<gsz3g`-=ZGMhYW
zkr$YvF2BSyE2LMOa2K<5&C80-+HJbx)vH<YVeLBrYfC<CcWt>q(^$V9j3J$2`iA&*
zxsM$)j+B{EU=L+8#5urE(p+kz(K!Gpu5>_w)u=<2C~#b(N$?2IL9F8algWp|5QE<n
z(IPfsD1i7{Ur)&fU-GO74dIE$ip3PZ><X{uq;)dx_Hjd$e&I4l&r@{mVg=r+5~s83
zzR53s^Y*F+0cwVZgNDB4rF3|+I><WQ)?Ee@fMLf?B=Fas&3}s5P^nQqJsxeY{;K>l
z8bO7XKcACtOP|`!Z`UlK^Eq=J<x<MvV8qKK>nARo(S0}}N8^`C3B+>vJHAZ$983<0
zguspGu%T?wpXv#H0Ij@fu60xHs7o{Ca1~mw**s(i9Ce(|L52&XpUldZ%&-^+LY;NC
zW=)B8Ygn(gYnq;mx!IenlU&-xiiDTYFmw9)v{q3Q^ndyHvdp=a9R_L~{wk15gkq~~
zUdcOVM#|d|OeA&Z&~sA{|EAWE;DAM1=lir;Q>$cDL0;Sq0WW_ZzUqD`&~Ubo5pTwL
z61}s%<xRA@cYn95!C2m$#YB(6bfA-syk4sB39L`>u(p|vpq~Ki>oReoDR5*M_S9*~
zwPhgm?BS>NC#75Lj+xs756u%*6M)^1n*Xu=ol;ijAdYwD{4xO1PrWTO?>MIuB3SQA
zI+y5O8DW1|;-&GMRa0<o&c5oZUtYix1_#OW2`LsZiI)ulBAnSt%j7e31jGpbaI@}b
zCt`6Gm%Q5GM%KmEWcUVMwmo0C+5H#AzQcvaI@dZ4K75h}f`(wBa4fCVJ@+z!E50qQ
zkD+iemTZ%{fGQ@O58pv1UnrYuDphs4Wit`VYT%w(p_|ho7m0}VmfPn_Lk@u}wGT~S
z*Sa^KO!gAIf@3?#z$K2sO6fN9j&Z@2wbylBGi1H?Z%BPFUAP(X{=@U7sPD4<1*4js
za&@1nFf&BVqmn%F!9#W>b=)%?PCa;LT-r(X-giT~UfD&hu3ruw<h|jXjmddQRx|LT
zA#n35{K7(*ZsOxxX?Dm9GO$?4=rb)CP$nJ<=(ZKEXs}FiJa)L8V`Hc2RguI)nOVwO
zF1z+xs%k3fxEN9Z$LL6_tF_H4;B0kjlN;AwuOut+jm6}_=kPHcQ>9rMW_6YcgWFrL
z-q5NB7A7#X@RQAa_dTkz*|zg&7i_=t%Rpv)c1naQttM1AY#o~s|B+KehL}pUh$LdH
zs`<3L8Pe2(#pTgM2S*3r^_S=Cyf!)etyi%AqNt*8&qHFVJzDhrc&!g(%@<CC7D|)a
zs8ayXkY7rMqzO7p025E&4Q@V+XjTGio2oK89u-;3^0Fr_0i@-6IDdyDDNCZ%yRpNf
z*ZP$<FLn@(mJ(S_t8YSzU1+)J@M-AZ6}}<h!$$cuUk2Fn3HI|^OW(tEzT-l~zj3q=
zlch_Ewzt4dAC}8jAc6$V{PwN=lB`&_UJY1ImRMNwRo9$%Y5K-B&6|6L#{eN%g_dwE
zN5V@gcDMJh%yh{K`b{i6{JyKod+IpxJ&(V7`ZfZyFT@lS0N#&fpG(yP-vT&upM5)j
z+$&I1R(Ib>W7I$nXQPjA=h`ydZa#&jMS_98|H)Zb_NlUJMTi2*XBl2b-#m4Ch?3VR
z4awNXGx7}cjftg#R{ZcddCbGQPR2|(v|`D*L6rLy3Yq)nGCP_T-};{0m~_*NZ{}_4
ze(ilo%UaCkR`0}(qcw#~NK$EUxAJ?XbnHF_IQy3o6ohi$I<k<VfIRE{gPG{}mtkpj
zqHruX4f2iYbboE7UrS5_TJshuRY&ooG_TmfFXky~E+w$Zj@c*g)O$+QD|3SEb25M4
zY+}`*<ebrY*4c7Xk@-FI3|>G$V)*mf`;A}rFKRs&fw*U*iVskKA$aN(GMo*Mh<XuP
z3v&H!$SMuXMwH79LtnR`_8t3_3sGUi$jEW7<X3S}=Ws7R3J(|(t12v`#0eFR@e0TT
z=gJ?{_2-e#5LFEW{G(6COpLtH$sDnSPbnkx%4cu5H|hrsGV`b~pyh;-l=f_W`rJOP
z>)Y<2YGDdjq<L-P*V<{DmL4vv(ZjPV(>K}YV!~4RJJ;jZVhT3vRnUh02cI_YniaC*
z!I5)#twt-2T=7?aY^gG+OeBZpB&9`=tN|e{eka$E`vcLcQ#<{FwPn!I&@8c8R!GpU
znJ!PY*-*5_wp~9lgxmZGu<Wd}+-n@rAf_hZnt?96s;FB}S_Te6QOkx1Q~P6jVUtg9
z!)C*HtdO5K_82tAVQRiSZ^-6$NJvF-p$e+^;t;i=uj>U)*R193zW&}Grl2gChv#(S
z&JL#KEysN0jv}pPI7Q2qZbksl<z?YkYH$EXwcLqMcd9)`dL|RBWA>izl|pRoNA~I$
zhm+#2p1avp)vU6@7OwhahJ;)CEGRg`0RYKh68mV2p~j{2lsUpu<y+!C>Y`b$NU9a|
zCNFek4U0|Q+NZuH6Z3H}%im{zjOXfFGLhlwc5<_tdN=1*?b9e>4R4@Eznwg42=T7Y
z9`9W`#5EI5C_Z2NTQc>Xx9_IcG%4E&8r22_zF20takf?H|I^-=hBcXViQ3wNwjix2
z1X<E9hz=kivO}WMN-IKCx)nqT5HTu=Y=U7;Bg!J1D=Gp}X~88BA`nD@uqLtw1rs1j
zga83TfDp2gZQj`Z&CECVKKIAHcb<D^=7T?ZtLmInb*kR8Rn>Vva`Bz|{5wvm{7|Vs
zYl(iDse5i)#e1K3PeGTc-mhlW8HK1|Y{CuJ39sTSK=kR4)WlusHhw}knEY(^$Qx<=
zD2Vl4!U!jCTp!d{fqeOydZkS58w6(lfZCVL+`=zhV}4*^(6g%{$Z+$nJ(Rh(7)T{U
zeRmq_?k|yms^aoWZTXmDM5X5u0v0e>F4#0$T_12*)yIo@apBT~3?>FOIuGQ;-}3oK
zhd4^}_enlHJ?iu}jrVLIzdrb8I59A90oXn}tdD%h(Y8-?10u&CvnJd0;YTnp)w>H$
zZl`lM_NG?C{W2?8e>6CRI_YMnd+p^!XXE|e${uI)L?;Olp!WN`|Ek6}okKzPy#vy=
zg!g$lTnBc>>fQ4mhyymX6x|Xf&a7M2Q_<=w{=RtS?bYw+i`jq&cO)Xt+pzHEoU}d3
zc)~L4K{CfO_NxxDn`ZjXrY;yX^Q+Y_G5Qw1H%%Tk>Ols|HQNnHs-Wi7jWB+lxh$c4
zs3>=O>m|RlaYuJ1Mf|+;k`cq3qf=u{GIY*nbu@-M9js{9>uddG_l!Ox2IB7#cEtSt
z?CMJ#<eRrd6L2TBVa(L(kl~)-gPiMD2|;n?%U`*V<`ps;MAa8!&l;HpY%Q5iPBUJ=
z3bl9x&YZ7XllMfGhLxm+ckC>!s>8>8u3Dx^d9X^3P`w4qziNJ$!D~0e71Z00805Vw
zfBR3}zbsD~Xj7=(C`PgY$L{8D?D=ppuvap%IFb&Zh-jD}p98*m;He+8E$)E<KV-`S
z7TELq!*`{o@9Ce!o4Bp^POsv<&)+vB-^c;cTZFc!U2@ix7pdf&(%)F~&$P%3c|V-{
zDlvWX102{TXjywnvsJ&&*(M7?Op+6|Pdc1RdYP4b-2fO2;n^SGRjz$@)o)9Hdqn1j
zp*PwuzX%6&=4h*vZnkt?n0gzW+p<KAta|svkY`g-u}XP!N8l%fd?d|d8_H1$OIo-y
zVn>Me>Z~Xte2jiTAYanzD-Y@2U#jLa7F0-Sys|t&SxiCW(h>95X+M1~Nuz-iQU_OF
zeo3*;xy}YwjBC;mF^n7Ioaz1!0D9}pX2-x2Xl-ZqB<){LdovQRJC>d(Gk*Ccf?+@|
z1QXm79(T<=`ZQJ*Hj#z7M*KM(dQmTix6cO|3QY&H3Avp1F6A4RB0Lvz_fo)=pBtZ|
z+WCofS*3j9^HmK;)LYQq@a}o;+9d6RSC^Ss-hZ&Nx#)=GfC*gVD3CEe4JscA2gFs0
z!z3n7F}1QvdOto_x2SKQs7PE_`mmyY@_^Nrx@QzkW4+6JUw3j=^0}KFHN_S&S!%cV
zg0R75uFkg5T;7q2)U2TXc&T{rcP4E`mD|EqczEf>DN@V$W&hv=T*crD61yf7y=|@h
z-fa=ad)eG|Z_YIdF-}qJ1`)k%IUM)f4;2qy?0vDRg&uYfM3NX$`#G0Ia^3r{qDavZ
zPT*&J7!X95xbdzwV#kVE^%l&#DN*}Jp_6{gZ?r?R1CLVj4+5+>8K<CxT%16;X((bG
z3GQI1{b-tdLQ_anSr%4!61x!?6z5p~B6G@Zj{18_j7=78|J{r@#evaWq!mA98}3!J
z&e)r+-`g%e*tkF-e~AX+uE=}xwAf^zw{JezCToQ}<?9Ud-S2x1%y2oija<<oL+R`4
z>gq_=rL@kepWK^M*?XHQA8%KTYpp(eruk@3zQj3tBXfOfBt4p9O1gnw?d}x1g!ef~
zv<%vTJcB$P{AwW2`fg<j?SE)??*07l=CilT7kc;XGI`Vez4#;z*!msN|0Rc(Pk7;d
zrj2y%NkMdBL`GS#fAiiPEB!ZS_0zWJF?t&M;oB}e;O&r3@Sp4gsG!j=fvXT|D&@{4
zZPBmWbk+={?bK&hg?N!MR$ui!UEO<MV3<Y?YJ5GUJNR$C<a#5<7b^~E)v!BdNG<l7
zT@CZGiI)m)|5f`D#2fn!<q7KWyH5$yV(aZBhyT(}r-TFJ<9?L<t**wf^pDQLU^_&Y
z5!>mf1jxAcyob~DNI;rF%QJQ*8<S;B_PITR9xT-nMMbMY3((%FH0=C!ga%bfwF+CI
zlGuF52r_Ijx?f#WX)_Z&b077>L(SKrp<mH|F){i`<|}{&K-p5+kcbqMnhiO%-u{O~
zo^fpAy`&z^5WHuS^OR@Ii6X&1gV4+xo_AcPWpxx}2jUEsbj`9o3UQS3HV3`h|2Bv6
zI4jzixt_F*mc7q_>r6659V#+R*-ei|c@Ermb<haGa|iM&Ok*!;b@(!XDfC?U47y7!
zNBB4Ajn&%xXF$k9Kdk6}FvyT^2Cw#y`mhc(arhnF;;{X&P6Eb4@mdtO^tdR||31n>
z1D$Q9OvGE4`^LK=(&I$vGp~uDs*TtiP_TKHIX!>|diuA9ZMaby^#}HiYKCp<b0mvy
zYM8mtpI<5w5S!lw^MqNOrw41I+bO}lC*(Vo3I85tnjv<6jvBTdb>h3g*}%Fk=GU(l
zsp$UJ;}gVwpGy%ppI{!=@Ij-;bUhzZkHc!)_N9)73{w#49$pq9CZ2BNdYT3|?#CMD
zZZ@$a-i@^o-t{*+yCYx%?V5`D2a^BiJfUms&O6t9&baBp+Pcr8k!y*gQy{w#d|B6M
zZwWZ~JbRoJ#!FAjy1tu!lW8CEybK<gE{J5-#I*Y3?h;dbNS5@am8p(aHHib2usY!(
zgV0G7Gk7WuGzwr#4#8o#cXrbk(Ke_)9h)kCBvUgFAds7c;xY9+p{>2R`%Fm1jBZ%s
zaY}xJWh#1UbE<v7C0XNZn5*JLZdu|tR7(IsHC!32!?&-dt-=ccq;#UezM_m$=n}Cy
z%-=%sqdWFbxNm(}B7sOiABHfsycL}g{-OtzBtmCC0|!si{)DT6>_m96^jhozKtrJO
zfrDm6%1J7{0Zm+V*HXm(2}d^z!A~)R2mWG&j)knX53Cp%s<F&VN`C_V6K;6vk1V@H
ze=Q~ei2iHPOT}P+8L91~j?E%xoTP?RbE_mQGw2ZjnEmIVVc11PD2wC^j9K~r1G9W%
zKaRjROw6TSdmWs-X6<cTejJN7*ncZ}fg@~yBk@M0J5YjHQ9yHk?X9dE7IU-iwZy3F
zZ~{LMdOMCGODZHr4t%SNf^gqEO9gQ(g6)a5%x`r)NMk=UY`~l>n$i*EqOPM!@mslL
zG;Sif$JYt<t<DJ-eYQ{@TR+DzFi+#D>!6&<9Q-%J&WQ1fZ*`mm`hQ%f#1N_0Be>~n
zrxuMBjzL6php)SbTj(nUlm5$uh5h}{e)VX7=y`P+U^YbpA{dR@q5PO*k`VvL&;o*K
z`FJ2XCyw4KfsDtgrW8no`y8}hg+T|R*)60ONNr;wzAOjr&s{3!=4Nj^gT1UEGBm5u
z2?H@}|3ta);jMCmI0&(qy=-9OE2(E{Cj!k!xl>0K{w~bz!*nON5WHm(deV1K688~f
z;T&_U^Iywa$r7Z+V^Rcysn-(Y4KoN0Mr#{qj>7m<-@Rxyg=Cq^D=6AcZynm?tOqEC
zT;lITlx#GxcGXxjs_iaY+<>-$5#Lp38!~}cC@QYQS2r|t(su*aojv!#1MIaZ`E5(-
zt*Z6Od0jDP%aWd_X)_n+Lb>-zt-gimD+`C}Df+6cm6W>#^y!_-O><`=&(;!o-UY`A
zV%#wHuZk%}sTe$Zy^WHJzH(#dbhP4wFpb#wV4*tOd~yRePGxftg&K${eeg7?svNpV
zW$!OM{;vCxLuuu2&W;+k!iF&8A^V66KSg<NQ~c;zaL=96z>q-{l6=Q(mM`jB!p)Nl
z_wKkKEcmOCZ>3!4nWahD$Tl@g%W@=$6PNgQ5x_8Uqp&$>fDY!&h;Yllsd=lMRL@O0
zw9aJyZ8P_PxAOhg+-=b;{9yN-SLM<<dr?rz3d5syjlEBJEl&LYsJ)A5Cc78;U5@YQ
zfF>mv4F-*t5l%DNco?ua48pQ>Md&)4a*)dnU>$(0va}5yNW6%w%>&<gSVYXYhY?9S
zTKt>EzAj9WBUjfkI0FHOSkgtwn@u7&gd&+H$i(I9hApLSLlEm$_ncrJN{#z#x@k_L
zq{^46nTmOT3}1+jFA%OaBDNBBaC)J##vgT@Zd6yIf%t#&);veiNZ+@9ZT29>0L1BL
z;jQbbkc3ls-U@fuu^sdtl3UNx_v!(p!7{581Jl!t+J**6R6ktfI!`0n{DuDvojhbP
zw4=(a=I7<Ve0O)xwXSKDp`%{t8Ux;kk{pr~PA|=#7v;OOBB!nMypCJ+CaiVu`<0T-
zC&$`5UFoC0AK>p^s&ZcXE)PAKQ4PU&AuQL5b_5)Ze^m@t&*GYN9m<XD&E^8%%oMj9
z_dg6zPkI3M6T;#r2*}(%_3zsM`Cdr^@dKdOyK}1@2Z}KUyt1664S;1vSw5OV3`V@~
z0B8p0j)qj#I4>Dk@u`ePwzt&=!p7_!KR>}=U-q7t*)@`W!5MV^$I#WhYu7KVg}E$M
zIZkV&qEEC=$nEy6&<_64J>qn5t;d;mWDk7xD}EbxR{#icgZzZCGtwsxHZHLU5-R&-
zR{B<1Hub7SCA&H$qIuwPfgR$y<9nB&<o1yZ9ymP@<Flk69|@2V=XEn<*3xVu9j60B
z*AShs0K16iIrBza(~8%U{==KBpoJ)ECs!?Y`)^qttZiz1SEDi#*a>!7+MgXf&a0s7
z&&)VhAaHgzyoBY&d`;+iW`wCy6Y-FEebWIxAWR4@_;vasHw3{}4*`f>I|$suY^)>l
z(4}L=%fceGUr1^S#vM(sjCrc(Ov`%vjI3jN2U3ej+XRqF$!`nB*BR57o`x-@7$?5o
zUQ!4P+@t&&dx3gS0?rDd=v}-^%z+xYcRt;E=tow<4z2K_y>PReQmVs`ysUq+E30St
z8O~9MZdqrRT7`Kyg>ut&)9E-dg10A$ef5HLOIndbho8R*AjcK4P`bA~6H8fvNUP2@
z?C|T$aqKu|uGD~bwd*($>%K`wc2fhkR}^ul&o-$phnOd5;V1AC&&%m^Xs4uBRbVqx
zdQ3jGM8nK8)?vQ{;V|^!oJe}r)||-xT8SVtr?DJ}L+k`nFmkU^PMl+&fL|2+bRHQ|
zyTUXl=6P8csf=*^Q4&zlOB@?>GEg-3Onah7foG#fp#Ii-XHDycp@1dhLP#+&HKVmp
zQklMZw64cLp8xOjqpi~l$PccL&uueCe;}emeJ-|WI1l+5UAAzF^*#u#Fnq0Jc+cqO
zTE|tw$zrr{rkI2tI%N^k_~a;3wz$vY;`CriAt1s1Gi!Xm9(!KeBbq8sM#)QyNL7`I
zf#*Z{dY-s`pKnd6fh_Kz2b$R$r6nqZP6wu4g#f9?8llr4gmI!{OnaV_hHNo8-+6!t
z10S-U&yBlWUzFHS@`}%Cf+RjJSsOFc=doT(UTkuPfCN{g`-%`1F!1viDaNwJTi>rx
zJ@;UILtn)K&GczroDQV^1O0yv=0G?FQO(9zKCfHr`1N=7S@cr&lU$=B65Wj7Vt`8u
z%xs2mw`HmFR~vqBbo20<lAaC=z`E@xieeg{?Cl}dRNMFaJrsJmg--j%yL>&mrvb1(
zB?Cko_riOF15b<WqVe^{;XA$=l}*%~Y=dch7e!A9eCCVC``!H*5N!ahfyAJ?jv_<s
zNzaj9#-U-FMmN6MtMfzzBGuWvl6c^6$N>dPrX@_+s!zh}sI%>>57-<(Vr_r?W-B(!
z9PH70jFumRuRn87Qfzr~wDhSv(jH^^-sF4jCrBb-$=*-ce~G%X;aauQE{VP0J?WT#
z65fcnzH}50=ny!2II%OOzM%rmg5Mn!Km6ohJB(G^f-aKlKh9}3U>BvuG;sVE^p&zI
z#z~6Kyu}H?yjE-0N}CpY0u;EJa_KNV0N7Yf1MXZ1yScifX}*T=yEYB!2n&Cwp<-j~
z&Y()oAkB%uyZpwToTg(m$VG0HV4}MlGDKR2B+~V%PmZv4$^(5yvp%1ymz<Q_m@F#$
zglUXb{po8c8cTKb*V@J*D8XATtsjKh0c^7Hh1yKb(o{PUe8y^u#+c|-Ggm7F=(P+S
zoE*{Q7@Sa%J4E?)N}vgxNlp3rwFo$q27TxcH%fa=%potnowsUu+!PWVjMLOO^Da07
zB{=XA&?Ra=pEUc28GUqsE^)^m+iV7ze_u>!e|BHi&_?VgravpN8zXrYl(|f0R6COF
z8hW6?$=4&AHbpYG_s|O$TTdA=)ieHoQ1}}=*EW<8_vuxB`>7tQklK92ern7oezN&S
z$>=}bvXz~)3mQnXwBhQKwr{aNtQl+rN|^57<HuN7lhLMCQ^psNU;0AY$Z8A!3^v^B
zym((A!QeOMTjlgp=7VN%@aB`>rZZ7FDbQr3^5>;3XMue0-fPSXFCU-1O~0G{i34`%
znSEsE67fR-k~aP5WBTif5pC1Y-e#M8FD@+4aWD^mcISW-+*KigjJ2)@jFPa@&1U#E
zEPKthKdD|;2yw=B-p6O5VF_SN@P$`2aH4WPD$k}VKT=KC0|#ZZ1!?V@!crC)@hmWx
zi&ZtU&&pq*ercv;{<!O~BSv(rvU`o*qP)_Xb&j<pP)^<PC8T|xe$D`9MN#GIHw>5q
zuC_@QCvOfmjCU@qGWr2Q8~v#Uf-j^>8)~ZKxSEa|CZ!xI>ZeWA{e?o0_-TtGbS6Xc
zRw?lfni+K5H{)B7ZI~-a1oFiLxpQEF7Co|x7!!_cm_A(w&SnKJlh5nOD<t-^JXl~N
zcz1BUN#v{I0Jga@(P+?-7{QKcAV?k!%t2LM`B2qBS~n%<6Vq{j!Q;I&Xefs{<BzQ>
z-Q|>}LV+ba&Og_fCapHiz7w>E-SolfVz6(#*6v^RmV%%s`*bd&`g&v}P^PI#9AIs)
z)7)XdYt!u~Dg%$+3NM#7ID2`B-&PO2n}^z?rea29>59|cj#qQgL(+lQ-=F}STPNc8
zVD{6^21~ez@3om#K584uH*n1Qpsx({L0IZg4*i9G2{?jtGya@5XZ@1;0@H<c>#>(&
zdla=Rb2x?7)T{|96=S*DP@{6ivfrCm95n1xk!bc8<^0Cv1AIm59u-&wxDAc%>zxk5
z<>pG1Yen`ZtWq%{%X)EIJ6<gc1jHs#%4r7p4EC?eIJOKzLV-nUL4rUmM^ivU-+r=_
z(ftwu!*Eg2wQ2Od-Xu|n$)|$BqAyqAuMVSgl6!oq@cb|)a1fD<ed0Q8w7S=?7Two_
zz<=f*76C4yuOV|+5dO2wau9I)g3zdU*NBmGHD~7{=vfCSDa)2^@;vnOzF*U-iM;ic
zj(XPZda56oP9b@M%*9Eb|7!&Q{eVQEOE<ca^yj78iw|N2aljWd?06q)(9_KJ`dvuc
z0grQCGojm`_?qtSuGs9Cvd)-T2$(!O5MF0?eORFpIbEkMG=i}Kqkh#zk^v%1*WZT<
zLf}Ac>VStf7&hr|Jk<L59Fw{M(b+LY?U=+l&*jD~^+`r?Hdph}V4o~~b7YpE0Xy?s
zxsO^WZhbx|vT4H*BPkE=*)%*CD4&<A;}yAo53r{F0SlQ838XT&-NHvyzyDx_qfWqe
za%m=OYzir1wWjOWTBjSQqbE_x1w9!fj~fT=c{?gGq>O#I-CqYwitNUE{k0S;5X`D4
zS&%YIo8Ia^f1{Z)oh3~Q>4LkTi)s^GUX!UqS#O<gCiim{X4#33`JCgUbkiUqbO#|m
zIe9pE5)5LNXov9Rp$UA}E(pj|9pwmjYr<vgQ#1*7GsPucql4SmCU)S%ru|l|sBi0q
zT9dpCJBFkT$td|uf_2^oERTf+tPt<F-Ro%h!LwTOWS3!$DI`5kp>WW)`d!~R;OI~8
zK}RdP^CbmEfRFMdJD5LHyxp5!bEyO=-T$cI7UA#80spFXMP|0XdIA8&`Ta%G&YFl6
zdxH6iO9Y#&oul}*_D-t1PtHB^by+*K7&;!KHQ-<HX<v~5W^j>|aUgrS(+vvvNJ(Sb
zcxGb%*daMrpvi9xW?!G%*>SW|+f_*Fe$yfC9NB=?-Kw;Hm5ThB<1#x?wbPzv&JH<>
zJ?#TCMqK9yq06)|p(*xjm|HWTLnY46CA2+<6M7vw49zlsSerAn$;bP&%Na9fK~a*E
z_u3T(5u3>TC+`Kw+Nstv-mrithzryR7vmZ3w4L&+AzbbNI<N1}yR2sJG$pMZ`c^sr
zw)5>7t?IrhAXo+aBIy+(FPB4opQ?aw%vdI=ET-bGV<9c-+u;JcuLh*NYuTEaKMC5V
zdr2UXrG|(E(^3pUn0hdy&{tcBrzMo%v#2N9G%!n*nD=O+^XLmCEBJ9`VsM>yL@nDa
z58yiA?zo4O&D}}@Utr@|6e-_FJwAT=l-3LiF3(hbj?asDDlsg`Cb~{37=e#Fs#}9W
zC!ZVm=bemq3uZDM4qlSi{2f-;gNwX~Q?Ajb@EF%XH2JK10fix>QPzw$>%B5>yWQ4E
z7kS#Z?o@8T+6SPTDWD6xoR@$1!_`!M&rR+<fyFs?Hm7ggj~saX7??C*KS81Kc4Js{
zt3SMA^Fg*l-tz5GC$;*|ozri?4ba*$Kdqxlg1xeaf#~AqRFHlx=A+Pq0S>UjxCKG=
ztxkBjvNP>$)RI{p`6Jiu8Y?%`9T8{}Lf=2#nm{B?WR9=A?Y#c%_}z6anU+ia()@wr
z+OG0}2t3rhG;HakB*oTnjKUau`D-Rtz3kp92e|lhataQaryt-SYx#+j3&DbPpXC#e
zzZ6Pe(cp>7X@Nc??QX@$)yB-#(1b`uftL6A6p~fNNLpUopuKfr!pSBH@dNOZ!1)Zx
z$Fr;`(wXu4-&PijSw}T40})f8`)9Bi<F6a{GgwW?doL46zc$_U_rG(6-k*Wx(|n(=
z_y5g$5gAG`4xLWb#a5tuDuOh_Hkg1e!gnCL_g!$0A6l2T$v3yy1fvdcLG8Ukp8bQT
z_>4D`ry6GBZ=m}DWxK>?U*ji>@0OF_Di?+>vcKQ0M_Chk1{<w7fEvIAp^o;P!a<kh
z{D@%00UWdtik%NQyyHiztZ{SeqfC8-Rn0c0!)AU3@=2E6Sk-2YY9Kfa=>gdWluHcd
zo3GEAhVO5fk=@u$@BKg-1#DdzjZ9P5s$<1<G4rRMew&N{Yx|=<l>FWC+Q~=0QN!70
z8Cca2S7<Fswwmx;_Zx)1zu>Ml&0mjoq%Z1z0PYUv<Mt=;@zSd^nW<g+nWa4)10p*h
zbzEFWdiy7*tb0UW=$v_JX%_+wO(X6mC884_7xwu~5J|WeUjt?X3DOHh-37i(S`a=9
z4fhA1=Pcfh#q&7UCnzLBnNAge$dxXN0+}P247%nJ5IZ*smHmmAyFwjc&#hJ%Q2?UV
z!ek%{|ByPAv=$M9k_|2Gb@}g1)~g<Fy8i88<x<HWhaOU3mEYH8*K-C+O{%>n(u;>j
z2bC(`7j&4>!N~6e&2aL#D-CiL{5)vhjNG_;{ikv;aiD0#$;s60C%0vTUq1b2w}_gv
z*jY9?(byc8FfGi_t(1|IpbM@#oUxgLknjg})z&uk6PmP(Uh$=jgxILSH@ua)S5$7Q
zqWfJ3PSB3fPQ-QJchF2_23SXM!Z-wuY1FMxwtbcN0tDmT$s;8FBcG*ADL1zL^jTro
zBI?<{;!wsY;{tZ;`?=Y=`A^xwhmAm&;+tAkFaamadBVs9T45S-bnSs7wCR>M9eT;-
zrQlGuLuefx=FN~aE!ac@&+2YC=}esMxiyHhO(6?vXykgVNLeUOmF+7KcjNgbv|*@~
zSub*;?&>(A=uyMKQ0)Cu$|H!?J91KYD@!4kUr~yr4s+V#6p76&j_P&~HaA=ke9r!T
z!((Uh%9VF$19G1nf<vfer~OF?`w#;JLDA=ZP~{1fJLNVB1Nf2Bfz#3~voi=ug<lJd
zSi4{VdrO%Qs!YWNM~&MSUCth9Ij0S6o9IBgPHk#Q>)w=fqx&8W^U*F<=aHR!!y|JX
z|Bp~Du>F5!0vU?D_w%n8^4@!hxCAmJDX=r_BLB`~sNJeTsC;YdG3e)Axb64D`XXXf
zZrI8|?-*c8YQ!`FHjCRJG&&|e<1A<w)?GB|879Z8@>fQ^7SE}F%_<%-)({+KS4ETa
zx)LJ4n0L1fGH~}9xH;f{Qp7MkQPIuj5DJ9w2I&&ZglxL|0L0ZM>VZ?8h2~7tL%W_7
zxeL7L7l!YG1oIf6tSTfYDtQWlobr>HJxrD@v7<k7*)LK}&&7Hro{%lQV`1y$vKZSS
z@2(rtA=#BVxb15K6i4UgQpa1YjHfcoun&GXW`&8omC-PCz`A;x&L(+SJ9=#?1X;YQ
zei3%+^PdIUtoc(c#h9N1I?gBeos3*pF?OUN)7DeVo<@+hS6&)gEHSx4!_uIFTlXGX
z>%ZteSm6Zy0xOE{D{&CM5j{@pYb<T}mxf}Nl$LNdWP8_vi<Rq8JuvYd<d>=`FSl~H
z$&o$tFXoaK@Q>{4Ur@g&t|)E6eh*hi-5NrD|47jHw;qJ*#5rmwOuxgqa!b6zrX#MX
zU_chVG&2D)&NWFoW_hc<aw0|iGu&jR=Av85&3y2YLKBPHXs%|M^2G#@HaExtJLIpb
z0@Kd3K0eEhXJ5Y)&7D)qB#$;n?dDRK)vGQy2h~WQ)(fuyuX|T-4@)ZSjXGcC6F$I`
z9*F183BqvLb2};{^EGYv0NB1Yg~|9?BFi&NZyW*~{c4o6WI<SJS34fO0#I(N)4oh(
zis&kG0aTUB%d{ddgd~a><OG4z?6KG!S|JT?{aRSsb2TG-eD78N?&L39%0U5~%htr$
zcd8=nMDw)4zbSITx?mr!d4>EwCi6`&rm(LGNb4`>$wPMGi`ETTj@}rF_SV#WR3oSn
z+^?;OI~Z7#Zl20;GCqPD?qT)7{~a=LlyYH(zvuz9!j1J@n#4fS&F%2yn-u3<x-3BY
zNA8MuZSA4xg#^AIrV=+is~Q>Uig4}eh;{qj0pd06$^up()YdfLAOeS#$+_kwb2lll
zT7!=Oh+3w#qaNIG0#ET2m15XLqk0riYd?nlU0q_AtW<pXlF}r1KiTx7{<R+$(1jWg
zFNwbLqKCNGs!Ig59&w_FZj%0*?b2D5yh(96ATg%rhp-}Mk&5d%>?Abkv5hLpFiQ%J
z;FmCp0Cv$*>yYPrvK}6G<@;x5I@HIgMYjEzn|su`u3twpSf@_cO;rQjVN`COGsw3^
zl&puz>7%1wghFy5xkx*f)>~iz+|*E?`F;*h5(>9Y_zbU?j$jMi+BDXBe=;<qdXl+4
z7fSWLo|<4A!cCT{FqOj!IMm(=7n|3EkMCjBdS^o{hw);$SRQX5N4f1k(OwXZP7T);
zj@N4@1#=cfM+Lw1vuXO?#js2V?iVbFjNB@)LAG~YrX_s%6Ii|S;{JDmRfEz@c%FhM
z8wnd&@ISP0GH&ytKL2jL+`6RbgT4gXRQ%;HEdq!vwb&3Wl=cO{!UzP_b8Meo1A#14
zP?J>EEG$n9$MuNeg0O)|4^G&CNm!g{`OcET(16)5p0mw3%rr(cstv0mq`TM1psK2E
z92=TO-WqXLL8XbC>xbG*$Fe3|efU+LGG00HthhG$ArKCOT>Y0~VBJ2vpEIFxF6nEA
zbk(lWDh;ftc{K@`9b|xWzTpFDvf0GeH|c$w<b!L>27vX8i=8@uDR4rw=t1t+E#9H?
zNUMTC7c3a7EQo$8P|Xv5{myti)_N=>Qs?|qS4uQ?L6+>5n^RJpAZ7_1)?_Rck~ynq
z1YvVv@uYg(ePVW<l~^bqH+)r)&B?aq?qJP9MKYbrn}7buEof9x#BjbHGEtQvaX@*6
zXEgO|9XHc&>HIiXKq8Lm;rbVSif19lcwu3mectEMJNb<M=kH*ep^2QTY||Ra9}{+V
z)3bd|r&vyCM7t&AMXXo$TiN9b`W0`cZ)7IyV!%JZ|D#xU;Lcmwi0iOS0_5SUOJW5(
zlL_go>#4uNjzH`2W8f(Ds&v89%w{2~qB$F!xN{f^XlO32K{*isL^ee-f^=z~)-O|V
zl2lTHMA@fGvjl`xp$NF<vLtsvx%K8(D7g)TE$v6eaOTRe^3U@%-a7W&FRF1~fB;o6
zSfuJ<K~w?5q&u7g(34~FDpz=dY&hJ$!f#ITP8?R`;1^}z)zzS!QPZ5^!dN<`32>}R
zMj$(Rh{I-?RR1l8Gu!KQSf+{1AB&rG`O;F##+Fa8wXgy}Hcn7D;6zzVo5-0L-FynL
zfFa*;#uI!vTdP6WlOi#rr_Jzo4iMEO6TFMWxIoaQ=YLTNS36p99yx<wwCz~D)1u39
z4l1kVOFdZ1aALi8d|?HBMoEA*#yYY3rN3Ane1bD7a%8Ly?arn_mqclqf6C-@Vr=5_
zo=#~=kH<xu9~5Krt+n31p_dYrA8MedQG~t#fbKgLS2sMM#~~VWJoDJ3S(;f7(8VaW
zAh$s=5*%MB0_YAZ;JDufkqs{dw@zj4hdQ}NlmlHf#9<eCI{Hj*m6|Sf0E%7J+?SLK
zb(R6wK%fP`Gz50vL@L513Lmj^n0MXB&gnX*LKx1E1?b(m!C2X4NaDFf_AsU%$>H@$
z7at0<<JrQZ!WtN-4@q~J&1S^M3`70_@Nc=~gsU^%&3#26OXmdxEV6>ht@)YK3yWTN
zZe?d$+tF(<lY-o(vNy6h>73k6^=q5p9E9LH_cN`uJw<SgyiqI#JQ%<V6<A%l7LOP<
zpEdrFxH~o+GF>yOexGL4%71p1m{NE0v&&Q2*@dqO{b6k|p>m2K<@*e(Zw}xDPz!jS
z`@?$BA5i^a{~;e8dXhCUzCDFjEhEcsCdSVclulX{7TSK&9|@kX#q_3kr({e;=d96H
zKgOsM<+i%#4$e3wXLPGP>IUbWrQa-_MpQ<V#RE{Cpav7S=ZqN(@aSv}A6|#LoW$la
z1bW@4)s~bC)=DhJB?c5?lBV8s;0aR6^_-EhumOnt6!vLr?e>X`oznu0Crf|zmxP<F
zkEKZXE!wPVLDUCv=dfoAeAs4L9s<nee$GC#>AtUaNzLAICdShF$GXA`^P8YSW^TDT
z37FoDr9GHzT*aLt4-bgz<j;wl>L{>*L$u<pV3W&FA=&|Kvzb3QA8zUBI3L4l5cs+=
z9|e=|(=-d8!%`TM)h<)9m&aOCy!%U=Lgm*A7hRnfM*gdi!yo|PHU4mYr;Q3c!|FC6
zufZt4B8uh0%W+HLHjf5&JUE5I3nbSO{4k69FIXCu1K8fPn&Rt|MITOPjubA;r}&qp
zna-+L*}0a4MbtJ2|D}sWr^2*vo^{vHCulU?*rZ_@at_y(DxDnsWmXN?J>@E|$3niy
zI11G{OC1JMrBuT4ZYE4|?tS6=Bsrf7$X?k#t)>;BKczRPd?<L+UF?O*&Tojdj-RRy
zi?<tYxEvgsuSsPpaIhIS)qS%gv`kn29`h)Zmd@SCo{c)nvxRO`{)Q;Aij8+Z$=T`5
z0ai@fomDM;p)Aow*Ux@65MT>20P{`-2KN<Y3vQbg^}LK7@$HGFsdB?cA`fPi+G-DT
zhLD4jv1g=Hx>;*;YGiZrN7}X@1Y2OP-5rNq>@F_#>sb&_%JK(wYhrzH`Bo-YpYI^!
zBnz#;^N6o#Pywx%&DF4yWHV{tB#i5?IIOZwpbAsS<O(W_oamNr(wiN>{D1zhTGEwR
Zamg0~$~wEwF9ZG#9q|6Se1G88e+K^%PtX7W

literal 0
HcmV?d00001

diff --git a/docs/img/domino/drag-r2.jpg b/docs/img/domino/drag-r2.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1411bd02772a012bef2563ff31285f5dc912e518
GIT binary patch
literal 31447
zcmdSB2UJsA*Dk#2Do6+EQEAepinQ41A~py;A|g#d1f&ImAiW7l5u}NLh!E+$Hvs{q
z6Qm^co=^jX<i_)!^OoZo<2(Po?;Yd*E3E95v3FMXTzjo&&iTx_AdV4d07e}RZ4H2o
z3;-UHUH}mVM5=q*Jq7?hJwOxyfD-@}88bjZYLSs%02voR^;;VNbjY~>*}hA5;rDCE
z0U*Q<p#1$B6VmZ#A@%(8+rJ+vGRgkcV<!0@SChZUr1+yvHbc4^Ag%+7w{4x>oLz05
zpPrSKya*^>)7GQ>**mHH))xG&$vDItIUWO)P%itQOkTrX^CLC^Oee@@$X`&9od?L7
z$S9b|h;0CfbQe@)zm?y<MtUP7r=X;wra5u)6fNn3az=ohjDmukl7for=g*M&lFkE^
zOjOM0q_0x5+_9uN@4_ncA~x#;|Fz;~w!7ak0<sUE1)My^eujgSOHfGof{3V`yuu~L
z%SzX8sB36yY3tm(Z(s;9GB&Y#Wc}F2*3RD5&HcHDr<Zr&%b?(p(6I2h_}2-ENy%?g
z-etef$<6za|FNXBth}PKs=B77wXMCQ^GjFvz~IpE$mrNO5`~_fn_pO5T3*3!ZSU;v
z?c)v(f67G~&wq$TdjE%Hf0B!dBo{d)B?TqTPr1m*J$?$#L`ii{nwt6Q9U4m)mh&<%
zPOx5!%_?p_$uD~s!}jpm_fzZwawtLUPtksp?C%K{@V_P5KLz`jTob@`fc&>YK~7FV
zML|J9MNLI2)F-HamJ_E={8mo=Ug&-+^gj#3KMRr62^pymN=ix^(jO!3Nm|DL&xJTi
znwBC&BtS<&Mw(0%OaK@-#=i@{0Q^5*DTv?xe{!GS|Neb`0yBF?XkiiqebG?HTaIPx
z3TM*MSE7Wze6x-oqU)siOgYjlK(47sgGGEH0*odOP__nY*b`hQx-pciFZI*XXw7j@
zVNUsxdXC**&10)*ktbRgyC2=DN2)(o4j!mtB|EG}t^4R;4PWGXW?V~M=W;4`I6eQV
z)>!3>B%?OB;{y(BBOb;R8?vTs2bRuOXebA+f6o@@ZiSw-x`Npy0{)Lu?^O;+r4LPd
zu~%rwOK_71>H6-r1_|(!JGi2~J!9IY6h~oHm<C?&HN5g*;oUvLW=B#DZ+-38IBh0!
z#f_ZWS0Cfnl@eyiZ7Fv8!tq&{#ddfooI{NhPmXB}Q@P`n)jnI$t1%53c3aITHJdC-
zvDwNBpQpT;3zxVWOD_9hh7MgHKNIQQ9}KtO1mTq;I!qeF>XdVF9=EE>Du<k>w%6ix
zl|#(?+S71H8#j{`g@M~N)Au25&~h8Rpm$OXfflFVI(1>F?vew-p|tsfTI$o00sSWx
zak3pQv3)Y{1IxB9HzV$0I!e(}C+rz81vfArVT+&3nFZANhc*ilg>S|^XuO2$<91(5
zclMsCoXli+_W|)$Dr5_njq-?rU3NUW0*=havey>Kmlu^&*E!c+{-7@&Y58)jQ$Zp&
z9uPDAK<RskmcNWQp6&OCvlG--GMLMcY1+9Y7Rnv1d82a#i!yRs^DakU(92Xal_jfA
zYleMB_Jjk+ZtogsF$@uPy22gcz{BS`g_El31sJT;Vz9#-F6D^Kin*^VrcdIwv>*38
zNcTdhRN*>Zb8X>#<r3EohRX{q(W}1vIw7})xHzqRlHX~eDyw`~or14DyK}`t-vl_m
z^se!u8p|pXU~k|hd{+R`soWDEF@cA<aUacEg2OgoH!)of0VQEhg;{aTt^o!7Su4`v
zs5)U^ENx3hNn==}ZI}ua4Q9kuv^bh5bL0*^j_VttuC=|V@To=QrfwYH!--pJ52ygY
zI*2m5k<HfcBxWHC-OpmittX`$lvnkd|I1xDPT_~tRxifL9kv}%^^F~qVjX^Dqj>&z
zID)R&!7Lwt6;`b@4{A4R=&G5R&hfDEZA<sG%!!A0qECA*Mu_hs!l9+$_Qt3wG2Dw!
z;|>*rs|v*?&3@mP6%6<8a>R|VX<clXpHCaA7QfhY1%%*IK;+>9HH$~f-oRFQbxssL
zh_^Gx-pu-5@tM+EOYtnzR9OKE#E8F%Mfj_6z~n+&YJPY>G`Cd{x=mmmuDhu#QTG6b
zG%FTYOO6-V%SM4aro;I}u_I|&t5fHL_xhe6&xv7(K%2L;Wpu`qy0n>ym6<@KziML#
zv~0TBnand0e#$eo?KIVHdVhIxp=8n7Qs#_TlV;ahTAdiI#6BRNm(`XxmWS_kKT4l+
zgG6hM$tYd?V$C0LVvY%#0X_-Uh(K+wQ>#3BXs+j8_+-Vx)4*ALf(T?M3V+xlZ1Oxu
zWP%@!Hbm?_!+B9x%|ROyn9g56>ackJh|h0fy2W4IGimPw?hFdVYVLX66w4WX*z%~d
zHcnYLe{EeTenZpMgH_L&;w`2`D-T(-84BiB<AzCA3`(j%3*__*c`GiOn)ds+K6}1#
z+*@CwD~9bt=1~;-dBSdd_Y;M2-~_P|sKU;GQPTnU+9=4KN-XWPvHIhR{kF@dALd@0
zd&O3Ee~eX|jkaGe&x_tH-yEN9o#L(z)WW@SL>U)>?hMBkiECsO-!<YJmF^NR+*+Ry
zu)fk|HQ=vyfZ6Ya2lI(y4D+xw*GKgFV%{lt`VwMfeoTBxroJqpbMpHEh+l&nSz5<%
zqyZDcg`wp!@Q_{IWcliI4xwwiylUIJRVi#MSWO=~iSyBG{x2*}A6?LfXX9$on<02>
z@iTtXF-GCDMvdi)#Wk|mR1yPZSld2ih-rLXPxnZfFUQrnqlf@9301$@!K<l<rAqwZ
z&aHRS&EffH2a#@SDG$21zCn>lD*E^fG&p)&ilu%=zhMwnomk<ICAD+I&f)^4Z%y^x
zC6oM8VE34*M;k8sCU~jNx4WgQ;`TB9sQUTfG_>i_8>ezb_1-!q->8us@mKLN7d35I
zdrOZ$IqoCKj!WDSVKo^oo6KfQhYL}lfSD2*2X+?II;EhmOrO2(at{i`-ssDwZ5Vsy
zed%nB)~MQLPkt1ld6Ulbt#QAr>nCHAiKQAPN1vnd6v3^`D=u*~)2>?|dbofMn$G?p
z229J*tN_=_4^Le^4WHDihlv_7Rxhn>4d`kULaClR(Uf08Ep|GS;cqt*`Z1qOx~oI_
zA&uY4Rh4KWcSKYt%kC)nP(5^LsskNevRB${%HYQ7qa|}wENZdWwj6cuIh^BewG`MA
z58$|ZVD#?8-OhmVC(l2qTq=a=;AFA=&5bAA;au9YM1UTLxK+Isw=poneXKYpx(=7=
zQ@k_t;Q4&~bDwWC?>tNQ8gUXc217zvHzXm|`F*u+Y7(ZeeDD6bip73*M^4IDWTyki
zM&YyIm_}MWg-y1(XZ-yH0W77TdsKOSPVeL#cKvj<qvLGcvx{EJic3!&nu&nj0Titt
z)4)UMGo-WNeYWG^jm$F`*cqYwcpToNFf8i5UgSJ`x-wKAjLc|8O11d(s4{dYK|-9;
z9oBCNRee^n>Zx2?whiK)W0qX+k)h(@9R~p4vtZlJz(P#xJzOVy{Q?f6uI%i@AK;Nl
zJypEs<9_|((={^l)DRa4fe-JBxzx5gn};*xeV`bsw*X!8PE4gew{+LE$M()Pp-UbM
zZ-Iu~hTX+kVi7abk@7+%Si$MgDODfil5xTM_K7-(i2SEpj2gA~rCv{jzYXXeI)uM0
zM}$F762Lg;nU307P3*=CwD0+a6&4RW2j>rVy$|m|By4!3>m<2BGES6Ma%Afk7mjoM
z7(F%H265N#4(HVji;v%3P^P7=zv}p=52%V85n*P@I9Te5-p!tEj3EMaemwRHHQ+Er
z5t6X=!#t-xI>YP3&9eeA(gOpWI#M)=ojUjioQNy7wG~vl-v*9kPX|Zb7*dffT!q9h
zZH<}g@~*x5QS3e>_?kOyiy=Wl#pgbv+hIquW4eqzVFnRQ;Kd5|r65BhRE+7w-Y>l(
z+e_`K=09D26)b*dQAX>>`DZ@lWGh@A?L;8B2GP2#COU=-7{-gx>B%L{SG0w`RX83K
zZNl95AD`t!S5Jq)%NSZ3!%gZAX##ic-VZsVOamRgtlceTn-*!s?MUz^U{jhY>cfTe
zL2I^5`k<WGD)US{bJ5%p^;R7wkBiaZQgZRE=<VCac~P#-0Scn;V*?{o_VvSMjLiIo
zp_B|kk&d1fN)r|wS`x>JR)IaID_=0ZcG<*zqk*8MCO|VWfvD;_tO{x3+S#2AHw~+k
zEiA100)4>eG1krd^@`ag)q?MMckDLGxivLSvb9~QEc^CIrbDVOx2|&P>bT>Ob<UCe
zr@LSIdB-vV%q>p@<#AR9fxWV0I+zHQ?iNc}+JW(cN=tX8_Z+Av?3h$#_CX3GD=b+>
zHfZz4Ncn!`2sb#ea>RlUtCoQB?8x!>bY=88g#BLsx!?<q{1?CUMRSxzC}n=R8)-|i
zDj_`U+|uiSYMW-?Ap%Sj;FpDQnu*6_?_Cb`iicZ)?v-hgYZOigF6m-mRDWk<&@`L7
zSRr>a^8J9lVXVNWgO`naUf+ZGjhnIN?JRYANOdZGE9jeB-%Vkss=mwH!X|aE;}CW<
z5t76E##1`Z9z80aH)zQ`Vx&u=GTg(?#_x`{b03L;k)0@~Y*hPp(I>oeQIDU*edUIf
z+#hXOng-jKo>0WOM(>dAT#8Y<gqOzIp`oX7njI#{aR)x}sw2rX`&T_K0s}7lFE0>G
zw)dy)kFXZy&|o+dL2SvpemY3)f+rEs8O_8d=FQfNqIK=cBp^M5a<%H!M&gbO4)>!W
z-1{}H@!~;mJjkGxHNHD0IgwmJ6)4^ov7+4zW0vU#{-VC2UovTT?x+K-f1|>Q1ApZu
zMC}8{-u|AMCodqP{rFbkWf^Vt{I{_dsk2Z9+YwEpku}HlT+Dj!T<rB+dx*jX3+v&t
zhL4RXcr>W>ubMaF&x6V@FA~@x!J5b2m0)~TCPVfT;w>D-c$`lJmU3`bMBt=p2EptO
z5jYXDu1^HirCLEaogYMC3+=BK{jR}<2)w+$mq!HNC81*Qf==)Q7tNo&f{$4n>EI|a
zS0X^NN)MI74>}1<49E;ZI5ZfHNvI|Q^X(u^sNWC~*tv_BCiPO-PXr_{P%N#8ErM`2
z!1nM%5kC$h&|<hq1d>?cE$5Ez5rOZzm`(!o?{`|T=*&jwh66Ciq7{pCSgDt@HFYCT
zo&Bq;!>5-0o4g9q36@^KGXFW3@n9<TzmOoFz(~^0|8Y-%w`xd>GhkQKAMAd_E1>Q>
zb1c7;CW0T=AODnghwCius{^AU>()!Wt|{|)$0J2NCoTeQh=oVgDM`s2Te5HKBwbQK
zquKiStF#28gs-Nv8$3`IqxcKiLkPdfE{q1Pt)kxeF^%ITa4slSc3<V2)kk`T(Rr?3
zj+FERoW`i?@@>hpzF&39;2w$`JP`1aykBokNDG+7Gqp2a$XsFC2ut~VufQ%x+ufCt
zePkk;Q*u+gs`|}%$-oy@)P1+*iC;!C`;Sq8{PW2G-iu)>WGL_%`$P76G&hqw9F-be
zJ2<y3aGkO*Sm6Pa|C4N{U`ijGl$9dKtoq8t5tK)}Oo>u@kkNyf?gp>FniS`VK)K>F
zR_tH%N6OP<&m6~#a`fwIJJb&`M(m8MOz4+QTTESd6yBc+SnxU*6k^3C!dnoEYd<=N
z7sf^8dKTOnl%us(xr65R0#ZyPlGo;H$IFG%kqQ@;5%?Wx+31cM#IMsQ1M}-}ldlr`
z2mluPLWK=A9cE%;Z<6t{Y*kmQXx+(4LNHp(QtSOzUZtlsQ?&);?8TMxEUTkSFy=v+
z7{XI@uqCnUoo9-R`v;SxgaNxDD(0v@9mOW)SNW6MzH2Ehe>DKVtcoE_$XYK(x2fTL
zB^Dm6a(U0!3CZa=u;)-)TM*CMQ~rIfDn;X!)r-1{@B1$1`@s#IIPMk;)}*FG_IMr>
z)?Q_8yK{DEcv-R6Rp!Jmj~{BOxcW=CdpZBI{=mPi=iAhXAR_SCIe<VnDp7YGRZs6=
zS|Fm*^GGRZXQX1C2;6kEPAf!rK8P(+eprIPcO>N3-yk<4ujkz6#}vIXYofgQ%IrtG
zvx|`Qg0?eH0j+Ly+><F~dEvkKZH9kfiRT|!a+{{nj>|A3%g)e5D1kAcM?XN>lb^EU
z%e&j5)ep5>{AIM&%Cmx&{|Ygre(4?<!^W(QV)Q!+0!i)ZVwi4=6^NQqm338BfLJV3
zH*K<I?D;C%F`Xi@is1%MPmt*-OckxCdGj6|C9`<bb8?m)HEMY(wEMl|3^gVF@WNkU
zlE|-cc$>Ndaijp_z%oQU!WK1VHe~1bKCOIGd8smPJoV*KiEmi0LTxY^P4^d--J*Cj
zs5v7zgY9lR%r#pAUt=sZ;+dN=u<#?DA;IMd_N)K?sae(4fWI|mR@`F7k7fu#GrhV^
zT$&;0E55sjY&%;SKbAG?Ap)v`)iRuu9g5FieO|L?ASd)S(lqd_PGb^0A=lhTZaAKr
znwpwifiQJUd+{gk*`>&<4xb8~s-DJOCj$Q?-qNgqO5jcAP{SmnVU<gQKjM4$!X)O0
zq*B@!CTo3+-VnNPk=DJzNQK%tLN5`ZX%JcU)(lgz$~M!@Ut(VvY4K6jIlATJX*Bnd
zTLiVa8Xv7>`0Ml3|BHJe8$#mAah8rVB-(*YK4EtuEMH47MXcBKPF<4sES03Bq(Hig
z6<8pmUoL}z2-uKD5>8v58pDKBzJAeuU;04G$HU$AgE>i?R0-E5+8Kby>CPal;lIMf
zFW8vLU>Jz%4}5?f9&P)uGfe6^ZfihUxXw<%b<eZQ`i6!T7hRmTI#q4F8OehO2Y_h_
zqWzY>vuSM3Qf^I#$d!|er8Pk$k<CoHSi9eE-B)99G1LEL7XQTprO|ghQpNL?VB|=?
zjd-0ZZ$+T%C8SU|XKu8ya35^6N|UAR4MFCpe#JQ?10b;A#fLV^`>AZ<kB6lg)g|VK
zdwY*3G?avqlJhCI151J@*3FXs&hJvkgc1Sk%@=B7ScA?e`RP-gYWZI0^;|2^XfdBR
zR%bsYu@`q&Y%89H4QO;uWz24lXU{CQO|y=9PP8AY4%IBYaokfjEzrAndvJ>TnFq(<
z8ia9%Wu)|15%2yw+Vq?1xJx81<T{3gPX4IRDMaAfP$g}I@2#!X``Z-m)Vj-$pUHk>
zZ~6#l@S}i<;7s#>tXiD<uGaOzJTq)VxVow?Swl%9&RkbalVdH^K-1DJ1pia<kjxRp
zKg|&^>UT)`r+IUm=93Xj1>=`A>kCDzCZ@BrdC#%$ReCrIL|V|^zH#ScDeDy(KXhb#
ze7sp_oBj*%DT3$<drE&)nSQywp>*<ty2|RR;##e+A694B<F1QDrt7dH^%aYh|DN7&
z9B81$i+}e6{eVdh*2&s{f}DmEeN}RCxl<m}@m}I<rg3Y!Hg6YG@-W*o(3a_-l%NH~
zOcX29{>)Nrud8Zr?|Q37#&c=;p^`7fMSl0`Sf8_X{4%=#)bszczyR;fNEIrSKA-0M
zPIM2MUyUGMDJ@y*ycgt^*r#WN_19@x6T#hYx0E1{sKpi;eqAt)|A!{rhyNFiO*Wo%
zmgA31v?SxR2h-UNUNWfpX|4jkg6dwNCMd^hBO`&<lfN@v@FEE}Nz`CTV>SEI_ZGpb
zphpkF=aVl;bZ8%@>~;ee-i7~1B@X_(4c{@kdkkthl-~@MKl-xfwyq|Wg7z{J)#Rcp
zl~y4|7oU8$&_ppr`B7mDgR?&_$N1P-=Bn?7Ik}6`7OGL4?Aqp!TTmR)!knCXS#-Hy
zryu8WbOkSk%gXP{Ci&qhy5(hOu$}I9E_QpV=O@&LpExm5VyK&BG^?-j3&*}<c=DnD
za{mFkPPqkK25-^l@q8zB!qWrdwONEjiO=~Mqsto<-mo0Up>G%!sni$Nc0X>S+~GvH
zkT`zT4-|_G<KfpqEy`Uqf-r3~8jOL%ErzY_K3E`IJtVV5Iq$0tpL|jK)@|K_|44=i
zbe(G;xq>g_Ngnqhys_x09XTB%$8gdv3C@7O1PVjE-x|^Nl4AZMpdoWJb&gW{2QV@E
zb}x_0>)9K_lt{1YTeC+p+k^lQ@Bs<nL$i-$Hvjk#HQG0e)?Al;5%bcplQ8=Vj+JY~
zlz{Cr4h0MwaVmjCAPjN+NCB~}Nd(Aa4$?jLhNb3{2;}`)8mZ6q@h>;PibMc6sePRA
z9KIkza%WzVK87Fk`kf~XlJr@ewqyUX2FW18r5sW;Ba_0)q4daU&suGB7K3<?G~4*X
zmiFMY57hw$4F-_TLSaCfgiZG`7d*CtRyxYYHebr%;jG=@yEE~3-&|QSB{|1UA2Fs8
zpZ$71X-g`t*-n67$uDG-f&KAUv0^AkA`qk^?Rz?5XV9dZ>TAhW=cli3#;KN-+@Oo#
zS;<upJxLZ#whU#Q!Vv)io7yKeW*nsHIp_r51+AMV`eTL5E_nNvDR<9MdE&Byxb5R>
zv*bpv<GQu@&sX<fIp!EexJ}dA#W`5;V;T0WU#eg4cM-VLk@LlXDjATuMN|KT2t1l3
z0^jb=5&;ruV%ZLIu$67;@`ErC))WN_SJ4)ZSa%aMr@d1#v>1M(=-zszB*Gayo~W1{
zc;b<o$y2Q-7>D5o0aCmvkObo9xCqSR(Ul*bF$VFs;+sD$b0`jtKl%EAvNgi4V_zaZ
zjM|fxY_D-oZ~B-@61)W>0^He0LZk<q1p4`uqed6uk$!@NZz=t4F=ueXO`6auHOk-)
zB&XZbj^*j~p$fD{RpXg$&9l)OvyW)`F1B5=T&kh3#`bp-0iH$_>xhaj_@&(Xr5106
z0^dBNb~|Oby_xp;evY&O=O1zJc^<u@9(W&r1!Vmvv1`C3P9*2|kGU}EOgCdwC;?d6
zZkuT(G5+XTlw*E98Oig2>8(d$tdTePaCQ4fAAE<&G%)9Hx$=L<-T!&beIa$uxk3M@
zjh8O(Aecu9=W#iBJzta*<q{m}q4YGSzbeI&^}-`=f6h6q5m#fFnz*MT(zBp_-AXdN
zRMh;cEn?w1Pljl%EX%juc38S__|m=Cg`C=&=s1g&is;Rr!kEk!Os{urOe04%Or-ql
zk+CyEckEC-l-Ko^A;-tW%IEtPU9L~iT64F%HO~fq8{$gLw|b9MUC27$N~v4@CK_Pd
zDM7G-Z09%9ne&7tZ`9A~#+X<PXXLFYq~yGZR6ILr`Q|#m$QMy>CP=N}tGCvCw&8c;
zU?m`fBJk`Kbl>9ButN-PKk0b1K;VD~?2%Sz`~#c;5qQ`aPXyi<kA9cmu3U>H0z%E0
z0z!M5FcHwULcFv6bL5$VIP@=EMUcMgm_hFu)1FbPAF%_ceZOUa7;`dq9I7jHcJQIw
zKNWuBM+fWEGhpchpV1N4WLB~R{g0;8q$PD7S2=_5Pd__cXNl5-G$oyztFKPqk!<8I
zQKw|-i4#cSKby#=eEVZm!CQzAi5O7}!7id9tp{q1wJ$N1A@W%tdgtn{>-!wp7Ot;7
zo}EkT<^Brw^t!P}-KbHKfBCG<@~x&m5*a<8(RvKmA-Q~wHu^6cI4crCXJ9II#qn(l
zx$Mz;B63km4yF}V;<dN+gg0ipj+b6BY6*-kL8EtKLXm}qB`{SiZ%9aPWa-U)q}9qb
z^-eMa$-)Yr{T^SjZwf`O(cXg$oS?-V09Jn_-2R~%BRw-BtXqrQ-DE3l`>ydYT06Mb
zk9R=k>mB2gYa5C@k9HB+JJ3|E(#ie}(%4;P+a>~QT|_{D<dG0M^v@81Wg>7Jk?D3X
z>U_T{TK|~ZU!F$C#P)0?5ipTag`0?z&v@URx%Z6JbU#Vf?b+Jw$Au+?NQJ0C9y_}o
z7@Ktn#IZAQ(nXfs__EU7#Axt>rrs+jW^E9o)2TaL7KkdZ3o(mUbmrSzSU5x*p&YRY
zq?2{rQ>KPT<X6k9&G;NX;OenTttgroQgt5r%lk^7*!AfiPJQbTid~Ggx{?dIar9N`
za}mq+<ID$e_GQ15xImODq$wr3c0n>7nK3?BS(7L)!x4ubFh(;!F?{f;Ia-mG?3<Ba
zKm3GT|0#P5+OlsHb|!7Hz1~9A)#*NO&UYqSS0_T!_8vH%K1We`&m0u$$AVYI+C=U!
zgj$^ao?VA!2x<$wRr~lU=45draM7^uo5svj4UN;+-{BwNEYZG(MBoahKU{9z)L_t1
zBYj|G;j&GnW~-CBKqOP3h2~5nauBr{*}&!5kG40lu&eMpk-kCO<C7C*<6bWQ&M2Uo
z`m!;PhBjpaKLsn<GM@~N0aMzE+`?tFr6@OiWP0e3YT8r4DHwH9G%i*9;r4Bomab~e
z9Cs6NYd;MRZXVx;)OcHQ__{&?X?2sgOX2yOt%pdXA{LkC4qrXbMbeYCL(t+z^>FZf
zQ(I(E5?)+xD1Aii1!LUUa+s`6X;7@caMy!ck7pV#Mh(oz2m;8n6%`WYCp2a^ifNxQ
zE_zsBoWQS}XZqNh`3CLUqt^vb1cRTyor%0nGvWs6_Y*kIbeoiFXY%B4@#7xCTcH_>
z+dr6@*uA4V5-gi`S%^a$Wt(>fnNOtB05hYGm`ZeBNQ1a7gsUMm-o(tvtjpTR+AZC@
zH)qI5f;E3N;KbR4vjHbr$;Ow6z$$`Nt4V4~-7z{LESm@vHInM$&zW1Hbel|_^}kfv
zC<oq4mkscUp6>8h0ipEqH|w-#eI0nT^Ep@#Kb-;)Hp6l~dQiQpfz51Y(@B1f70ufY
zU49fz`T_5U<~ic;)Yc?tm~}VVVA$3RhkPD4K!QX>-?(2A#O6uiPgWjgsJF8`{VIB)
z{=pC&!<w|@Db;%V@oKA5!hWk1qwdk9vW&6L;<008p6YWVuv=?rkvR(Da^D|VPtFV6
z>3CH2ikWJz>d1fpD#1HI84)^tIve}-xd;2cV;<Bbt^o%3Y$F1`^e4XX=jExD57=RF
zR?R7NCT90q?Hp`Vz1%I*w7XW*%jYdic&xSIHp3$?pZh*;_zKN~j9&lNwm7EaLim=K
z<K<MNew;6LX|SR}GY!TY=X+icirU?dzZ8-?POm>z5ig{`H@3VaZLwm?K~oR2Lr+J7
z>0w~3tUlH-u&_D5^s2bhTLq_5rB04+g`qBqbfDK)ct)Rlf&{nM2vg;Q2)QrD<7`q{
z7rU<z&gS9t?e`U*1?eyr1m>SPbITl_unNK_>Br96RHYcq>vQX9e=Kgu_*#}k&_jIP
zv@h?(`}7c2hf@kkb{pRo5vbYSv5ABcfg=HU*Jqe05h%9NCISOSt4k>d6MI^}=odNQ
zjymhVS2E)8zf{Q|2~N3uL;$+1|Bu5JkUZgQ#+H^;F-c(vN08`iA^^9@D)7{UnWEN}
zhIhGp6>sDVt!hU~-fF25aOYX%t3q5nPVU%z2|;=FhvLnoiGa(|;l4T7#>A1*ltO~X
zdC||pudXz+^+dE@E5D@ku?}jb!<Ki;Z-jn8GZE3uEl!8awh$&XPT)nRG4G$imuyEH
zF{+I%^^xYt+2dzKVES6px?$t?ID9k9f*D0uOMnri@ck|>x2%W&7&_zm1t)Bk4!eVh
zIf6T55O*<z&~(s1)<9NWI_Am=X0tDkVoS6Sl6O~4z_w+5FcK}@5JbH-<dy)ZW*hha
z@-*4b^;-bcN3nRkPmG1nWc%4s>~`h4bM~tDW>fvc*49JE5d=)QIKlB#me-<t@WN%#
zBbVUE=kC8HvjAb!8Mp|8H8C&ilxLu)S1ou-FdLcf4pC6!;w0TM%tUu_E2RgSkQeV5
zxq(7Zmrw)YOkx$N9(uHN+i9J__%;z}3i7c@j_Cu1G_Yg&N?kFoEr%Yh$C@pdh`{N>
zycn~F>Js*fKB||ABrQGnaLKH%<z~m%69+$n$xHnB2w(kFhGCacx~rAu+}i27+h|A0
zc>&Xiq0-?Egxk75o#o4yYO~v#!kmaf{c<F{|Nc+mg^_DSVDY2hLx1pP0w+n|_N9TE
zUN&Nm*DLn~8pIulfWqVuvJeS9L6;2@PI+YhX0_^J9kc^V?I*efDh*1kpUI$|Hpj&n
zl#R%p_R)-{Il4arK_;|Ln0Y;-mY`7jLjU97=Ge^W8Vzy{8zU{f%j#RyPw>v;BL!9X
zzlVhTQR6MJkVtn1TKk5xnEhzCV<Ghn?Xtu}AKls$Q$hi{p4Y!Hy?jGYO-7^G>&>u+
zLV?(j4j?8{SPF5=mmV6*RbydlH$Vgs0aHk}%BWnxMIcp|ZH1a_4l<bWq>Kop7m(25
zl-jr*$(w@^4q3_4NJv`vPb95UGGvdu5#=Mw^FF@#t||Z!%4@GsvEKQwRHnFR<z2Xq
zo#h6`5$6}p!2^#$n^XdR<-_q7`q-A;LN(4+ZZ-VJSOos`5S-sa4IZ|q$hGUE0gYXr
zt8#XX%+{>SK8VD*xaQ%UQ5NwZ><1;%>I?(C-oFYmpubdndD%i56w@G%cNi)%MTr-C
zy0#?NW|<rAvD+2REtyQocC-_)tA~bkCTyD>{h{75FUoa0W19JBj>MmtwCK<tmGUhx
ze_pibE0_*HQtP}`JSd)3Jea9#d-h6fM>v~-CMo+C@P^bU%w#2@E_}D&nvY7_T8eq|
z)g?hz++3#^R(3G&fk!qr9lTK^_asrG+%R)2#JDaJW7w^@1{aDEDwlv;gI_+pk<rG!
zpME|~HcVc*>#?8rjwl0KZ24-WTmQhT&MKQ&+^AWp*^T;<wYoBVD9LP#Sh&BrGJ)?u
zJL<>eDN4#b;{pY{>8i>Yqt(n_U5TxV^|KaiqI=dFr{{ndAI-%w1blLKdU57<Al*y=
z*yl|5q^}m@Myv<9Y=_9%%7DpJSGjI*#9i|z6WH?Ha99vU31Kr#7p7DYN0V0j4*MCJ
zx@=TIhaW?b3l`>tV>R@;VHFXG@q$l!m;PbE*IR}DG~9m$*1v1GkNl5ldzsL~gBPpm
zKc<_KAOb@Hg!(-_v6~4Is_$>Q4!t8e%${e~NKh<Lv`Xl3W+wtGSK(02ncJs1p<^|g
z_=8p=01F2fyTu%AhY$|f3e?ZgzJZRVfbj>{NoQg(*+*wcZbyb9d^6Uh>~`50U+tex
zv-~`LfpnTc`Z8b*e`retw&xj+x4<niYkD98EQ^E~nmEHkR<eZSpXNyaYw-RTB5(>^
zEC)Z>BK;@`Me3OvvL_Tn1k^}?HqDQ}Kxq3;LYQvQ(HuO;{`(asb-IgZqbZHPxt+QQ
z)=B*DCD)SkkFue_d`_&kHIi@F*p=&V{m@NMziXYU_5)I#)rE)xJ7h2^M7%fZ8SDzc
z;#RnBFm_zor8}T0Mr5FM_$E$$kUH?XKpFz_6iq~|;hw>9Eh!pnPtrL}52u&hrc0vU
zCRv;wQL%fLYJ<In@PRe$QovtlFPD?y9ab`Bsz~I<2^9sHAZD<&hNWoSQ9R37bq-0(
zvT;irp(7lA_|@0Rd&~A9@?%J8YX_Yt;Ko{Z$IK>EXn2R!5vpS%VnkkG2JftE+^9GH
zI10jUN*y6?GGve(_Lu_&h9Kyk_%irVm)DmCs_fSV+Ij*y)wpIn0N+VeIu!Chb>ZEA
zR4{S$-)Gylek6!RZZmND^loDqSNUsXP*E6T!uoo*c!_m@f9|!9^!m@J0B>`)t*P>(
z8`C%&R7RNJ1stLof^>`yIJ<=+<-x7!W~-zd@_P!$$&2bNeidu(eV!uKq3KNJ8Pb`q
zjKNPa58y95jo$5&n9o`1qSNZ<=Qr-Ta6rDr5%BEd1&SXcA&sMlkp4#c2CxZkx}BZC
zID(hW$7$XjTI0SyXf<@hsJ=8R_R%A0*&t8+*nZoQ8TH&#mz|TFyM)(d+j+lE{Lwy8
zNhdz87IDN`XG#R}sg8X?{}s&spRk`Ptsq)#Mj!c@6R(FgZLv6A(V8<i^1VA_Dzaxm
z2TXZ$befa?G9rSx8D?-~BG?n~4d*vvygjBp|6UZ#*yqsx)gE4EX?aYtrZBAYhy350
z2kYr{3A0cf%bjE6y-(7g?g(gI{E*4ebia!Aj>UvcN<u*Rh4&YK+Gu~w48uPR!f!PF
zM4&_ypGwn$IAV9pI2Md<2t%Yb{(08?GBTil&lyK3;d14yW_;CQS5aMc#t)v0BOU8N
z_Hk|=r(776TLP;h`N$CJ??^TROcrIXG|}d#hP?r%-PM%}R?1DyG0C7;jrfEN&f)zU
zCHTX{^;rauH3dG~@5c<fSQsab)(=$!;f8ZzN|^Rhf4neyn!edGXE_NgpIkM<zx_4o
zq;2OhpO9wwrS77wOAlt{5AFO#6omIR=dXY`JuOx{pl8$+NF;RS6#Y;gf1W45)==Df
zxt3n7wr~Cke)hc;uLP#M*9I8kH)iHWzo)BU<Ts-n*RxBow+d15PR0?-cdv7QVX)cG
zN@m&tMzhe_swC*L2M$vAZWQ~<&NK}^s!rT3RJn>yj@+lZIY6=wcc;(Y{%BsDc<xG<
zB6&j^iMh9qIS}-K?#X&q5`nw4hlfi4vEKf?{{CB^FBBUM6)RG3_FF<P;%STZAe|OU
z*do|ab&aXf#J6*H^x%^KUQBnE6p-^Nr0-y$#65UZzV4XFdV|y2n0{(fHoZPr&l}p(
zNM?dp#`xw2gL-Q&C$3n)y_B~W)5m9g9yU0b-MV}$qg7}5pxbb8p4ZRLL$$#mxLL8+
zXZXqJ>xWZ~mnyS;WAxvJi!JLj{=xE#{_2kGko_c6V*kuwIV?Jl^Aq#}QIO*ZC4E9%
zC-la*KRd28#6PwLWGCompRX8bij1DxJxD{ZD|h%Y;JVJdsI$iIxAF0OoBs~E9k8`z
z{L#X^ZmU(C>5Jv5;7Kd(6PG0xqMN?aEElqEA=cuHa4}{WNEp1b&cI{VQyg~P%Q0{D
zlFNJOlM=103*W`<vwI*mH)rM(LKWM64gRq*F76+avYdSXaR3hb_l!t{IK-U@SY3*M
z@1AW$WUrFq09FKT^U>qT-_>(NOeT5wh5xJdgqGvPiz3(dDgMBg2Hu9z4KH2Ao)_tb
z-!;*mMPVBBuFwmF@5s8tuZN(PN#1_sZcixwhdUEX0DR`GENI`Mvk&_$E5xrK?X8K|
zcdmuUE?aXL4Yvv2Lz(p8rH7A?U|+%hYOF(bE@-jMC_l-&y-)kuljA71I^W7=t5qQz
zTBA?<zNWuAtN1C3MXT&vS&trH%f-Y!m-j9oF>Lp(ULY%gQwlppb?o#U-gk2Jt&jaw
zZ;4xo8m#Di+J`4OC+VNJRA+_Qz52uXH28xCl{c9ssLm4s_azDvq4DQAGTJx&XHffP
zoEiSe6GQZak+o>*d@C%i7N+ocBO`ao#I1p=W~-dm$cg%cyWVqEwLL+vt>VJMVrI2(
zMh71~PdV(!G=gEiXatXEExI!de`yx<!oyD%lb&GrSgzS+?@Y|Pm@6#atZl2}N>$jo
zbDE-O7AemAtG&e0%yPo4>}D@Zq;u{_2)9Alxl~5JhoVxt#T;w|rMF5tm3vQE&m6!Y
zjgVj%O-zS0&J_~ht_F%|F_f{t>E_5Md-8}rrQ+M!D16K-Su^p02hUwn_LY8%p8cjI
z?=AmSmA7g7;J@fMsG?sFXiI0KaqThInehJ1?vC$&m52X#zFvJIUhnZlTqk*EO?CLd
znX0sk_YX>0#^`hFM5UtBd0yAIrTAqgT8Bo@+^`VtbcuGcxMh`Q`MF5C?beS8xXz*<
zjc6PCe$KTsmAjLChpf9o@B8hsB7RD^bC!lBDW>D88||z&``~mgxDkoXb~u-RC20c#
zdxhS2L=p4k#PP6j3W~(KUc6=YdSQ{kI)A_$5sVF289vuV5XwF%o#rzL<unUY=M>mG
z9PP@W%_Nz}vDlb4Fq2b~Z*&+T=&`=OwEvGQYuBtY_M&V&kM=^gV8Z!E`nmN9bJ*lW
zgOtmR?7S6m&on8bbL&ZD_)CTW0-s&YlWwZ@Zr|LweObM21$~$e#h9UyccXdxV4U8y
zl!knS*LuwoVn4+oXZNP~tkB!V+$gZSbLq-jgJ!Y+g{B<d3yA#Ung6gLesv}M)!@f*
z;JGn1HuzZSdlKm^?!Kc3TmK6t=RZQKKf(8puqvWI2APN2k0Q<BG~;#Ze6RO7_|0#K
zPit<q)e<S_s|8;UYnW)o<VwccirYA51+o45-@I|$vx}kb&g3w+z;-?mE&<g@-Jfr3
zxR+$&SI$$?<IY>(+tVrY5@n#$X}}+6TuR!Q!M%6wn`E8pObqjl3X}5a7%wz!{!Y*+
zpWzT!Gs<5~N9eKJ@zU$_*wB_Z8EvrfXdfv*Lkx-A*QqWYhB;>95GK)v3AaLxtxO$1
z0vCvY^n;@kO^&mWJJ?P1tpZFu3KZIKZmHsA{<Q74lMl;ZeKzL!Y{u;WnP+Z><;4}g
zPIVZ>@krM}A>(x2_1QulN18T|L=SJjv^?XbGqQ<^B(^@;w3&_+n!X_&5@&?Da#UL{
zrrv1V3vaUO<brpkou9Zl3<G7wFa_q`9GsXkV#daDzM^J+|A8@|L-}=d8QC81KOp}g
zz6s9Ja6SWMJI&A_iE|pf+JMzaQp?Gef4!-+F#L{XLcfk&Q2h-DTlZT92DY76Mb3Vn
zeYGJnQrPAfpX{e;+6X>X6mxv@|DLKi(R_LN*1B1ph1fU25$kWvSa{j_+r9LD4;XZo
z_sOfNtC!Gr{WA$Y`L~|pW-S9%t8*mET#b|#LwEG)##7hrmnF{iPr1G@qnzeZ<JDx*
zl9v|!(ElZ;AdoHn!RLXwHiJm(g}xs`cft{T-g5JTbCH|jHSHsL@*%DDr?EbT8Pg@{
z4=bQ`Q{&4PdD%<eei1pQcI?HXt<GE>)HI5SL>Mm?f*OhS$%kFT_J>MHl&czqUp@JB
zOZ`dSzFqmVtEzf-Km5KaZHDJ4!JGR}PC1m!1$sTt9qE?c=<UK&S+~An7iJTA%w}nQ
z54A9usLEb%u{op|qta?=Mc43l165E_H?Ugin3T1au8f5S5IAf-_(X<hf_WRo?PEWr
zcri@g03O9TG}lRB{1#`aNK_vqK?&)bX#vQs^OTAkK0l7|*LdWVH+bMSbp{7UBN%b%
zgPj>%?ib`8j?Sg3s!g6kniMZ8g1tiW4@U<%0Do4pNSAe}<+_OFxiw6$Yh$=>UfePC
z-rS^8a@WyK9lu_fv@dLWsfJHFm(Z6%<(8U^c%mX~!}?9p4W`W4nst!8gRsAU<92uz
z#f!UlAsI>~866wmLm%$tVQ$2QU-;BZGV}kX<$m!7uYkaWKYM-+&woXdUo-;zkDwtU
zgm8J;Bq^vtei_cp|E&ysrnzwWsqIBOWMOZ8;gD?3k2vSDOF+S!uU{gS0>Ywm8)guF
z)5z2&KADjQ0aF~JMU7jZOV5!ZxO+lf?%}o$mqlGo<e)|zsMeR&`J_u=4N#5L1KdC~
z1YqS59^nQL^GH0@2>W1+i!mBPtd+HJIyy?pO&3ch3q%4<w+h_Ps))b6K;F=dEt@)5
zfvNB93@QtCpVXt%5W75Q6leA&YioTgfJMM2GJGp$d{KK;LXHS<Iiw3#M(7RUb<tu`
zcO7L!46X7`N;p2$60n}_wM84h@8r%AZqWZW0|2}iD7H)6!2m%7uQgI<j#XII`HXR@
z__jo&HOQa#*x0FzSMx|)s4nr`=hDxN`JAFJ<B>H|Ot90qKx{MN+$xm$Qz2f&6%C~&
zCCEtUnOuKbnXYBWht_>#u6FMY?DL1q$UIetO&<EH<tqlf)E6k`5Hz03vu7d+VmK=_
zxIIc}uyu8&S7>X1qtGdFZVGBo&6en#^fJsUEFk|N=1srOzB4ag7Gu*E;o#%UF|IoI
z{7FBDZrD0TtGO%aTSiAcc`_;dnofe5OKTCi<4e=j8Zmk2q6a$L)I<W<#QO{t%U5@A
z8;4o>(+I@*q#QnLa5+u{&m~O4$9mDz$6E5I{)?W<x!c-5G#GL&p6j^0I912c(a0>n
zEYDe|ib0RrpLu?^$3uESj<!FM;ko?s6fIUSDbh{M*W$<8b2FWgfH*g6{#6Tlme8le
zSmxGy-iYe`(p9It!iX=c-&;Fb?${D`k*hg}W~&qYCzOYIHMUMmcKcpi4sWM?vgXUC
zp=VCxEU-`~HM<`!;RZdC`$n}J8D(;n<x3}-CC``fJ>J?;L&peDnA}RIVFL*A4sYC>
zfMP#iC(?6ce@oYpFdK+zY+XfgFYWuO99<uGsN9!uwNeB>lUG#duVGX&T+iz~=D$&z
zq=9{ZR})PuWSdUwCbdu#T@d}EezjjN>K+=>;UG7l#+!%<d9m*AZ!ei#Tc^(P_=WzD
zdt)j4U@AN_h9=-)<nkWKp+SRw-{Yorim`I3th@ZIX28mforb4(U3q$(zJb_r{`0uy
zec5j@9CtfLQV_Z+;Uf~Zqx+B~cs)p36RjU%p^OzznMAzE8)|_iLY-WwUh2Rj1A8()
z6cPIOJg}h1$9kIRsfLmp^w?ZGY0~DHjB7L0;O>SxgGb2B7c0{DUcH}X+o?7eRn#$i
zZCB(}YQGh~=~3C;*}q_5ZzbwFoI1Zuxz15EVv67J@(6A?iI>DQaBE?P-|990z@9Ya
zZ-Bb=dcRf}+4~;qh&FoVW^g;xk33z?qO3Mpti*j$ezR0b(fjGZ2{Q(ri>e~FpVpi>
zSReewz7DoOEc}(;f7(F6{~3q+d&COyl?a5_u=V?^DLd30;F8eQyBRFYBhhlb47YU0
z3&kahHu>0K42g_`HSO_n*VBg&A^e|g2QIfD1P;ld>C;>=>s&i9s~ag{N{t0qXg4$N
z-gtWaLxAIXQ<Rs@UPRb~<g~PB8%(!OMI9Xd0DLyrI&I#y&h*;c88kJksvnlOb1J9J
zU^FE~rc)86&5W-!ziFI{7BF+!;yTo*1MT@?efJ~lIK4hXaSan4F>E`pA|_0QKHs3_
zvFVn+NcqaBV(l3R{qW{L;&JyF;E0{P!JqG$3~69%_@9^t(vnUf0{>dmQmacVzZiyp
zc;<ii_*mmYAGO^}H<K@;?gfst+SS8FW8#KijKlUw0NYY-+amLpz>#$8ZSD|1R`xCS
zQk(NBPrcp-gg5(AOd=)Kob$WzZN2O51bOTlza}O)$@RU-kBV`5+St#Ze7mL0(ubj_
zHYLR)NR*1Tt0R5Rr!UrFlICpLA@3aNpUJ3-jTF_n%=B>d^cRhFGSkBc_`l#~p~Ar}
zv)f8{3KUQ$9XSfkVAHyV>sCuxqaXC<ckgGuUeWXrvCWz1Yk*X3(%`__nCMu(q&%TD
z*npPbQT2+^?qviAmiM@^8O)}O7SUGY8HwP`zM2^B&e@GPvPghKkMsOkY_a-$Z$6%O
zI#9b`QDT@db*pD9TE=ImNV7HL6ig3mvWxe@#v-TtAzXg(+)TW0ofK^dJIRa4h7jqQ
zeI7YMJs**~BN-V<?-OJ2{TV&D8kx3kS-)X}fxJApQ9b0~3>cMJXYjFo!SfQT0|BS?
zors7^#LNzDTyA%n;`vs-8N(-4Rg259WUI)?lnMGAtS=)J|AB6HlW)$K%EtiF%*(j0
z2Z0PBC?ALxn3Y7*2a?x=TKQPMtAHLokuy?Bo`1Ne-DS0+z2Xp14>OdxUyWIL%`f?Q
zDAmK%Pbm#&Z}ddmzQ%phSV!d}_D)p#p|`C&dtzdR;dc|{jnA#jK{pIevC{jK-CKuV
z)ls@T`u&n`zo0<Et7Kjm4JrxNkO$c$O5@L2NB{i%cj?DpSZk)g;2-}H5FtB3`CvzW
zPE8e;k10hFD&Ki}bU;3vuB6O;GclS-uIsc?>2W@zevYx{e5Uvn@hd!|h|v~!8Ki{>
zaNq{f{L^^JA-UQ#-*uR>JmJj;*=)gzkWXnkBerw;HKkf{>Sw{8RQw5<48<^cgr}>y
zxel0Kk2PZ)5@&2K?oioX9V(`Dv1!pkWS^32d6mRO_;a0}iplk?*tQs2cL%XJc<<~Z
z2aL%j)}>64Q>+z{mwH3j--QTlwIvZm56S<*0$TngsSkg9|4CJovNrxRMg!>smo)w-
zRn7a$fc%Hk_&<an8F@a;*vJKG&AtVB7x?<x<W8s#@3xq>GK%-Du>z{vi1_lxFh4n5
zXMR6QDp2`TS*U)J7vf9R6(t$d7_{?VK{jn*0>u421iAFoO8R?F_K3diu38+HArjAz
z(tTcor7v`R5E~5}2Na2bM?St_K86>j<s7X~+F8cX5-cm?xIL|P!vDg~hURur8H@&}
zOG<*&PHI;Z4bb!6_zr7T%M!bJGh>ZfkvrxI#DNsZ&bPAiK&KWMt!?afY*XI(aFrF*
zluR+0a?r=0cB=eb5^7pGv7{j)!L4#XJv~Xa4-_r*S}+-67Otd$Z15N%0+S|(W3y$T
zacGm+qd44LFWT22;Ej{yqbHA^t5Z_l8H4E3kaiepa2h(})FsU0bJu(Ey*^`<bkB3}
zVk_YCf^o^`u`_ThOQ-2ly+fX<h?$HNIB$79ZjG|i@o$}{s6iO<$2D*1lXF-h0=e_G
zswY~caRx7&cCsl362s&E!!rMOEB)Wwzgp~kWWPG~;eR(h<M$8_A_@<uiOfEJ=j7bL
zW?#W>A&}IX$T*F3kk)##v37TBIn&1`G-SJQGH3g~ONG()5M-PU171HpGf9iq?9heN
zFR77LVxf_4X<Xd35v#q|OizffNlFSuPsvNa>f7q#>7;F<O<cbM(#Esnd<s2j)k)xN
z@KLh>7twC=zNRBj<>dUtajuqS<^3T2bIct4SM-HBRUO717U1Kq1g0Y>m4gN7dz~7A
zV@OWop^t<~`$h`(?(x|dJ({x2#(ZbOl5)wKIeUGeQFyC&IGWZ=1hLWitwM1ti6E+U
zO%(y#>_m?zq8H|h0ryP$ig5f4h!bw7i#P$Z6>(<7dIT+gpU^BWFVmhHk+>z7kmWAS
z(#7K#Y}GL{hOXrm-h0uANotu6^OL|*H+rE#jGm&M>&`hVDUVF_hPbm|H8&_~Gw^KG
zUJq8uXC>Qr|J~ca)J(XDYoF;4Z{WarV66;FNt@s02R0?M5|QJMCLR0wTP8+;g+N%G
z;klTvr+h}Mahqt;4l=HZi65Q~yKC9Cf>t8VsvSDfXx3+7rYn5bM`EPRZ!uExd~s@f
z<KZjl==a@i&{{;k@hX^Fqd2cI)y@|UV(q6*cqGA7vwI|hY`XY`KY)z4-S_hU77{SU
zXS9UhB<q|tvx&xQgM#o<KA)spZ@fIqtDDZs&{6GD^}%vkr(8c+Db><er%-Dkq@BN~
z$bFL=H_@Jw8-wE2k&x9J7*iL>Ro&RyZ{8~iL`fFHg;UWC>?@b@j*1&!(;RP`gIiTe
z`#EE%TO~8QD<NJrRjx0#b+>sk?6<+;q#Wj*BOfT~pKx(bwin=9A(oDm&lkTkI-_!)
zLtj&Is(O$1J%R}yXX)v)zs&)A<<9r8?N8kB;hbjeBon!QYRC%YE|=Xaybm=b?XsS!
zg@R!E&LLBDb#;#X8rKeo*9Oa01}?T-d2{fLL8$%+wNWueULAsJ43`+_ddU4m@(#JW
zk~fc={W^gacDWemfYNd^;_k0aeuI!?kWy{k+URUl70bjrzbwl*vs_|#_{sdT_L&fk
zwflSWqbZNvFq^5u;USXPaecgnZ^rc`(Y}?@7)cjlHRp1NDtJgw&FYHRtxvsJ%}%?_
zCJ#?|>srZ=3N>S!mHhM>4@Bm>DE>%IuAtih;C*Tq&xYa!H*<;FvEPKo_403R=s%Yq
zeIM?#kgonBO;HIKbTqyyB_%jlA>X%s%3bKgH+W5O?DocV>W|_Vx-Zn(c7!AUoq7GA
z+W!{}>)-!>6ygQ!UOw&;I<XeTz`FAAsFiSj7-oRL2}+ekT^a9AOu31CL;Xw*{|s(Z
zjMVHNz~?p`B2wky;|zNqqwW#)KKC)!pGHSV<EMLsmRVB^k~|V`Qvke3O@ZjznEXrz
z9h?<rzXg6;MG;B!Qn!n;w9D1SMK0HCL(+PFK1D;Iw1QfjV30Sv8REx?%Rud6Dx>9g
zhjuy>MIt4QHQ&&NF_paMDF8H8)iq~8A$$xt%lCSjp8Wb;jxV?6TW@^&sxo45ZTy-3
zIVz9)WY~;AuysBue+1DvrHE5^wa&Aj5@MIO>+vk@KmCbO{2KalFupSRi%!W_HDAIz
z_~S{_PADd0rIBMd)#n(}=`C(TE%(KQ+d=sPZ1En)tJ1oLM;jAw-&$Tl6fI>kB*SPh
zaI~BsctGte-V7_4TkC`JRhZQq80emyF`G<=9lhC)yIJ%(i0o;m>;qTm&60vgpJ_m-
zyT4odZ%KR4p3<}wyix>V;{OOMsx>_Q5C5HH>mUAa+4_s_llBu=2At&!wO&?~1kFG5
zY`U6)qq(Xd*2sxi2bbeDRddp72BunVL&=RN_|Y_ralMiL4r6fp{xhA<S$1%SvsGJj
zSizRU@AtNI-rC48f{{nrQ}whn{lP-z!Hex@DoC4Z9fqUDjEpSbRYY2{9(#|*ruXV3
zsL=T-#Vx=mU&0TB^WG`T=s5qc?!Gguscl<3h=2luROwhKDk3P-yGRoS1nELVItURE
z2@r@VMd?LQP^9+~I?@s#RFRG#p@f9q6O<4jWZ&$)&pzMYdhR*<+<VS<pL_pgt<04*
z^CWYwG3I#3c;AT{?b4ggKgDsl()$x2-g5t)E4lCWeq&CMJf2X4We}1-5W9IgDjj->
zI@Xo2MMuFyRD;FaRk(&W)?e^7oSq5PwM!iE-%{hs?AZWn7z<i@F1#v<r4O^Xp12g^
zBLL{&qANYWqUrVi#ar!MUbW>5&i@ZZ`@Mhud*?s;?*BU4_LpoXtbYroinL*QJJ<aB
zC0Tq0^$pHOZVf|q2J%fc0X_8IhJUbnv+>g7tn;TEWlTWPGj89s5UNnuW^HUo*9#v#
zDC1hgc=%)H6DtiOS=-LbGY-R3t9_h#j4yH{t;{<XFEZ&S%2BGjJ1Jlz69kBn;97ov
z1qb0dOb?ZI4={UX?&J27ul893G{xl_{#HkValwg-_jzn#9YHD2<4!=*4$iLn=jG4B
z*a8vl{`ryX;t+Ko&bP&`lca-Q2@zT|llVJHEE8C20BdOY37`d&->?7bn}9=TuF5a3
z0Laq8CFu8VFZ@S9{D+k9_bK3?-ydOok}=n83~#O(aZlYOI6M<t-Mn6~pJ=Rc(SWCi
zo3VuIDBm;Y>lc-C{OWHmJgHY^zhkNk6kWR|R4zAf#*5v0A;5}Lt{Q-$a~Nqc0$xkY
zG>`x+3{`XhtWUg7baxT&K*n>KpOgBeyAlP7+!2kyyE;~XHoN}JM+E-K1ho?&@mBls
z)fKOcpK}UZcM>au(ttfu>o8U@8mYqhLZ$|iysmvbtv~3g_dQ$U3T~@s%7+zUjwXpM
z0_lVL1!VE-=t(GNjr=vDkRlxEv6oggu;s8GnA|Kx?nKbt<D|e1@q`1&7fL~Q(HNbe
zXrEE_X+)A{4{70XdXM?*=vLU1B<?<_R6@j_*09^+rV~-NZ4Af_KjSLMCT-|2T*7Dh
zB|C78Wk+s*2Ua#0{J_M#Q-+ahSu6zy47moRA_h&ikp;qgDys!gi+c0DOK9KL-E=FN
zbCJt`y%3N5s&BOPuI{zj_HyIp;OI}v4N-UHZ3V-WPwm0C3g!6dE!78&#!Eay|3U2W
zpT;SF1$q8jGxy&~bp5IULX{KR%l1@ZydBL#fp0~1i0w0%h-QA6QV<fA9O-TwNSj+K
zTqWGm+|eE>q?7q{PXsl8xb@1j+-W1+aP>|-T?A>47>i?|q!OR0NN8fKa9}W^b(}}(
zNjWlCldaFGuw<J%H{rNYltLfNON~uo8OctrK|0D1(KTse(^-i&ZfMRgE|!5)X1Ccr
zop*}^K?}#`?ptb_Q0+<``7lePyf#L6bI+G>7blQ8D?tm6xX>i0bSB@;a$_tvZ-udS
zp{g>O>zw(*MGKABFVU_E92SOx$?&Xy!=prfjC<{l{N1C8G4ts+6ASzN>N>0+6Mb8G
zl1SQY3S8LT1cM~)MYrcton6e=eU=D53Xi~MmL<+eKShD?qqzRJvyWB-o*Ty+164^!
zMlb&a!0<KVHgj~Fh8C~D^>xg6#}jv?w`Or-jEUFGo343^ZSX9n1hiot!a<mm2ajRO
z_^oz|@xE4mihL2yIpaCW=$gBbglKq?k>FM`?o%p@Y`8sBSA46MknCAqsmKQ?y6U!|
z8fNK3VHvZy3JQ>Xnfg@D&nR<_8D8NiSi3?;Oy_eVi7h`Gwe7jM13yM7Ovfz^*j-^;
zil$fPL%@i3NqAtp0T<-aE3jB=1LOX~OPc+M29kT-h{u)8DaOiDW08##C|L|N467rp
zr&VKytZerPITo-1eM`M&vxSSH$nq_j5Q77DYy48ggUP(k{inXd+`+7x&bfxm$T+{g
zAZTc7FK=Ip@!`~h7;?i>%zmrMW<zCZ=7FbI+xdmh!0He?mv2q)H>_O56RjK4c~=r4
zo_IbNX;?=Z3#Xc7{K|5f)Z`*$u|Ay#48q!`^0OHclL}l>r7H{^$#06?S!_1MF5fH-
zv?zp#;+RU$q|WJv3X5)a%E$y|bAZ~?nVO_#dmP?s)q-R|%|d1N5P#YSIs%_J^{(|V
z9V8cOsrF|VQR*+<w3h;z>E9b9svkI9tb}&Y#FF&)VPdX+sv%G(=3~fwtKK<hD=P$w
z;V?@pL3!G<`n}uCH6R1g8(AS<8=g1$C5w@Y-4#*5UAhCfzxX;PtI>-j^l0=mkCo5Y
zovR&KhZjYwpQkq#mX)Yhv}h>4_6gVV;1$Z!^$(1K-?Iz;)cL=(800U6<G<inUzsBj
z>`D}_ZcA!-3`Fy6j!d3hmX=R!|A(}8!yDY)CSQddtUp~Z^U?vN0>15wZ7XUJ*XG{1
zlP`c>nO~ZsD7IQ!*|)8h(@e(9)7omK5T@tu4jwztMP8lQym`(`_UMc+n@#@QzzewC
z;k(P^^I&iN@vSCfP###6%<3+_<C41RrnkJ1@OdaV$mWzdUoi9a=oqH+)ZhGnah(ts
zj2?&=a+Wwbn@;7bA~8y`$|FwdjM_JnKfmc80<IJveFi?hC>^I1oPF=~-XmJEWi&q#
z`?b}y_Ui=E$_y`y;(nEv$j7Q=&fU8=avh*Dk->cBhqdEgBtZk7XiSuBcvV|Dp^Sc^
zWGtpGcf>p@b?1O<o?d6F9(g}Lesp}K`ac^mDkd*a8y38H_zQ>~21t8)D%J}mD){Xq
zliz;ouKP#kC7V;Tl)B*K5*>mp;IvNL5tGM6=7+FP^Fc~xAMs*@h-4OMZ;u3t#+5=M
zS(fwO^(v->j_ICIBHK`qy$lgo*IBrc%`4ud=gh}!ktwH2+z80|igcl#x+fQNzSYrD
z4!~3|0{15_lsV6E!*N?LMH{0ft{r4_o}BAXV3owM$;<JL#}ex-@m-y1R}V}-pR31(
zBOAGTKR^?9jm~Aw5zmQo^LN4AiN4JAY#nV)^)7AtzT`EtCwo&!<&rhyHvJI4qT623
z$)O?Of}I9dtGSTmIjV|b$Jt915}j_Q%5U8=Scn`DuYVyw=i%=^H{2+Ak{H?k{`8PI
zT<{Cna!n&)XRcN}da&yV?om$*7Z=Aef?A0mgpr`Kyq?^KvKl~FGGmxO#%6HA?th$T
z^%JFz*iNSAjc&^|V-Y?+{Q)IY{eHZLF!;?WwsjV7mLC@RPTa-DtjY9XGVo@q)?4>%
z7t>US%JqohYtd?3)l`-<-<X~y!^hI3j}>&;Lz-6QWiNOjk3`X*FBUrA@*NZZ&t?R^
z|J=X2#_xB;)sksr?cthUt@1JPv(2>+j>p)uo;eqfv^Py+r`kT;UC+cd5P$+7%bVYV
z@$0#Et`#kWt1HXT=xXf^@0P?1O)V_}#=^o6WWi4qfgG@y9k&4<8zNHQT7a-*$f7lO
zTQg3)TK7oSS!%J0i&rfwbx)fOvm4S4q%)DJT4xb#C?XPw*UFzeVSIB0tv4bx`U$u!
zQ+f4T=24yCoL603eu9^;2fuS{rhqvVa;feuWio;yvFZJXhl`R}{Zp};deorjO!kvw
z$3?k{Of8ebGVJvf-z-VKlhbB2+~*Tut|;(1*Z0v(tfnWP!#s%&@fF5FgyB6vRIUoD
z;ngSjN$Rk*TUZ921)V2dT)jG*BpN=m>ORqUwvX!^hMe;2bVB2;Gt5H+U7#q--KZUD
z$H8w{cX7!MZ$2ky+t@9cx%f%$vRjUJd}pKT{od}{32ZfPTUB6itnDA~?$sOZ=3egY
z?OTsQ-op!ST=B`dx8{2x3vAieFPqZs*z^I_w1Ik*a_+;534|9HiA~PW)GSb+JBVgQ
z8;h)D)YO^8DNR*zGmD;!k4f5KCD9RmM1<S|g6rM|<{9O$%HAJ`nNM0-T68#gG(M=)
zj1ElFqVc-FHs@-W9SPGvc_;EtU30PjX5%^7sj$|ORy~I6Ie8003sB!1CAEQ@kdIJD
zigR00;4CgcPu&BDw|loN`0(z2b<hID89p|iJndMi)qrNX2Bjf#sH?b`jhZ!dxNXPH
z%m)K4yW?Le?lDSg*MN7C1EDLk6mh<_)cMMiQs_A@0ACkx#RF9Zyj&GkWT%}=ze1<~
zUCKB40)x7$+K440%+v~A!-iw&#imf-j&0>Uu3<U5m@73VYSe>N9lF6Qln~Qx9h+dP
z{Yhq~g!0_n|3`?;KXd)xfbRTbq928N@?Bja1oJ~^_wDySg32VN96TVs^j?}6o=?0f
zw|-wEpm;j1mJlD^zwXZ#^L%t%0&jrLO%Jh6)ETyq-f=WqABKN%&+ccetb6!`e+>zd
z!iUeMhg{Ab6&}mQ`@Srb|ByO7V-?;*=IR7ckI=${a_@P_l$58~OMoH*&&@Y@`#o$R
zD4Smrp?~LT@oqgAhb(D2DA)~vhgZOYn*!$?^wRm=Kn_l#eFTOx3s0F9ZPdTAD)5=A
zRA44lm{(5{tJ*4bMZ!!TjFz@eJ#jj89#9PPQ$6&jWRt^k1}zMdX45%7<|7Ekv4^Pz
z)({FgJF0Ykh{S*!ztg~Wq*AZsK_@%om9H0iCHK8=kQ5<J_=aNA_Uq<dwC41aWm|)G
z1&@0PRk>qVV$y{jd?$7l24V$Io*fS2x#WNJU`t7_c5E<niM7o7^|Y~2z_=aGsbz@B
z)<WR1(_+Qmun_LD{-hw7(E_8viME1Xu!3P4O~j|-;|9Y=U$b<$i8Zk(MRDDLJAG2?
z3c3;xbS)bNIDXi!dD%Wr`0{4EC}`Chje4Cu;V1CkzQ8b}5R?CDHK30Gbd>1xAoV=D
zq8F?q4~VebM=m}8*-`xS2+7|=Pky`huS8M)6kqC}n6y-Mc)syC@K#ryOkL#c09{5i
zo@TkV>%zluvZYRd=OA<^7$^YlG7CFYIx+jD>CkIWuR4FzvR$7scIyzm&3ppe*e5nC
zf$Z(G`K+IbwYC^Lma^^82!FevIUwpRD#uUjwkT%OK8F`HY*{&lH>i8uw9$xC$*<$G
z#c@Glb3SDA-F0*l!K!x<UyL}4BF<r@5p9t!Jp0I-k4H&&AQK{#NI~q@zSCkCR-P7)
z86hcVEaP%`Jkn?yMAi9RKafKGV6DvI2DshUp2IxIp|6=^^Ju=@wD2di)Ddlm{-JMu
z0W3+60CFPm+T4p=uAZ$JjzXm7;y&W7ie-}JhnKylBFA!eg7mWXIFFRk0@yzYyt>d$
zH<nzXXr!)1!*2Ks#U$(R<bVG=*Zyqo_|H&R{=jZpAlYI{#X4qqN0n4dEkrV%{CI_u
zH#<1*Oz1Jr3f?oMyQ3LoI7vE(KNabt8VNjd?eOvlCU>kcYkB;Mf)E=#JC_U76n7ii
zbcxGPrsV34GqGpbHz5l?(g>DS#1S|@$(<O8*<y5tbKyP0F)wPCrI{Jq2xGgPy9~-r
zUQL&Wn~Wx__0elwl8PVp3aEE3{<^>y$AGAbz}fr_2HX}B$S{E!hFE8;qEYcBeP^v}
zp4NgTa*xJ2ik~sQviC;!nE}U+L98_NrNS-FKHOJivcTEo>uo;p(e&Hqqs*_u^sKVF
zCY|>O=~8|))Ex(B^$G@X_YF#{x?G^5FO-bNPj;M1mRnTV*?X)@pXp=T&1m8!-?9SK
zA3Vb7_&jFkCYd~PyC#7^uzz-}(|QV{wks{(gr91!Y;PTEad417$`Yr(dBSm3`|YbT
zGq(s@1L>DoWe+qmReYuj?8#rmxlaMDUzV0j?uounqR>8~?V&Gh<g19#hfu6l-qq>V
zhYDBduCxa=Q-qS{mL?LBgNMV-x(5~3larfM-?ksJ!YsU3Xts0-dQb!?r3#mQGnrrI
zpvxaOQQ%hQVO}Sq=q1Gy2v}^YYK=F*k6=p>tXt|TEV!I#+Gy@py~rB50NA4sX96Cd
zi=#vpoj(CMW{7X^R1lBPGGwII)*e}9yY5#}Zm85L6gcKEOFcQ51j}j?!=y>H&Ipv_
z#-1OeC^5c#ppK@O7uprtHt=#=uDn&_0OnFxow*HK_>7RPRB@91^xi4vU8(CM*R_Ex
z8(n^d*jIrU9-Y6p!Mzq?+5YYNZzk{GdHh$J)8Fk7QfAjdVe2V@FbC<3YcrmejpskP
zFD0W4ANa)6>oEwu&^Rw|O55Onp1P$3;fM%L4N5C9*QCuB?rf-C^Mzh63=qjZi|Scy
zQ<?h8`}CY=(ve$Co}*(+Au6;GDV$?k*$T=gdE5;f!s43fdWsD=HPc?uU8uAxAruo)
z(rWVN>G78N@$w-x0V&+ptSr(XbUz2R6tXUQ0I6MZPPDNe@Dt)J@MQq=VCE%}jp>(_
zi{qH&IM%IW_cn}vsg=rL{yBH@9U<#)uEyIxKkjvZFl;Sr80}E|j^nXyT+4VLTqpn*
zEL`I*W?tt`HMuwfD&rJYN3VD6A4q_LlD^SJUbEI{=b<6Q=dF4>kIUXPQY^T#*bZ&V
z`6fLF=Q&KJ07biWCgYi}J1lE`n-?q2G{%)LKS6XK+o5M}_9M;+wmtX>5MXH(HHZ7C
z%DT5co`}dSW!&Np3+jww&mdn0h2zG^u49{E&<liXnrQnt^vk|Ywn$|D2Cc>Y;LWmo
zvN-L93!2~hy-gviMa2A028j?A8kf3M;j|g1Vyv>wsTePVJM_BG`n5GCCrbl6PJ~7o
z=6FtO6+HzUfvFFdW);ng0M-E=rWQ$<@)7gY3PRG@aB`u)hP}Q+Ogq+g$641nV>im=
z?M~w7_YSyTjA)n&m${Oqk#yhTK%Y;biooK+fN&aqYJM(XdNn(rnw{~!w?0>gzqFfv
z&fcQ8VJ7{f`KQ^r>~{<w{xd7v|LSM|4J+c`2xq_Zb-zhpNKP>^Da7`FAMpL0iQ7BX
z24z4v!vJ5<U}&3w46b|F;j@qR^=XMFRbNj}o>kuwG}nBxxDjBI$BT+3YEF7AnvPK3
z(iPprLx%|Sq5cU_Uj*2fiK5?w8jnC-r&Vt^P;>}F23m)utG(p!V4BrDP_jPcFCHH{
zpS&}Z$Bo=QEDkzLk!wW6j&!_dssOUOB>TtJsql>oNk(=Xfh3@Ww#~^#Zj@Bi?BfjP
za@=_s<<Tftbram@C-G8x?x#RAy{W3@(C=CR`&T3O1`g;56z)PMx)njZwJJ!e&;GuF
zqLKY;kKf<_|Eou09X7B2;4Vpu2rVOS8?X~qjd+9`@9}zKCW%dbIi83EYl6>OEb`rW
zW>+S8Oo9hN5}7%%_f&QJ(klN?fX%BEmF<18CDnC7R2MlMN=retr3#?ii?QD~JXMLx
zf_sx&4@;oSzBmw>O(C43(e3<01NHL9|EcaS5p&DPpH)vNF%*F(NOxurZ@4_WdS8Ga
z5$Ttp8LLQ<{zQAoI^{on(9Cg70@V?3MP`H2Zc|ZGu1*vx{z4xhkY5wBbol&$LTwz}
znnjaDe3;4ihQ4nYq#Jr=)(`!nS`&N7L_R?&eE;=!!5>-xf!zOd9d@L%OQR_qsJiD8
zO83bZa6ws@7Hh(Le*QZ=>p#SxpZo;a@a;rP`RaQ{G1V*=X9kU3qL9SeK(jO2hYY`t
z?k@`L|0m{#7%}~qxzQv<76Y+^RSj8YRtQ0cxDC=h^o)iR0!nC{vwi#I%2zuO$MT%|
zVTlgi&$ENFb~F0bL{O~K{|1Bb+lmOX_x-e`Y*m3qwI;nHQmmQmq28|pd<6Ofu{4E(
zWC;&Ysz*arScsm!;M9;Ap5cvai}f*5tbGLYWN{3G%JignbeBNQ$`7D8)o+y3zYz#g
zhvT<mA%eI$wZwK68MxI6MSqPC*&nxc#UAPmpa1+``+*vLk**TpwlUFdZU8`1!=YRS
znJy16>5a_GXbFYsnbsx%sDd{Zbml|jShf`|Q1ji7z#&k{o8!p+^tJ(hw=SnJh&1-a
zrjNzVDs6Yh+SeyFJx|*l(NwCax`F}B1Z}7v$gj%SND$NT-dNR8dn?58wYpS$(^Esw
z2>}xu;u9W@!SMBX(ool;1Y@_Fs3S$AbN~EW_aC(*=wmE6g<QBWcUcWG_(qd>E44zW
zo)%rzT|ZCVMUub=V6-~|rlD=AC-zspmLm`-(_PLgBu{oSy#ea%jd-qNK(~cHv-GJD
zsIy?mdpW!YSPIFrh4N8zG#}b1J00Qu&{MKD)p=Mf_R@1ikMHF3MlR`XMnO(8H)Zdo
zcwz-+;5f1CX1e*fl9X+px#o!}gG+UTyseima%|g2l}|Nm`IY&$z%SzJqOVbC!)qAy
z^B^b(@$4$MbT4~!@{+Eem|58RbnKqPBHTpp<h}CKGS1XHA`z~91<F@d2P}R5z>eZD
zC2Y=g4hKwjv8X?c7K-t|c)CY)nZ~lnGIS#^#m>7B*nOs^I(i{#a<jxaHzl^5k}Sf*
z7)g(a@_21y6~^HjHa}l)?qo%)HHKo%hM23z!73Asx{r!m#aSLHES6XW4Wj9`l$=!r
zEy1BeW6?;9(31v)Ck=O!gD*!tm|ozYvtWq9!mWrPd^#p}Ll_@>8z1v_j$eQQ$W*$4
z2>S-!Dor<TOuf6k*!&@sKPOrS&mN%O)ju8TCme&wL_<Vy=oW^nX4t^+$YI@Y2=R_7
z5yhpnvMDB0!Sl~V?5dXli5-BT>LDEkqd(SQn3Q>BR@EkNVrn(%t13Dl6<swD`0QA)
zdWz8C9@?J^AzRnvfG(BzQJtfCL#FH*7rC6yM?OK^HGv*eFOYA(()@99)jx0}JdKZT
zF+rlXmHuOa|C?T_)&Xf~3tEuOO1d$g-bT^*Ab&2n1QPP>arb;!)y3xSTblqLy3Arg
zgK@euHQtiW;ZqEXP-S@zk;IRa#4msz2l{he5+9z<!hOAmN;0)5i9ZF2I%;)UuV-0X
zdJ?SKX~Rs6#l}9}r$g3RMVqxgn$j(`>fpW9a7*uDT$8LutnmD+`xlV>Iic66_AN9|
z&r_;*zf)=`v+8um#$&{-o0v|6P6j3e%OkUH2)dvk2uYZtubtcyJ5|?hyaC3vYA(<K
zBR*CL^2MGwi&DP6-GhCiW6M|;v(3K!oEFvU>%k|<f5I3i*hN`rp;>q8l@w>EK^w1E
z*!1In{=K51ymopXBV9aGTD*Xs<Bxj;WzQn6?ikRJyvy;Mb+hMY4|oH;&56Dx!W~mQ
z^L689-cvn_(!Ek&)Zs39XOx<vGd9tv=G`{9EDqSg{34yY1kSBf=c~e4fp~2h!8HNA
z8K&o)VdYk)>MfKg!IJ)F&uL*xO)$IKi0Rc2rtA0E?QiE)ztM^RhB>1uDVlS8+Coqo
zEGTHab3OPABfSs%Z#Vw_NOi2P&UB{YJjgKn+CqfzgVm>3R@LT5U;U^}s1Aq{P^V<4
zLd8qx+<RCr?9fw@DY*MZak678Vw{isc%P_&6gW~(EX8YM#|f|~$ep~~nX7|VF6fEW
zot@I=qH9#RT3z`X(Axx|kmqWGzC*w_se&7BbJG$na30*oLc1?n4Dngo{pxLtZ;^+V
zs#PlzbsG}4Ij|J5ZpnPxX!RaBJ~N=~h9=(8;K;<9sG7BaXK`OX(Cu|9ZOviIi4qsM
z7<zr_<V=ZIdg6(*FS>i>xiXjsKZ;?g@$uRbB<d1ZATAbM3VM9o+c42$_>3cct>j)w
zt^1`)u4FZt?wtDWPEFfoib9`?3cL{4XolxZ$6zRGIt@=l&(uV$+kCNVxE4?Q>eJAt
z^$dp)XZCsbbE$xX%?Q%BVW{&~jCdAB_q0DfgPC3f>pLh^6(&|upv#O>3jJV_>KJXI
zf80R4jxnuw7#?|;qohx%8Q4afDBO;z<~13y-IhKEA!?^RDqf`N-WySjql_HgAiLRO
z{ss+R3X6^o;wRlxIaNJ`!TE;f2c89&B`+Q5Z|bBdtz@OxpKLE`x8)J(UTv8*Tb6iM
z3E|C19AD>bR)7{+0#*u6vpm;ir{d9Iw0|>nH(iZ}fn7)MERAR(HFx3PMv{-B8td*m
z<pqpzo{vkuL2qD0D`oWO71acPBdq?7dHGi^%AdUkOQ|uKs8oz)Bff1<m1tLG2L~~q
z(y!wQ4NDVAXJ^$ZTp%ShCr)c8tyTJ{guzp?o~ro34^veHpl#Ba0cN75)z<NQ#M^1#
zruA;7810L`dGKIM;a1{ay~_O*AyZ{d*Lx7ovD`uN$6ysvV%ZD<<tG}bDLfMWiTq&B
zKE659>Y+^$Cr#^?&4wlbg2BKhUQ>#ZLN}{<i{-aGV!~;2k6XPw>(K0e_raoGT*YPU
z#-m@JAJlxM3kjkElc5ptWAW86mSr*v5n3<>w7T3_v{LsZzK}5<nUAzRz?QyCZrqE1
z#EH<O2nMCUCV1oS;8=)$ghREJQi|@KJGX4>5^t>Hz+}28=yhZ5^cV7_uhzTmuy9M+
z4rZn<OsRiBWPo$o28??0s@Ps~<?)lO%X2R0-b=-Vm}L&B30?rN5%h4N7!X9q9)Gzx
z+a*Xc-sy97v&!PJ_06(VTqz-Ox~J`)nu~?lJ^QHB7pR=;GPuHH{XX&Gry&aszGFa|
ah|WxPIe-)OJ9@;Q+yCj$zy#2rCjJAW7VZfE

literal 0
HcmV?d00001

diff --git a/docs/img/domino/lift-r2.jpg b/docs/img/domino/lift-r2.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..de7813af24dd470a063ffe9a513b65fb1e877686
GIT binary patch
literal 31237
zcmdRV2UwHKw)RJpE*+#s1q7s7XwqT>L`0-0NL3Ljk=_CXLQy(IKtYOt(nLz6BOPf<
z5$V!N=slr?03rFa_dREyv$y{Do_n8jpXdK4nPl?h`)0m1v(}n-l_C8g%>b-7HMKPX
z3JL&tOnw0*6cDWewSNKty1Kww008I!8VYuRirk_gzW@q8fado$0NkYD|D$a{A^x{(
zC;=eM9-#i)HKye6-wL_s-|qg~H&rIZKYGlh{QK3Eftgf)Z&S>WuLel#fYKd1XAft0
zJ7<@Z7iG=?N>{aYsekL8Tz+qh{@!FA6pS840HxH+f%LD}@K*y!O#mAm#SCR26~$?Q
zl8u6jje^t)fXGilL-Bk0{WkIk1tk?V%`sX!dIm=F1r@9SB?T1~B{dZd&2Pg{_>=z!
zsM%=PPo2MVj6?q+?P*ue3qf&j>4dMAG;<mBVMH!Iato$s;O05O%O`q9OkCouoV<df
z(k10<*EKY?ZfM^$G`f4w*yO&c^<$eSws!Uo?jD|A-cTRkkY~?B!@?sX;}c%KN=$m4
zoboO!J0~~qeg22ivhs?`s_IWQpIchn+B?2{?d%^I92y?^F*=4q&(6&+EG{jtV7In+
zc5!?7{e$1=BG2c4kVXFd56b>R7aN%_N@{8<YTDoEqM-Etjc_(<np5YGv0u@redx+@
z`a%#L=he8kCC&807Y#66kKFngxJBepqS)U^`<=3Xjj-VV5oLc6_7A$ofolNe?}dtz
zl8T0kii+kK4Y?enJN8?lW1#!JF#N4B{a%j$R#^ThByuMd<UXjWscFe?Rz`Y8*8g@P
zO_JGCoHPb7QBjbYiHZ#X14o2+k>bGr!<CBk{r`h!?EBZB@%?<<sfdLas5{(hc!8Ao
zd<Xazb1exdHh?dk2~;Hkm+`eT2u?dbtq20x)g7L@>W#2=mQpa5J;cx33EZUiiQvBe
z!1D6@^KX8fOoy8vy$#^D6S`P~p~2;;ru006NTbb6CMQ~K`GY&1oK8icLeA0)S?9OG
z_%W*%BXZ8cjT8_2cKK7O2Lnz`m{~~I^|GeZ4i@ieoV=>|cBANpzobyh%wm`m!Tr$4
zk_12?%L-|y&p#iLr_oYWxNxK0=zhtW&#po<u#(RY?$EC6h1Ffj!WUdG!Ggjz68Cfm
z-)XNe=+%n2F?Y872uT@5#(}LtfSo!go@eF=qBZELGV1Bi_gOoB+o-l7RB_8Fj>(w)
z%ZunV#~34PDgonk!sR|y4J<9L(TcJcz3zPjGbqFkS&dQA9uzG(yx!#zp4Y8q5c5sv
z`rR-4+Rbtld+S_Vm04)L$Oaj_4;n#<m;IbF9kg2@ec^^&jJ5fv(s+p{(R8!U3j786
zdms0!x`g~H%_8@!n$nf(@WwI)Z}A~|*xY0+7fi*+kEXb)sIoTKLDnucJ8|rKI&|nI
z;m0UXSNpd?iUV(rfXmRzj@ERcqYR=D!DgUA5+AzQ8X!3&r|rq=X>U4=b<Q)Cu2a|_
zrR=EXe|T3!hW}*B!8QsR<*A1^ZxoWNhnQ3=jM?HICo1gmFj-ed6g{~h$FO!a{{bTx
zRb1~);P7}Pp%Amz8spstDjR75F>9z-q2ZkPVG>|yrdTzS+t1WDZhpJ1*i(e%LTgD-
z`N>Bv0J?+#o+Sa1L;(zVoffldHGHP3788&lI2qDbtW#+IL)(>u`^x(+uJSdb=O-`3
zXG~wpWErlJ+zRQ?#WJF7o8h4~l?XJP3yLtuzmh>%7g(Oj7Ftk}Id$K}q{CCnD{-rF
zf}PvI=h0l+hz6=2GK*|cGiyB<2_>kY@v0!6Fr<@^EYC_}O5yhvuhkl!N;{-XuK4V!
z7Qv8@oD>Hfvj`C07}Xwk{1f`H#oETzlP2YcaJ+f1+xTgVk^)e2?8WHg_~QOXexeBU
z!gV|&rVH)3*J@#HHB_fpY^P|9nQ~xuxAEHJptT7e*6nMaE7`x!kQc4UwOiGm5uL$C
z5S(2u2QlM=1=V{_#%hG>QLP><Pq-%@T4qSORKlx|Y54+&c00)Q2S@o0$0qO<Fdo|Q
zy=nProNq7AEaPMS%&Jq;l<}yg{`dCM#9oIjaA+e}z!@m6tIgD_tS~nlyUZi&^a8J9
zj@gzvJaLML?`7$A60q@}dDBXL;thS{OxoD#li@2*rSFN-vI|-`<9o3_Z?`uug9<Nn
z37l8$|7aA={m2)%bf&@Qd!s{dkh(Bdw=rZVgPm6vX$vv4x9ne0k&Jq#WcQ#B`n<f9
z$t^-yvBLLBQT8-62{peLD&wTlkd?n|V5So97OzKtbx~mE{JK>Fh-Vi66@?s~ZUR$#
zf&jrYi!Eat2#1u<R-<1Xp^n8t(LdU#i`;s}yUszg7{6&}vJ8L(h~FEJ69pj`mjcd2
znQyOKvws!M2-}6M+T3yyTj#DUZM`AzLLhG9t_j$_F@y)iu-+gqQ!5g{TKC$l#Z&av
zgtJIt+g*dncO3Dptmo2j#b8_+_FDmpC4|GySKu|)IC?(h>Oim?8jas~JgNVcE;XX#
zCN3j1owh%K2d0AA3-VYmF}3q@HKU{(voKzGn&!Xom0(9%6-bnY(xR<`SXwh^S2R~!
zI?FDa>oQfg(KNt%u-y@-*PFi?8K*<7ak+SDbdTR{%)+bU?y8io*K5p^&*xwA6laM#
zI>|hY<o$N(Xq<0*`p2UijVO&Tg%ix)rc!iN^M_{Jfl#5{pjl~SA|C?%Bx<IS=4=Kf
z0X&2rly|fXWZximXUKH6)l{~F8xl510NvLfz;6!$w?jT}WcK`8r-rUAx~fuZ(r@2W
zobcYVnfmqxzWeJ%bCKs`Z!etKH1pl-06h;7z(|L+7HPGfQw;O8l3nw@<5Yd9pSmGM
z&uaT>JoCCiE3Ob6b70)&2yanT05Paud$*Lt;(WpUjcLz@j-)K*C3d@E4&p$EI0<;t
z0K$Nu?PhSoxbUJ_5bAdDL0Tj8u4KN9oP9Bh-HM%$G2&$POAW?zPwRZx@@YZRfdBdo
zJQmDM6kb+tC_*vcG%I=QeCSy=X5Q@Yr0@MI{+nI@%z3J_xvEu|kT#z@f`V)_zrW6{
zeoy+{Np;6Z3o0ttd{FfcL!#9$QP%8wc2Nmw2NYkMkd%0yLg;oY38-vrm1Tzr2{|*~
z*G5fVka#MRLchWGYP?fX+EL_V;WJqC-5<}}b$x~ENdON5_qpGL=AldwCZOw36Xq6E
zu)<i$)qC|(IQx}3?o1Xpe8`~&OcejHhy;ATgkFDSvU`eU%#u+@vS2ZoX|^Y0txtQd
zgE*0DJXKz2h#f#7LI~h2eBQlbrZMMm@SG6toL<15t=-dWTpx6VG4SWQFcN^VTV{iK
z9A)yf2i&Rk>pC4U(5$TWWALQo#EFKzRrBe!uUOg^62Jg|TnpmEA7Fkg;k*^sALZzL
zl`fm8t_Yo^?VfMv;_}7Dic_sj)4-nMAy}5{L!Q(04kW-K<l2-hI-WPPrrd7CPilDJ
zv>w0Xoa9LnkK&6hpmGw>Jk1SF)x#vdDVP{rF)h0=s`cJ}WXx@{l(}1azBX9o^=z7^
z9%c3B==f)K@j9$*bO0E?^4-oiFP7MSvj1iA)16hhQ70L?nQvX&>gSHRj?2$wl%K`#
zY2lMGtK|7-_f)%RiQx<o4^{)~4mhUw-ih7t^sG#@7fGBkFZ59vK7Sngl=b{&&`ta*
z?hXDtX3?q|(-`Twh`3X#k+|+yxPcTdjUld1b~$s#jpz3sdvYb#*WE%kx<Qekh>=Ok
zhA403NL3zSq#sB*I?2;JU8X1#xEI$Y>~aOA3-d->aG*1q>-A=h5~mnRz_YUJ_`p5W
zG`>=+`(_=FO2_sM+WZ9v`@YV9&T_FEYLV@#lJ6t-Sh0F+hJZ`FgX`{MAHhpI>>orp
zvs!HHEeM!3JA<aY69U6&RlPD<3Iafc2Y7F+idMrgfwwScj?b*lJL0sqvsz*Jg)UW&
zvpO6U-vqS;c8XP+NkI8tGguIxGX7ae085{Ar*WGmRma}l+0pl*h*J5d6p`S$;^Wt4
z<{pf6jv<1G!e233vl;C%4&C-b68Psix^jHCnCL%#tgJYHO`tH3`O-6vQ|8PafPi6x
zR0d09({vzY1}Q$F7!Fgac@Vg8g}r^@A+NwX+s5QLZ3=J@)&{PSZPue%H68<nSENSy
zc;r(Xm*u|klh4VD2M#p`@07Fdu55`5uGV6aZM|XBEU@zsGzs8bWgA~k3F#2b(nPw7
z4#r2nyTW-YJNbtjxBL^~Oyu#Sw?vVy_gbEZ>V)%^VJ6s-D91Cw4~)C3`k%^BL_)gi
zKebaSCV`Bdac+?AI=z{C6;wxC#%NZP_r;3#n9MPU;d}A7+=OtFc6S-QCahV?L9jct
zBhxGmGEgdXEW)n{FKcvY#P_t<8JeIWUDQ_C7t<qvF&A-bZE~*|x_!>qSQM2pW{9GV
zhUwEjgPs2{;b822QLWCHb$Kr)uS?3Ids%QV0cU(t;@hY$Oa)u4>k5M$8kj>FUE*Lo
zIfd)Ayq`>L*aJ1MJai16Y-+Q^gIeL->c{c?Gl(d|b_^789qa!sIja}7RDNh8T{Ke{
z_%gozMqmc(Qwl=Ii*q6<XeP^Giuk)ghJT@gN&UaV-Q7dQVr2s|o6^_?4UU?SfwT0P
z%*v9p&d=kCgK$<NFW$Igb9R&2q%p#yKv3_C`iYvPuZK_R!AT2nNbY)V$(!KXM*MZK
z1N=ZD-~?GU-CHC9Nt_fro?P1q!f8+i_=vNS367F<CjniNU=-_7J_%UL!CR96db13o
z`5oH6LE~>=OnAT`3D_|poF@xPQ7;LQA?x&K*Km0x;9U|5K@fF<@4H?GY3W-jk;gc7
zXh;J3bTA!6_9`%;I*Z80GL}J%v<d}dUVS0~^KF2?pazT#^XS6X^+<q*Yzqi~bAkkH
zp%K4))C(s4c2g3^?lv2!A&xb8{5-PuFS-g;(E$fdGwvMmw-7-UeUv|-X8OvNsi2JW
zTe8uJ^GPaO(<&untw^QzbCPf!y#34@p1WA}s=Q^pq=xg*xuG|2CM@Li_q9J+BpXrB
zG5~5@kS;Ka<uCK2|I3jPq0=-Cf-uooG)ovxk<YRIOLG<EuqzeX?jHWII^_mRsP8Po
zVVS2%h*M9nsx_yiBRsw1P`<dXV_~G5ecw;<>EX(8uf?f41d){l{5E&Sf6N^S6Gr^&
zT2PI{^ihtC?J3NQp)ef>S&#9Z^SW$LF0c5;e7PGo$O`GHV>whMoWgs4=qkorU3>Vx
zi_wffYP_pR^X_vZ!CrK&?1Hej#wk5l^J&5<67XMh#Vdm0QA7j3!+`(<Q>Iz=efQ<i
zS_PYEThI7dQ^{@7jJ4tUoXBHwZeUNoMOCcj_AFv_ab~X-!HVF9wzQzP@1BeDPEkos
zbylY6ml&qEzW?3!*-Pcv&!3R^YvirTy@Mcy{DUp6e!&$*-iLAo_IiwZ9A;y&#ZFF|
z@=5goIuZ>PEr0d8#7->oddkH_uxE#W2Rij#KsO1XBB<m-4%DMi&nhdhcR!4-M9Ro8
zT73T0k+NXWMzl`8Shr^#bl>V1v->l@T^4#<K#<3P!&SL>T8iAYf)86hHZ#|l-d1fj
zSs#>wU!1QJr(%B76uqcAB0mEU1Tzo?Ehik*dHVBoqTWXz-%Hx73EB#8);N5;?RZuB
z!;GJ?#4i}>SJH4B{9_a67K@e5!njFZ^Q!vZfvYW}ssF5i!C{i4U2Hy`7QH-qT#IXG
zc!&yM*zfo|1N}qzWPcKS;6Tygz&O~9*lk5w1T$B4cSe~c-8D;q(q_0g-dm2nwff|-
zLYIVUGWC`ERmGpAS?uQych6(Sn+s;6dc(W&g3!!Yzj`chJd3wkf3aaSTKk|`?dsxG
zHT-x$&-Af63H19qNRFNKnKGwum|h=0Z<zF$UfqM(6?+4A_;P&Fqn{N@_ODi?2(1^U
zejFRnm<dg7RRBF5RF%CrVc*aDn9=!Sal1=oGo#}SH+Aff0wG0=VpzcGB?G*38@;RO
zv%<nIM{if_GsbZdMUNx(H>jBQkf(nh;g>o_hM<0wL}VaHRSZd~GQXR&$ECEgk6T<m
zD9&<c>`;ewi?($K@s_$RSJW<)z$6Y&5@<0OO+oa_zQxIc(ZMEf5v`^pqb(t|XE=PV
zAEsyK(>6G4{xqGx<o{XO(Sn%~L`7-_D>P5Dy1K5O!b$bbZDPMf)6$vHF|<<liP^<#
z9b5ye8OIv<SBb|<FkA}>I1AcaKcpQCWn|<~EQ#k*g}&Xea82(qXZe-b|5e%Dq0NP<
z;qT9Yp8FadfCJ^icWV7U-P(S!r6ac$7a}rnt5*tvKzoZ#72)+z(;;<i@ZyyQ-hsB9
z#R#V!T-Nz`Z6WkbVC|1DJ<$Nb7)X1?lDtp+v<D>o#9)92QT-?*fR&&-oP`BNm$P~1
zcksm9hAyD{=A)2F=PTpE%IX1gwJ-Jr9gqmPt#e2QhnzIk=P6kX2}sR>TC}D-R!H(T
zg;u@*-exJdebxkK(^VdQZ2U=`-%$k}8pdX@*ZpIkqbkH8n-F0_%w<F6>wMGti6f@Z
z#wWGlg$~N`b&pxE9#x{9Wlx2RDkQ8n73whtbc3h}f@nrPF1rRn?;*J@-R!EWy_&!Z
zkq4<BwdqFBW~1AAVkK36mP^ntHd!j~c2F6jc^ZTd)#HG+TuU&j&FI?BN<7#x)tx))
zRp_zh?w>uX?&W%m1<%NSZj4qK6U;;G_{cuOOvEUpNh6)sn9HNzDhl84VSFqOJWlg!
z*LC|t$NdwPKlA%#!RerY<4_({r&Sy3wR5*`%Cl;nBjY<mTrVll(d@P!UO0Yy!nOgD
zh9R3am}^Z$@vwIF6PQ-7l%kTjqS0cRCy_(BhYMp<_f7fzCkffNhQU8E_Al@TwQu8E
z5=Qbq_6c$}HAeVO<%s+;by5|#=|a(iCl8w@602O#05d=)MStz2_e@obGY6Er1JxV8
zlaRmU`ykwmyalx;vB?Q&u6jLIJgu=VmfUz$tm`uUi^=^3%|bCV;Ado?0`JmD4Q+7w
z<C5}0i|kq!k)#U~wZ+5s3vaR$?+HiOmUIY8#*pc5!N@id&s1T<LhGaji}N0T!xfJ3
zT)bx2PbpaBIR9#<N{XJkh3~Oze<=z030fKatk^PHvfd-p;6GjZOO&A=`>w(DXBxDA
znq}ZXIg-2){1t@#lHZnABo{$HZ@2KjV!HqKle+x=r0@S3FPekeF2NlTVPIxeGb}ai
zGNxybAgrv5%fZ3%nlf424INZH^P9wB|61Iv*Ff|lZ4Tg{q+x4ssOOq%YCiGNJ|}{=
zGx)>(mmGmURI8?PfW8Lj-zx=a;Ab>l=;%khY>GJY>mdP|DWiY>36OQg=(hzOt5Ijv
z^HwW=Rk3$*Bp^_qYzZX?bQT|^jYs0>5%ZRyL-j=>uMR4gh0tlL0LCz!T~`-@CZOSA
zjb$UK>1U_0S8pM4A)*8?OU=pJb0V|>YgM;j?|sM-vUhtu@*zvemZP57O?HxJzUd{B
zvCgaq{D}n#_<oxV%*oITTeCk)<knihNCK`$j*tM_7jUc`{PUNiT=-I)DG8X_Vj-R(
z-(5zw;N{>LG7L{9-{@#Sybqs|IyhmUWJ`SH4N8L(G{A>vH;KVXpnVn+5RQDqftFoY
zoJNV3kbvsRYBH^o%W8a!hPC}t8x>Jkto@~K42*}d(`BD!Jn7ovsQTEm*z|{XKYs0q
z4<6UR2;%FST(CmlJKJL|!j^11BpRb7Qu&>B;jEG)bAPh5r;m;PB%cE4R*<FOp$<$A
zE7n@i*zz6fBjqX5FE3YD)3>V<R#`Umz+{rS4#{@|DkXSL_>>JrD1Arg7)z_fTWtEP
zLsgi{_mP?03cmu*;i+cF;<u4pB`GVQkLQgmCohHPOA8q@-L&NjnhFS68L4p!#_2;W
zPb*00n~N5G=*FH(@ll_1-2h#zvJprbC4T*Ff^#~4pJ31ZuZ7T-wwi@3)Fv&y%9uEn
z+MK~D_eMd`tSeTmE@C#+bDLXs(x+rU$Nc>r7rHy{7L5cMy$IcaX)1W7g{tq=Id00m
z&hSn--i!XYx7@u_&ns9M(%hC;?5H;M)>0b}7|h{wV4|WHPIvl<Mh3hEA_4r!o<_VG
z*|Hyrsnb9sSP5za^7+P>M~daTbk)Em37XI%5xOq2AGl~^|Cp8AQwH7u&CN|XvnNYr
zgED>^dvc{gwx$#_@=QKT#ZT}oowV1NcvB6jeAn|46S)ifUgs`}Eu5sd3^F1Ck0;6O
zV?Z{ierKO+IhMeUA(;zmutwtP8V1e7yyeThDH^R+3iKiyLYMFBPz2sA5tKA2kpu11
z!kgyE_r^GWr_kWM^KY(UIlvirO-OfuEOY}D24<;1a;ZEryK&-jz0QR8*7ZzJ@FiZV
zN{BJ4yg@9ZnNMy??v?29d}Yi=r_2z0br+x((e`vnp2hzALhM<Yys{N<&lz}yA|X|*
zE#r`LxAEvyO8r(H3oeHQBs|8Kk$^{u6(k`0;yp3#wF3|#s1UsR41PdYWZL<i1k5p8
zlK^vJGOZd8f1;I^H2B}v6Y$YJqOKMyd6NuYVx!1oo*U_N4YW3o##$){^k!^1fSIr)
z;OgK13cuR^X(y9cnsWkSH5^T@Q9w0Sg@D0hUt;+Rcc4pvGTw1!I>L+l03I!sT^rf2
zr+kbIZTd~Jn=QUdm&e`6vlkBKr5>Rt0ZhG^>v%6XV^!#@wGEm0{dq`gg71iaaa-zo
zgY#Hjo7GEjYBl9$C3FQ*7G@TOQj3i^_uUz$<%bs3l$QC{-?rYbmHy!7e6daBId#LY
z4xcfIj!Aw~>~$g?HZ}@A;~&7U)u>}(a-AC2$6ET838VV)hOks^@Ru9kZcqj)JF3he
zKCcUF_RBFVUX5jxX1Sw#Z|HhyS<#jP%4O?fQsS*$d)|o6G=@G9gSt>g1PD_V8pDcp
zE$2Q)xVU+EA^xi4WLZqd!gwup^lW5D1s;XzFvZ}i(6V&U(5}`vJ66>_?L2cWm!%K%
z%?wa6fhTo|;BgkzT1Hv7Rryu>n!te^n{fN5CCt~&Z=kH7<t3N-g=&G+6+T{S&ScRU
zhTFoQDU*PRJgnE-w-;8tokXf3eqFUD#e$ktdej_ROsy2rU!q+*ZtkeDG^g`prV5H6
zzS8mImiS1N6LOxZ+csI)<W=~i%b&$T;)0SL=%XxES;?lT+^G;()D`rclrXw4`r6m0
zmrh$v%YwW^w(luEZ*<tCfHtCpLa=^1B@JeR?gnN3J!$!T7Go2Z-(F<C(OIW}wYisP
zya>KXcD2REH7p+(_Z%#)^Vm3|E!<nw88L|^vzK5OuCA0V7XYDsqu$|NdQxU{<KMot
zoICMl&bW}EPUfaCbup-VM|%KQTJOVvXw7+XY|VLX;W&O83&EQog-=ETQ{Ulp0^sQc
zGULftnAETkLc$KW;7gj7@Bgg<@oZH}dwZ5^m$@c!r-aYEOJ9(LnnJ?Z-Z3Rw2dY<`
zKk3y`58Ir5V~*dnN@z|re7&P~O1Hht<;43Rb^B&vw<#vqcTU$moxXTb$0OyFr)bwv
zORU;bV=9w5Xq~<||LpF(OxMA9_qJX{O+{6Q6`wtvNmbT6RaeG&o$-^gDE+hNhUw2W
zH)u~q;JzG;w4_}6ULK_@u`yo<<&;Q#($%1nV0D4mbs$*zzCi?ED1C1X%rPbB>+3i^
z|HCvv;PUa-Ep4Jq{pza!he8I7CXL5Hrvqw5F+9IVt+AGK*IriUu#6$ONI*mG<KVvi
z=Fzfz&iM9vsnkdiaj<dgX$CiZ&0x53C)8oD69XPgBI+ifdygVbM~E+aEQ7lJ)Wb-?
zNeJiw1tJDZ-YYE!?^BtOfDt;f=Zwv?{O{1@w{Hs4(Emj32;&U!c3RH#c8v!bT)wtZ
zbqwh;3jOLxc<#{~6AD|G?@rT*`sl?Ivjb=9MsPm&5^#6%cJCQEP-;9>#EExE->UlZ
zcp97J6V}BLgkVUXWm0J&5+wXo;Y^N%pQ%&7DuT!vgE(U5+auCe;q}Rp1<ej(XpA4(
zdooUt9918JkN~wL;sIxd16kLmcaZ>p1}uDtsg?*M$`X1ZM-JWa#Z>|epH;9Ict;M>
zvKlarvTV#F&w{PV6>EV_r64ZF&bBwf(KZot2@`|C3j@fMjJ@xfQ>HuH#>4}z$)v}x
z%_SG!Qa9jy_Cp;b+9ZurlO_<!aEB9~<nUo{xJM7a{_tBeREvD-yeT{&eWt!z$L>{2
zbtH}V$;V#E6mkec5^c2(E`mGbC0qE?pXcJut`22JR6zM}R}aa^4$E<uT#*@l#USbI
zeb;;)*hLOlRgz&<?_Cnm@ch`u_nBlv1n#*XZF+$3<kHx5t238_r7Q*>BoC&w_q7VH
zU_K2+J_VP})z-X0L;URCo-I1vBn(sPWJ0lYLt-k9UaR`19~;<}xM9H{OFufL9(el2
z6dCC2aKEe+c}ZnYarI68^%5l4fe)@$Z-0eMd-kD4G6_hG*pCDVtCe{F*_;frPc(uC
zy>CUQ89y4dRf9MXJCLP{2+-WWr0RiG#A<z(jYwzj58#A<7DUx{O-hnuDVT8hXKh<f
zDXUw9a@NkLYLk`UpKtHGbpv!xcT#fKWpXtAR{n3Rr9NVtwx8cooyigoWB!0I!}*L5
zPvX6eF-{d|epyyX65rG-|D5|>z6nRA;zOAko%gnw9EGUVdF%zSk-LadKNR`d$f1q%
z=^*aUmA=Sud0j2Dls*%s3gY+Iou@=x?i<Cly4K3?fXAi5iyuh9^wl_|BW1jeC3uXJ
zLhA|%ctN%V0`!jPhKC;6og@ba->!A9sXzb9Rdtz$Q~%#rPc7L)(y-1E9-DM{FA~sd
zTJW^8G$LDdriALb(3%VFr<^nKwLk2I__5CDDdbyxYzJyOSc;sRU^e*ZNnCPel&5>;
z#a%Hq*PveCEfx54Q2E`x=STuyl`nTp-s&-py4ODaQ=O%lxta}w8?!sh7T#<7D8Fe|
zhq1i2Y0lzx9phJZ*-l=kR2{#3<8mojlUywdbg6Ss!ED`Z7~$OJx2qrXULUqBTA%af
zZ3iVV?;f}HE?m!<zHuPhnEV{2$Gl8r$85Yo^<26-w)|0qjRkiH04JF);gGZf3(-uw
zd9Ic_x$`piLKe!d&wNfxLqY30B;d?ftSzxknk=R_o*-dJs-t}P+^D(~3HXsy1Yc$v
zj8e-Wn;b9X5l1cv5^#m9;J)c^PznAws04_=i~OUKhW}YbZOcYBoWN8?)R8;Gq&V#P
z%EaWJul<xVjMHK6Yma8=lDkzb%saVNnill!0v&B`pH*^5tGa4m0{{)4y*})ztJpFn
zjZI9-g*xoriOU{zl#@1CX#aM<HGckcnS5*Fx`Yf{T*M{HOnd@5gOk{^*T}6ZJ0opL
z+}5kWNMM;GS9mT=cjc`xx{8+Fi2wHOP{g26|0_e^JoHQ;_;vw5IlbovcNPh-+$N_U
zH7H{`!kpC^2tF84h@32&gX!?ASv4Lb<!8PzYgM(y<*27vi*}nQ=Qp<n1tjQniUFiI
zK7ST0fOqRh_qM20)njjQnBMF+H?tTu%a#s4_j$qY^#LLC`!VOWYN~)9FhdAa2+D#3
zc?4~-?DU9{{x)mN4WF>WAo-3G$!K$|fs;Jt1@FjHF2AWn8EDr@Z0iRveghrO=^sU9
z!+X=fdtZu7B__NtG?vK0_qW1``yiASPKLZF>SXztK3!p#UH(JYyp!14M*<c*$v2W&
z@J9-muuon;7y?sdDo!PN$e~37GQ{DVaYRn|BRdkXJ<oEq1^!%Nm&t@&(*+S>Z^>Fs
z3$K3^L7ti3m*hdoDt+i(;4enKLmpL_AbBqma5$~`R`|{bW#Al*^1n5mPE*N1@8M;(
zPXco@)<rODtd$jrhv1$IJB68OcU1oNP(A;wK)zk?MDIIK@H*SX-OX4ypRA3d)$s<w
z+sq{3WryJ)&+9emuVa_b`Xm!q#4mmAby#h*b$<Q=&)+gVwpWsnzXFX*zx|j_i=w{Q
zaNi3F0zW(I<^)HmS?xU>YQ#Qm*sc}H=U$9XYG&ry%Ny@kX^dCf)9jbp(N&+;EsL-Y
z9p#n-C<3zt3#_POC0x|(A%l50&jmm86-daJ(C)lCqp!jBmco+uknFqpS_->Q0!nnq
zn@2xy!L(!(Ic#CIub>wN#z{b|4z9@q|1CT)@(*D{3gLS(kSq9El%EwHKEY_vqgE2~
zLpEaIN$i>A^-}9teJQuBw(2nP&MRjxcF_K?%Vc474yuvKnNhPsO-FkkS+)6k6?4j^
zy!@Vw+QQuVjKegsW&xe9FPd20Ba2Xo(bc8{=DdiL!^O^o``{=?4!yzRisVEz8Y2E`
zNP15*?ri%zEtC<vwv=|W5LN?#YPDnp3UT7yk;sU*gS!@RgQ_y(x3biGpLnEgY(81M
z4WWB|@A1{{@i&#lCg^&<KgnVx<j=Ahs0zoC?cxLnJlXmCs5s(x`>AduB%lkv<!HqK
zU%T4}-U;Q~OZh_){j*Z~ceN#0ZkmSrpsQkX-#oL+5`8_xKoPO+^|lI7gEYbJA8O)R
zt~5F_UN_6&SY_}!O3(?PP;%CMxRpQWqU+{}ZXDC=IQs~*80x6XZ*#7xdPsEJi8B4d
zDZQGOO7|XL&e9Lasf?P&`aA+hXHjauUVKI!38+QRR-yfPT4kfunees`aY*)Qnm72+
zF>tz1+K$ncuf5Atb)rQ?MS_d7!hyEB5MI+VPMECKU#jO3?;itQRo?Ee=SM9zy`lPS
zJdH_!MlR@a;B%d924!z=<y~qO4=@?Djb=I=Da41|4Y~H+Q_%Dg<!G}=px=jhTD1Km
zEFQ*n={BNXB-Je`X$!f1v1P_p>FfJ@{nE=PHn-t(uoje6m^wF{TlN`2)MgIq*HT80
z%zb3Bi3~e=qbDBadth7P@5$+!$xsleOwq(OH%+snE!Rt9W;F#kRTx7?=~t7#KFn~T
z<63<i3Yk?3cyse*P7Jc>X2bT*Qp1fysf{~%`VW=}oJT{eLkE?*K^mOK{X*w{^Vu2y
z*YNtXP%l}Rec|gMR+DvJX5>&|5exD1FnISf_=x&m5x}Xh@kfRKD$DlI&c46;{>T>A
zs54Qw0Ht>nStCY#(PFu6$$ThHmhuXi1X;?Bual+x1Z2wY-)c<#o~fMR@{MBQ<##Gg
zHG6u1BLz4yZmElvL~n+w^N&mU2Kn{eT%4~gsS>hjUAqn371;7imvrD1Gl0F&QWxW3
z&|iGN<_p_nHG7+mz9L|E;eIS9#YBRY3O))KkBP}4kiFQ_#W3r_;>%S7cRFjsti>iz
zsQJto^YR1|C16iqk=;#67od=q<I}Z$(&mK&r^p*hvX-#gpugl8a=ICOtWKuu?efRb
zXu6qDx+b4&)sr(e5lV9lRqcb#Gxj%&&%nN^Ra)xq283}RSL*gO>2JCAN~dA7&tpHG
z84czS=x4nz2r)>#hVd_)(1Iu?`+2#42VT=4a+TzxY%s`Ka`M;-?b${yJ(}q3mqD~I
z1X=bH)xF|IQ|kAc;VjvMGKTF$<(}a(7w_!b?t-NmH6QW<FRwtxOM;%C=-}FNZJWKw
zGS$0hMC^=>haC38<91f*pEZfy1l8x`(kf!eQYY8|cil=1!dSI~n9Jvrn#PJsQ(+u-
z()#Q!ms)PP&83H&sgG&~vC5GXS_(`oPkkJAY%q(`u6AjuIY3;M?8l2>@gs@&J=B_z
zh~9dPklshg@Pkyp*X{JeFOadBFO)|zqPnn3+N!tNo${bJb*H~cuaSUbzDnj^{2W|i
zZC9?9&vF*ci^SIu^jE0QOyH2m@jb4K_R3bzq;+`fuECi<0o!x=lfnT4#yXhIc1Ls+
zefg0oxC~-yzg0Y=%G!btd#_&6*$PKV;78un77}D_1N|5ydcooPCbpMeo_{T_8B+FP
z&~T+ObwfMLJc~kALB_4H{9Rhg(mLj7J!`!1>1=65<k&f`(|0}qO%$56T;Al}4Z53y
zEX})pE;3p#`DJR7-1B#ll2@-rif2;(!l`E5@RN)JIRAf^KpQ*w*0B<D>NguXe}oKQ
z{#j6{NWVxa|9?#&vz==KUY3Ue>~;@~c^PZdX>X0hKx@&06IGii7Rq+n-Aew)PZuO@
zoD?q!rN0`*ADSVqVqyzEL@1m%`ShF`Ba;N7=OIB1(}oNq2tJgExOae)Pg&4I`+(xL
zS0M9crb7jfxM^8rj7H>ghNKJYrroEy#QSot%tEwMdK1qtjSX)Olyr^5gzVp$)mydJ
zq^`un`pbEw;%xg2zZ=IxH`^6R0F9^ky^i(-2jkVy1f5UJE?cLRn#Ac1lt=OiruYi)
zcNWbvEWqU&WVF1Cf%wSl^+AlyYedb5ON&Ji*crTh4xYW=!ph9G5SnG^tb9PUVOl>$
z<oxFKVFxhI(h4j+F!HasZP&H7RhhFgP6DL$6siQQSB7zNX*Qtp>;%NSs-+#(*iBq8
zwlVsN9wY1$rZw@PV$i{K<3J;JjK+y^RwY(BC{Wioti>w0#`c-hbX$N(jru99<IFL?
z!=PH{L)MTtkt&aBNdN~y8PgkOYgS}1QPgVXWfu{vPXZ_{A<;=QgDy_QFIbs;hoa_t
zX<Z$ey*Ixu@)iq4z2#$o)WqMm%qPZ9X%>igNe&*gt}1nvMLDR)s2r^<I-{Khp;n(u
zF0S(QUB8Sz=Y2yjQ@_WjDrCn|r=#Jdl@LBxA+~cAn>$wB-UOb{qME?%6py#S83<yi
z#Y6il<>!Zirs$>p((|BA7cj$4`0SNv*Vxb1rxMX6cZCgw;v)|wNr01oz^c%*k{{-B
zqD&l4Zc_#WdK<+V{$2c9IWjby#=|E2sB>x(=Kxj1_i}}_AuvtMLRR()8$kmTtu(mS
zY!+_h>PqW!dFAr5A%SyXW3JzsVEp0p!}^hp`A?6Z&hseg_daDBDZdcdq8jV_Jv?sx
z2S61_OO>*3(_%D>FsW|z4!E7x>m#cov{oIQ)vPL#SR*D=EP0n_-ffs{L0cI?ul(JY
z|GxrM$ysX1SU3dnoE@6g>>GxV<CXs;KdCM$POYpvhdo!u6X}!_J9UOqJAO+0zWZL}
z`!&fm2Q15&<X!CTC=A?kunr_LyjQ}nk>dAwTdb-ienq6AI>N<Wgu6&8zj^<qi|-xU
zb}-#eBV{jkx@ip2tY+P2dX)G^5I=Iez};K)C3kzLQLr}kxY2}s^IgW+D^HrlC3kR*
za&TKjWRj)Ch$`a@f)*Nk*cx&J%M%@olxmy*79z0MXCH5VtMk~Nwe7rzRBf!}P7OIN
zub2bP)flq@Q<Mt;28+szj2<TXy;tw!dw)1x+c`IzCr2^aXGLc>Ez}^2Pnv-@Tj<>M
zQzAHHU5id0>a^-s<oih=ot&suK8*@UaVb8Czx2L>WsC)F`)tRG7tdqNZAuQt3;JFE
zE_YZSky%zlj)BI!(@{x2{q@Hqdx0B4;VMe1-)W~_#USBd7|HyP^l9ATu%yRybb#2T
z10*2R$B1Vp#~?hzzBaiPHt$E99IsbycS*6giKx7mypGLhIuXk9jSgvn&5I<Q&s;W6
ztXV+X=JiOI7e=_Mx?W_?d%+tdN3qygI!#D5{+w~h<)L?UDztu~j{g@B{7-R_|8Xqn
z%Cg3>K;E8Hhk0kpDx{5X_e+m08r8%Oi&7`XeLk-Cm1}!5qS1acY|J_NAu2Srbu4)z
z?%G5CTe+8&?zt!+)hp_m1aGDoM6^Y9i|Z4mo*!ka-mC$KtnYN0sE>HVU4^6vHgCRn
zFQdwMJws(4nlS^8B>}8b(q`?J;(GMUZtpTa+UpaK?nL`IU-VE~F|Kz9GZW-z)tqOk
zg@o}D;{N(YG2^CGH7YdDyZRIZDz~+Fk=6VTgt{knQW<P~&@~G?kue9=X_V=B=O#w&
z>Fn{98BF=&MsC_C*xXLzSH}gPu$JCu>TLL@w$**JiD=>)h1z^~C?vx5Xwk`a-SUI$
zvab2wrTpXmkTImGr&!M=AuoPr2K~7_ewL=}v4-Hp$nGT$%4o6y^2b><3I)VXveK}7
z*YwXgmWv$klKoq}s{;9di+A0jEnE82)(esRvy10XJ~hy9adBh{39!FJRAgaZW`%Ko
zEdTVv!PLKvFv6oblvP#PSn;awHr@Fy_Ra*3FIT;`Aj>x~jbn;hsjVOe*>WYP{x4Pz
zd~ycKXAYH8jt#EYtv(bNhL~&AaGpF`qzqIIX{woS196h|-JEF|-mJy+_8WJ1Z*RwU
zD>|O;`6XrP)Dg3H)UVsa(0Z4`y?Xrftdghc8*O#<n!rLkF@`rV%*6?|nGj3S24^-c
zk;fLxrj28!{XM4-S4G&(PrJvIv`)MjI<US><7%lA)xM2)S^$MDw8=-%87W7+Fncb>
zWY)<vsr<kgq5;6zW_m|k(8M52LX*((hT(FPI&DnxLcilN+@y>UeLTOtv~BFN#We~!
zL#0kzuAzy59w9L-OQ@<@K0eE`Qcnn<WHKQC!`bnXbIGW=T`2d*2sWZu4Bu(<H12Is
zP@8{Jkj`b08tfe2de%a=qh_<sIpQGZ)n0gonWV*8I*<7{N5`j*j}y(0y`;5`*LG_S
z1T?7Xy&J$7mLS6YT#G4aUdxzQiL0~YxmcYplROW8?a$?rtpQxiTz}FCmxaK;utgxv
zjW&(@3drYR+}Wj!gKNxwJ%5-$|I9}JQd{Vy{$)1V3e|*2d-9nf5&(jkW56v?BYeUg
zM%6UYx@$deNWf%_TYGVggi(^z>4#QqWZR=aQV5|Mm#r%mQ^CqDod(eEAlV1irOh1_
z4awdpmh!Li1+PR^iT9>1ZY&n76|~oR9%?UHielytv_hZmDm{7qVIryQm57-a_>xAu
znecZ5J7Y2m4-mG;;H^=)8DhaO33j?^!&-#n#BA1yEY*DVycCSX!}HJyBh1cOKPAI{
z1Fy-WSB<)sh_|Z)ZK!jv?H+v?z$N4tK0=lr8jKf5-5p#Zhu<@}@ya}dFx}ZBF`YLJ
zPl9JCOENYB^UhRD%i`j(-%7kZ^ca6MBd0ACUpXp_R0O4wRf_G*bAgmSJ%)f@5L2Ce
zDSA`a0%Eu-y}P$LA6J`DD~xHg#8;pOt>|R3-kB8|F1h6lYO_E8{I(0=Iq{`fbzG`t
ze$Y~k#vn(#Wy(QsGTv;tcxCb4-Q<yO$wCE%H%eCnR$O&bq}M-Gc5fenZ~={2Pc%Mc
zHvI$fblqWBUVC>U>t559m^t5+1JG3rG886jgL7tCzBlG^Ls^6U?MdGk7vOxUEo0@%
z3lTPjrV8^l5#|BHDM)nveGfSEito!jkZo05*x`79)o=rX<4sMuf1_e`R?ock;<@`G
z5_fR|4TnjxZ+d0%rpL<A2p(wd2R#qzQ_dzcC%zCM6N&2HLXhqT7Q00q%<(r1uF?xm
zkI9aE!|IXu!L(hLv<&(#O#?6Y>>KN+35a*-vymoW+f}sm2sCCpqD_JlF5>SRqOrlh
znG^q6{a-LO$nkt+I@lq~`M3p)9xF=_t(hb84V*p{=*>6%EOVwDT8x6`-00;`yqv9w
z0cGGJOTJj=F^-lRctis$WXvh2s15bw!J+(8Lz+{t#8a6^wNc*;_ukn?+|wY6h#W7c
zz=8w8b{TYd>$V!*k!a#Yh-uC<S8d602UGg8NV~c+P_js-je&yAlanr*w<(YV5XoAL
ztNr|k68HT4UfvguyL~q<)F@av_!6>rlXe11Uh>+RMsECco28KBCCyKc@5Yy1&7B5|
zi9PZ$=w93_!}<0iVL7G4&O)s(lIw^tbv8Y6oRNG6W9CLN-@2+|&ZvFC#iM17G<rI(
zy$7Gv7Xnpo{{6f``$*#)&3keTvTpi-z9{8SKQmcCKRc`CXQuxO7XGEv3;%q=fg{@B
zv4I!Yt3rqZA6IluY?gQ^ou<W)8H)RZ`6oJ3G_GEF`RNhFb@iJ6z|Mzziny_ua&NC<
zp-V;=J}zC^yw^LKN8hf$5Pnly{od6~CXTp{q9?1$Q*o$C1xG{E8Xc7zoMm5y9q&?2
z49RjrQ#CPO5w2)vUzB6vI%`>N#_bleiJb_$ICJEV3mk%@)bWuCW>5n~v77xSFF8)C
z>M9Ak|KQtkDHj$$ycyd^EZ0hiZokQnGkUcs+tQOH(;!OF#CLh0iNQDsXD^+-sf)DG
zbeU?jy*7BjVbKxVz=r2QdA{n6md^*>A3hzEovPoc{`h$fO7ZbYu}rRw6#tm?RES_q
zw4}2$`}6CUxr~lCF<?O;J=qwTu{CC)DWmLam-l<cql<>G^zdBJI=xoImWPi&mXrmo
z8E<Q^d-)2ewUiV^#N0Cf6hCaRd}6nhnCy*migEuiX+gL#VlC-Y$T6qca&-6OVWzKb
zkzc^ji4(Jt!NVsUi)JgU?!>jeHS(qSZuH|RvZu#wZXZ7MUNd3X%6>2Dp6{h}#Z|6J
z-Ne2v#P$1QYG*6vvlSB)#%^VHC|@|Y%XH3RNSM4NNCOvx+ZRUDT%BZAJ)nxshxu|X
z1^DCZWiroQx_F*8ExNI-(Q@gu?|Fk*Rc~e-FDE<Xt^_6O-qw$&jJ<w2cCp@uaJw8U
z8w#_rMLW}(CU3}x^R?fhI4jcoRoFTc82JDLW1a>a?}tU0_14#@J9c+$s)ixehxT)i
z#h$L%3KKFhe3e$)0?k<|_b*!T$0-yGy()utZs+s2tL2ra?)Vn3?+doYyi`!=8*6$i
z9C7u%AV5V~JVT}FBB3M**@<+~H;+?iFsNO4aUlM#Lgc^l8UO6M_nZGYoR-OA`~A;}
zM8o&Ua`=yU|DWUp{9AyZd<5#9oEeCfAkol=lo$8Nt8VC5|0>^^ROPob=$PKgYvg8~
zK5zZJX{8u40hvRN%{EOP!-890!!%~cV;(-h?9L}wRrW1BPFz`8APzaAew^c-d#xG1
zcy#~KY}{obT}(z3h`k&nh?iHFRY=GjyARE39cw^1+g#LB&-nl;SuxhUaDzg)vb?%_
zw%{rC4%dO8RR-u7jK(hyru%)jNDI{wA!XXT&U`GV2%-&Vj!2|ut+{$>`g8)X@C$*%
zktMZ15XGMniJAOg0LqnL02Ik)67Uy<()LGx|0ya7dz+K3#td9(5qHw^z5c-H)tqrU
zObh!EQ}uj9tZIL=STF8ra<jcv5F*bMI}Jd;HVAYSK;dZ3?2|e-5_e_J1n^xtcK?cM
zJpV^zTI=G@>}$g&@!GC+58br}+p=pSX_&j`)5f&&JgVR_d4mtDr<$S<2g)+uBVWEf
z%B-_ObNTgBz*Gh+{7+W?a28DDe93~o^Fu|?+B`N3yNaG{RgAJly_Or5K5^xI^@}eb
zZ!Cs--9AR^fGexJ?)Kv<^sA_k+n)E1MGa0XCP6>_;iZA*b{s;{X0PE;W^r8Dbw)_b
z6eAn8l^4EiA&gr2A{>lQ-WYe`e@G7hA<cCgKTk&z&sPu!u^p{G56a_I+d{OiF!{Mo
zj*Aevz7SOKJSaF{@!R+K&{h40(yC8TM^VTp>ezx7?uUaM<2gQQT460iI1?5}xZUnN
z@or%wqiphY3p82-42kwg!9i|6X%KsNWc39Hdaf=xP;RDst(lw~No`<fcmfqSfA{`L
zCqwEqc3Rq$3i2VpF=13ksFT^P$~s4PLsjYAv|RL6Kb8bxp?&0O>}FF&m^#a+K4%h8
z%DfQnL~R^*YhCPMAaHI`mG1yTUjO5DG&AFu8`%1ap0YJO=sP*#PVD=7<bZ!t%okY@
z^kH7Yz{^6G2UE3Gv(MH#h`n2%C=(xTZD>5)1JhYpBii;R3UOl_J6JeP@nLAM)H6XS
zHSq{Xxd^KZ=G~Xr%_-XZMj_uTK#?QEDLp+^q2-3}mr}GgHf%yZ#0_d@1#@~*R+3Hk
zT%0g%aGjhEgs-tHt~Y`0@SFcR$xj&MbB|4iC6{Gx$-1-i({-`aJ`sEJy{QY;(JgGd
zFp;A)3BtV*t9j$bg2ULftg|iiczEHXtaUL%R6`2<A|YwYodafsNlWMbgcZ#B4lirh
zdl_JS+*iUfb0MD~j+tMNc}5f)_8F32JiMOsNxW!}V~N(GtPm!GaVq<8rJ%>*VIuFf
z($;YUgG>B_E-sKkVG7Q(xs)?Zz6_{ln;)I8-;Z=`O{K@pk8S$M59soAueU8InLM-*
z;JL7f|5WP{xmn!k>P*%Dn=L81+WY3WO_)!ge^Xoum(#S9^X^q3S@k{mPbKYdVUzzI
zQ<I!PfP4eDL#%<>VVAJR!Mm?3-oK4yRz~Z@oF4!7N!{}Yo%E;uY|W?()-<f4v`QU)
zcRTQ#?J2Y^(~MU5me%6FA12h(B|5co#U)yNC+FYHd1~kng$piGp{IjW<|jx1n+N0)
zK?lpBZ#A7EX*tlUz!K#RZhYw>%xiTI@J4<=i$PMs{4wyDqB;evsg-?_y7-{HuFM!?
zwdyC~x_&0~0<X_0-hdvNW4FEd_Dug4axANGG`{`4ysO^!?i<I#HEW;TSKqjVuOvS5
z2RJFROvd&Iffb;Gy;SnPCqGN(aj=~;1>x4n0}L*X`>nM%{D~*O>^kGUO0!k@vqkbH
z*4yu=J}tRiX=nC3Mm|b@Qd)~xzd%k!)dhTL*Q&@#8$*~pVw(jAIceh|5{jh#8I}E!
z*8a~qWPh{`gkF}9Jb2&j3+SF%E2ayqc{Ks;QBpKF?)J56JF*0GO_{Hrr$F5=PU+_l
z1P^JSpjre^iq)%_9ld*yU|NkK0WApTY3LXPl(?Y7UclsdG`ZOfzPSzcz7T8MyKCX&
zci5=rs~)DuIy0ZBd7<^Fc#KfKQj@jZpQXB+Tjof;n2AhmMA;Gi8!g71TV%`NBkTQ0
zVo1t7OdG3+i|Nm*LYQI}5V>0Km4Z(R4)nO_U|&?X`SB)#R?0=gi<GvIo5EdPu3*zv
zEf!7og`~avw5{;S1Li8ky@h=%0}GbDdkU<~TT@ox%LPk~Q%f}NW8}>IfRm8nID!HG
z{R>^3_M=yFccV|dEuUia3B$kfO&{7Ir=T8N7Gkb(=79t{sEgL}`egGz<NqXiLQSpL
zcQ2|-Nw)AwX)Q%oGVci?w-XOWy6$vZOI6;1w-;=F+Kq1LwAAnO&Gi?2+UeSzrLVIm
zSKoF#9x?W^b{n#n^-a}mR_)?djGSp=WVk6#KDJ$i!mZz>nkp!IkVuOW&5c1(WjmBG
zM(m#Kx8wm5<Mry(4vb$xIZz$pXe_h>d**SUszUEkMnas}Yqw9kV@k;P`0=1-<i%65
z!u<NPt{>+ilhIbhTSR|Vgx)0&7mr~MdiBRg3nRpv4Lb0Pb09WzXcp=vPXJx1wyfNd
z-M;VBS&^p4Dy9lJsvLhY6wC2(sC!EypDIA%$X?)Abr1Z;Ly^03kEuviM<RM&2XOJ%
zbZ8Q@10JVrbSim1gnY|Wn=R+rDjKr_w_HOgplKqr_?);Cx8dD5UEBb6aFL*cI)t+?
zDW*s_@yl4-Fdq&4K)68KJThnr4-{CJjT=sd+^%>I``CBNXYxvu3Id3;(umbOPIG1o
zf2{VRYG~}4Y{+f5d&K+la5I6|Oe@7gY5C1!@P$aPollGD%y>_06eQpC(#HJYF8!e7
z#BH}9f?;_m%eZKYSc%3M6<*3(%q4#*d9ck@1+6o({04RPIrl9PV`{%ynze=Q|D)Wo
zpF!TQa;7N$Bc1&3Ia2>cIypIy1Ic-m=|NTt4vmb+9de|@fqnUoo=;8`q?%Lpqr+7q
zRi1J_D!h&r165<89;q1V1yx8j@Kxc3i@VSx@zXjyd=KMSc<=E_jj!W~7U(k9RO8k|
z-^!k>cyX%M1_6Tb2$3Il4U+=9i%s(#EQJJ|YWyy>E=BC&aHy#omqm&sZ)rcIPC6~R
z#96X~te2c+2{YwRLbHIN@YbpG_+}d{mC^Uaxury}7+XNwHfS~;^{(ns|JS*TZ$)4q
z%<|-MWIadT7`SE|%R$dQGX!AF*u4;fGX0SJ8;(2W733o;17<ZR#lDwttr^{+Z3;M!
zy+E+Q*J8r%VA^B+@~+IN$?7MVDBdeI&B;#Uj<8F7P)#x2EkgLjZ~e`OYX{k;7kVB)
z+&`=D%mGP}9bLCE<W7POmweqy*nUsz>8DO(4Vm7e9E7h<IUiAfr$rF|ni_&}{%f@U
zPfpH1F#~N5r_f6Z%41X_yQ}6}tt@OBC_UPhl-5G3YmC%i+bu!?q5VQoIGl&Tk6Acb
z(>9uZG_t3lFxy@Is+Q(T4=gW#9HxuUm<1o7!q1y&4eT-o51T5uby2zvm|GWEXRfDg
zyRUfK*$kZrv)mv7z6O16gwY0#d#_;rs^B(9oB0zfn_E}NO7`=V8ovJRnB9mdL2;%)
zaa+{CaoIGjt+XSfRtT<)b3W$4O%CuC+mMeH&zSjN(7JHnO2kjUC1LAElRs(&vUF%>
zSM*Q^8fTysI&^o2V(n~0POkky>;I|l%j2PZ_x`6ONfD8)%r}IrWeJgyvZp~9gsJQ%
zsgS`imh8)9FO^9MDU<BmkiG1C*~W}4$uL70jOllu?>XoB_N|`hcg}e_&-45JF|Yf&
zubKOPz2?5J<#T=B%eCJ<?{ZF)(LM8ekyRXoKN)Wz$GlZ}2_v~;>sw7g+r}>lIWAdP
zb_XgQ>!4#V+6|3%-mqYd_koSvEp2s=l^9rMTfCdxw}t~#lv27eD5!E%fSs`P6&dyi
zZ1IsC@>7G=rt?@PugZ_hC5WxZ^6yO5w=4-K+Oi(GpMmUM8`o$dTqp@|=sL-LK*HSQ
z{oWN~c#HkX1IQ7#1Gz7g>J+O`X*U=VS<)5<S+(Go9zL4Q87Y_c!@&sU`31qBbXam^
zc-C|~qJSc^3^(XL_Z8D#P1ayTfF1(6@{OSDKW4jswC)GM`bW<8KTRsZ?b*&o>0Twm
zL#u|0@>~y9&HI*}DjJ!Xw0^MUakMme*1n;cJMl$4q2W0cph#|%lh&Icd*=w8C?!fA
zNqY0T5GOS-3^xYoy<FcOzSVvTtMTx#$?kh$)3Q0D&f=i><Q=3yCQ3^VC1MJYAIkC0
z1#;IUwd`gH3U+dJzLb5DQ(lt(>3#v1HLFNmmb5CnF}OB*YZK4Ao!GrV4j^J#k+Tih
z_d^;g)Qbro>Se|Rna<8?=$;IJWR|bcc{9n|C)jz@x4to?_)|vcmnMkzb4O%8*?YU2
zdr#j5BIns_9XCO^WFS^^ZiaIy#b817{l*N#>-b?Cr=bt%$n$<c-!YhBXwbTbZKyXA
zC`nt|U$##M*9_%^-G_W=WxcU?1%IvwZLm<IARe`-SdHdDN%tl-;DzqGUw(MEv4XLj
zYm@PDNTp+)HOM$Ni!9YnjP)*mR5$s-?>=$a#_8Jh!t}zNE-ATobVGNwl#uy>euf4n
zLYX?eV`?{N>f}0QGm)sbRv6n#qK<EUo+5}LPJ2^;aNgbD*dvn+Umd#W07>JKqyd&W
zJJCe>w)G%^g+L*iG)=*%B7A0+oaU3H*x#X}tOo12R&;-%oAcaf?i@bt6~hO)<qauV
zblYaE*g@8`tDpS^ijt>=%-`h<lg4G!QQdm=SxkMY(VP9V=l@!g{+GeGzXQMD9{(*M
zFu>=)X;2w4lto6@urV#d(caTzALX*<dXaRnQ*FA6u=W`HzQhu@lLHBMq40D_`Kh{q
zwQ-U2)Wbr;#gysV;qb*5IK|xo#S{94TID9TlC!hr@e;>M>&ItQ-;C`BX?!TBKpW(C
zAvU#}!y}%#p2p`WrVerN)Z8)@E!b6j`lxvq>i$Q2Mw`-n=?0bJg*r7DD1r1_A=?3l
z3o!@pY+lb0xt#jCH1)FUu0$sBw)mb0!uxzTqF3FvS^mgK7nLq;>~MpuE-5oY5KiBo
zxiGc<kh|zaD*q6z`~GpWXbDBoJJ(O0w*qCUU2&5I{{C?{&Iv$;n=1G>(yKoE+E;|#
zIO_iPw_QSmt%m#;y^rZ|tv_{FKcm;6bPRk)y9($pxk~S^UAjG2Y4T_|2AEK5T`9xR
zd!oB%*5A2m0mNP5ReQz}Bf}{U44{hPyt8>Rr+_U^o1OXb)6%AefSu2D0-|`9r@K-L
z8NCoLvbUf=TMUG?S=z`~ly+|V%}V4x=X)Il|Ncyz+Nnm8h$Oei!gc`-yPVT03s3Ct
ze9Uhrb~nXuX+8Q;AP>EBM0sQBBgTa$P9crLUcsX^6iR`~dMhB0)pvQyV-+Q|b?XZs
zF3*Hh!x&`J0?H$W9Sv7`s+rH%@SGZS+U&iW-=#Ox11A{dJ)<id_{^vdso|1>cYycI
zTQBkYFTc0=FYGseXTbeZlCcEn2iE)GQp)#8YS2H!gZ~TTAt6wmHVdUJX5}I6*3GF$
zk~d3JA1l>VT})Y{Eo4kAI<I5}#NxfGwN$Q?&2{+1l~xDEm_aBu3alAcg==~#+=4!W
zBv@MT0^stdytS2K87~C?z~09Gxo0~_YN_fQoaEbf_}^`nyUC<`^e6qFBdKXk9IUiM
z^#ml=G`#udor3bN+3ghYNjfvDa<+c*)6#^GcE|f(GLF)XBN}->&$#-@JWA}!%1pgl
zx?`PbrZ9qdr`$VN-=8uCw_hW3Gl&=~cj%Z0%I7QSGpsw1eK=5omSi%73k1*gd=M-4
za!Rur2z2C>cptS7-U1t-TA&;$o+QIhFd=CwG}?LCTjic~f|d5#1x;v&@G|2%sZOKO
zF+PqtV`Zx^O(A|cMY;cDn*7SDMjzhg`LM6JYyw6ywKq<?dZUa)ujR-^U8Z^ff{j2p
zCnA+{L0&x!EpM}V-KKm->P;caiYhWSPHqXC6!+LmGd;vPHh)8RRD9LaT>k_1l4Tux
zoimQzZMc^fy`;nF>`U7=Q&2GUxy2akg8=SDPPH)862Qz3vAfnO0~X=^%+SB&?A>sa
zrW?8*(61m{|7B8ZME+rO0oW797_fQ=%jx0Nsu4LDo|`Wa{i$4~aKcjik-9gXL(q5c
zHW8k1C@o-8E;Nv>uDb69XLea!cH)fVig==C#aP#eYY3mowa86)uyi&c$;Q0YEFH@o
zoKY*@xK<RXU|W{ym_^h2Q0H4XaXwbpB{|&w%M{N->KfJhEKg1qSua^K`2F6uB<JYP
zTnmqfA6f`&UrNy-+UklYD$R~-spI<qG_~CRDgCnIO^N!a(VL#l-@-J%wETk{ZsRBO
zP9D>TFMra+m8Sfe)cfsd`h?>f2|rVFjUzSMg9VZ35;Y0u`rIJOvG!PgXaaG2PZyK^
zEzY*ZQ^PD%Grf2flpyu+gfL~Mm`MBbzVKK)!PLB@!c`<F<<>{T-=tP<t#}`inzx?R
z5Jsu>YKS4-JlGKH%0oji7TN2v)5R&_dn6qq!$&;Z=3JOaFA)}lg=hzOP&&t2SaWH4
z?4>u>I{BX5u}MlfUyND%JHRJaXi*Of!K#J4q#K#}DE6<Qun&|iV#;dE!_0F;0HuRB
z9J08TZ8Kk<c1jt#<2LH`l_)1Bb@^0Wi5K)k9Wf)0^3LO~hV5DwG5@^bVc2l7P5bq-
z`4~`$6<>{(oXiD(y%8N<rF<eG2(lk_hKM^rv$1T?AEgxuoUyRgajvPT(F#9NY<S%1
zN}fTUc<du@MIeIQN4bNcpx5sA)$Ew$O+Ui|r<7&-2RQWC6ddUc8+>FuM*m2hz8uAr
zQK%`N<L}c9<wTkE$;iskIf-oW1J?Sk$J2dRPhPurU1+B-*V0tRQ!Hh6(kBSd=LYj5
zL_Dpo99W!;?k!we%&B?op4ruA@B1KHN8xc+EU8xRPu%~1j_dDo*z^P`Ieivoj<crO
zH>RgW%3XF42&hS_{9-wf=yKOLDqq9Aqx*4<E|rbsfvEvRBpNiMBZUulhriv=z<EAE
zRUR{{KgUPB&;EpwQctU!%#ElymQ|Sf(hf?h<NfT}>dG^|<sq@Wy(9*yx^J^GJn<Us
z9Ov1(F2q$=Nh!qaTi+b~G7?4}Z@(8}m{rST=KcOY{qtO>IFS(TEqCBOEv?nB#;5a2
zGqF7UL_!v4Y|_}uP>qlXNiyZrVEgU7_B3KLo@c~vZK<#o&2f`1(Ut1>z(BoyBG$ua
zLSw$yi*RX3alg=h7QN!WOeV){eXgnA-BQjjA_+rj7;7IsoYP~87WD*GXH(Op<Zuu-
zC<_ETL&9)a(XiT8y-P4Jn1tDuP?LYCtvKH^szK=<vxwr1#vBXd^hZ9rZ@+@}dttfZ
zv`rP1iJ)R2XSqMp@oH1&QJ@(vVW;8dPc)Hv2PBJ6fYfg59o}-viPIagTxLc-QcKij
zGBB&G#3~FgZ>1!Ba`x?SwTQF3Xx?5vyI%cz5a1MQ`5yf?r2LiTp9~ZKeKE~Ht?8%!
zqx}D!Cw8Ci`=a{n3)4siJTgNP&5AVGx*Q?or5_pUC13t7XxlQ_rlP|^!ZgLYZlBSs
zl4I7{uIESzqivRx!|$-APUT8<jqPZb_o`d4DA}yCsl2d@8wv|%?q5N{umB<?S{{0f
zH$ollzKwYypQ%NdcN&9)RUcj}6tsE;jYNap-E*MIMwEcE^)U^&hStkr2CcPcxEmUk
zsSYW916{|AV~{{gP()YPwXNAVUWym_FvYt#Tr1lb4LF4OdpuZx>_rJ=?4{VoV_u1l
z?n<&}_?_6rum$L@H+{uRA-UXRg;EpsSBMcgd%5v^89H)2l7o&GXznPNr_p5927t`5
zt3Kx&mEivkT>V$R|F;3M--nyOh=aaUxca{5XSgz>M_>k<d)Z~=PTsUMY6f3j|8`61
z9#<RsEL`sp8yrh0Da+9)@FeNIytsnk$-vymsu^C9d$7@*V0XwTY_mTn7*Jmn$D+xj
z6Yvd5XIwJO(gP8PUY?gv+pa|bFuRz`C;B<6U^AMpq`QbFLd;%IRS0wO-fwZ%C8oyF
z!5lu}AM&g=Z8D-U&a*xPT#4q<=N=i#`ptEO%l6dMpn{6YN1olaerqw$nzLMn1dOUk
zY7+Lr{-~lnlg+(DG9_CuT-=*0Zr3!5pd5T%5Cl13HY%uj%9S|2w<5}bYYXDMD3-M|
zTFPGY`zQJDI{)Rt{GUXve<fi4qE^79vFuP*n(m`!p(w4t37f6$P(WrigV`6vnMa$2
zj#Um3CI}ZzIiXvSlekxb@(7MBI%FVY_i?2@HZPGp*Z6w$zTr@$p)SXk_NUW)+tYQy
z^b=1`kiGr30OA!sEq!^p>G`ZAdWG}|usrcF9LLOiIn`CYQ-#cK>$Nh*F|~3%TUyZ)
zIOEx08rl7wO7x!{|1FW<A8JUZ4j^ZQo%{+4_15j7U!98MI5%)@FWDWPf7!Z&r|Q&O
z2a7kW&7M{GD#D=>mBPTiD1%qUr6pK*G9zW}j9Jh@v186FnYtUHN8YvaO{K50EwLdH
zW);eQ87c1GC;14{5@byNuHi8HZJIKvJJOE5lmctCz0>|?(`(&TtU%C8U*7Fd=+a10
zh`-PIwBVz#b_t0<3*jJjWh2@WRRbMb)Q!j+C45dN#P&Cw21g3K-y=(Qs;~@yCX?oH
z-2^qLc|Te=_QZxjQZq!5Ax`HnreTR3+-|p!<u;Uwq#U<&p-iQ!%j&I`8JGE{s^iB^
zg+(eOmD>s`1wG8i?LNGqb5WgR@P|fd3Qe0c53jzQI~|shnMrU-_p};yU3SHBr`F9C
zci%kBqQkB%S>j&8te)WutYXx<dFSvN9x)oC0>hDXe`mziW7A>PR%h)IR~!<1Wioxm
zj%H<8i5(k%-E=F68DvY5iwcyWDNGsVHb5g8qEKFITBug#;KEXRm!SUDilX#AH-2Ni
zk(Y6aEyZEHGR>9wp}Jpp6E+&XkIs7a)-!;<Rt1e%Tzr{>yfz^Bq%=|c4Zn8EK)L$O
z{kPy8fd`Po7ib{>w^QRd@;Ue*B6ag>pLE#cwD<0lWr^ogeI)uWbV_$~YqvR?6i;ii
z;_Rpv2|4ouO7yg~n>MdUJ42=2nozl!z+-Ry33L5t;?cJQitR^CMPrQ14I_I)!@Tz~
zu}5GHHVP(H1lF5y(G#bbygcW7U5k`+-;2DTTDc0>`|LyglzwJ0Q^DO3X{QyiGhb;E
z@;V%=2w@>BwHK8wHNkw+lcyNzMhbF_kdSK1y;v8E=lEf1^x9Ngb2pY(CvABJkl?#-
zL!SYY-sG8oOLF5s8)tv{IQf%}N`BQi*#L1NyUanD=}@nD&Y2Q_2l3*uz)xrko-w*Y
zPir^>O!Lu_DtfUXGj)qG(vn$6T^NElLnBUM57sDy!*PWZL*uenDguJQC$xwSkJdzC
z*%Es*b+6ibBO2$a*>(4qKM|)_^Uc$iQ}doie?0R)Hdp=S-Td<R@DC{Gwhej+uIr~<
z7*-WgSuFf5lR9W?5cLO$3=T#z5p&6BCOLM}eCn-X@ipRS(jpW;+n!><Hu^k+MR#$B
zvAuSiV<X$$EU?9`aggo5g3i^2qA3psARC^VU>y_qA(X{%of|G3ydR)ran@X#+&+)I
z)8Ic-CL}Z<yb{>;VhczJA@Jkxw?+{?B!Dl6V2Oh>4tngSw*$sRUBGI;9dJR?ir(nn
zVp%Kt3d$tBsiR$(2h%IFzOMir1|@I+5&IPyaMU?M7X-93f4v^^Lk(zC{(td~(Rg75
zp%fY<U2=Jfgkd3|5%ZgO5?SaX<<t!whIeRZ_geMr)$3%2>;s-pkoS;M%r-k?KY#~1
z8v|b0ouB}IZPe^`=oVm-)3Y>5po@8PFl>6iujpavO@?)$f4Cg{L(Tut4ST#eMp_vW
z=-q&|r9vKhc_v6B_9<SH7K{rV)Yt{(&_kI_ZV|}jYu)-_>P159$A@K7GqYQ^>+Y@N
z-5O78#s9y3;XiAX?7>@>84#i}#e;G1G(ZcLbzz!&Cq=>u0ah%f&=6U5IbQlPA&wfL
z6(?Di_n85&Uk%L{)O{E;gQY#hFZd!yGdv%~!m_`Dt}uomU({&IfCKwuK*^J)xZ{f>
z&kFYY{;2zX#qS+d^(WV(|JUw#wg-aXItl!dKaV0j$SL^ta};krQVXz%$FKnYg$=M5
zVu?7wP;YgU?e^L*uf|otXQ+DVE65mjLw904<D^~E|MNZKXMOeu<N+`${9*i>qN6F*
zYgk&uB$N}$WJM+1efo+j_Sh%aMqNk{Z<Q+5X3cvtb%_5Q^EI=S--MogdBTzZT_W||
z#qz|ymXSUSQ%cM8=CgbC_*8a1;%FM&{F95u&$k%(eHFyit|3m>r+HAe3_`$>!v?8U
z?Mfwg3J^+P%FkyyYnO$nI^J{N^~y|rCzQW1_szr!N;jrP&5=adJ<mCxuDzTZP7=OT
zHY7QAV2@;d!EsRjt^<37@piOP(h(NoESZ%uN>aw$nCeFz_sIF|=MNv_DjP~$7yCeT
zbr2OjI~8rlKIT}4=J5!8d(ucb6a^z$W+D|afX38pprlyToinw?G`d*w1hc8Dh5n`B
zN=pdUm}Ri!8`p>587tWQd5d)Iu$KHlhWX1E?xL8GJI`a2g5T1mwDJiPA6$nn!QHrC
z92%Ol)AbB9GfC732+lh`GI(gUlr&OW#Ht*Zi*+>oQ{a)x=KgW(?&~szzwI=>;A7H4
zOg`}L&8@cI1^`|UUX?CKd+C2G7YVjRj25`0PBtr*Hb25J9Vt*W^gQfjsXuAR=hCIa
z_YpP=qf0}W=th)WvJ?><9(b&Ob=%o?z@fINxW7;&!+%uoh5OyOy)8`@YF5ZasR_Es
z6q=cC4UmwTNZ>GD&UtH(aMT%pp?sy+C*r%wTE{}(In5<`*xbic-rs&2q_O^eJorM6
z3mKzJc^2VMf*i16GlD>4-zsJw8VSX7a|H%*`CE{02dYv55s47;oCUQmW-+x}yVreU
zX5NCIm1|#8JR@`>NCmS8WlJ-mn$D0K&*8U>CNQs;4Fj~^%`_m*z=@OR{JG_?vt&Q7
z4iLzqpvlaDha^K(;~*s!UtX2KRpRD88?HKZuuP{s)v`}5>{6rRg-IW!W$_@oE3g}f
z0QxXWzo*6|+YXs!PMIEh_;gOdd$n|MFg1htz-~@+k>K)xV(@4WV@X`4gQ6QlKTh4b
zHOUe}-^oBnqPS*^ZVZZer5PF`Jj95yReK%~!eG;zLAr`NHWA-vB`r5+Dsz#ZdR0TK
zz5-Heit+|;1>e2VeX7HMf0KDvMnl<tPrL+GD2@o>n4n>rV6Io6eGuEoO%fC~)mprU
z1aYaV`}MFS&wT|Q31mZd9nnFSYGQ~LVcQ*kk5h)QXKGYbu<!8E{HN}*A3lDh*p{~u
z7l+;-C<XO#os2UtT$DB!P30%wkbcIgzb?vYtIHiHh~5{SBUmgzZ=^*j;!jao;#R@T
zwCC{HJMi~qw>zJo-6t#U{?ePfA_*Gy!ajb)of>|7=L40-gMIkt6uH<M5$ff&q+a?}
zO4Ym!{Cp2xcE~SoI?sNWXZI<ZJAbRtb$jVHd6;^zCfMtHmW-)Tc=B)x-Kms@K~rWe
zUeSzu<RS}Gu7%}3ei@SW%F5tE)Y15}{O<9bWq9^JU^ho~umEEpGGwW#r?4F1E!SA-
zGmjI!n159=YgyE5j}`2u*eTK5?58cd8*#Bfp9qxF_h$)F(yhiqIZG%u)?-#S!UhV5
z!`bpkY3-;Zm~zAwvRK{mCHfhv^2It@Ezt_a>YU@6E-l1WjW6r>R#j599%(?hW@}r#
z4(f6U6HRCvIV*T`1NXXS2MNHwQJ1!Jg{iHLY;C1+OnQ`TKkwD;Ct{=I+vAEw)*GRr
zqWY;0yHeN{qCs2X|CQkEZ{K~3Eq&`$@t5)rjDX&$w#<l3{#+0@vq_NJ-amx8|MA-=
zK{_15(Gokk%gbXI<BwgM2XTLiR#};TVzXM`tnr75+TYWk$6q|3H?P0q7uuE!2rWeq
zocBEJ7BK)i-p;MpoUk)#AUMReHM2ADCS8M~Q90!ij+dt;fHCw-&yd&)gWgH*w}h&F
zm68z-O-<nqzB#X#>-nTUhGGmr+u)~l3S{(k_+htn-Vm=&8JUIB@yln8;3l<6nnOyX
zHz%&%)k&?=lmn;3eD3$T^Vgdq3)9TE^B59f4w|lIMISj3Z2kGvx?G+c{%G&&<;NNm
zGZRpmt0yx{6UUw46Qbqp&$-DXmIpP#nO?AliQi~*>inc`Ze+NTxbBc+A4<^q`LnDm
zxd9)OY$8oLZ+6TsG{2XR<x*9PJKa7d-nztrI@gN=Q@Ms4hYwW^N0jb&Q!QzeB_f?l
zWBf*6C6Y8J4|as!Aod)6!cjSg3uOq<18Bqf<&BU7sIx^qY3FGP(y6_puD!OpKF$&r
zE+2gz)6aWfsc_g+jjkCa*YTqO-Ta=fpcp*AB|gbMFAi?IrTo~N<)KXOqE*ePJvQ;;
z$!eC+XwW9G9bpkImnuh^ZNh<Egp||!b;>9@S)|Ww{)T3}B4*JlvK_AK#U*iU5t6rK
zxTaziJmL8RAXE8*oiF4$><zU+Itki*M>);Fn#_{ghkZGp*lg~*H1{XWa_8UL*KV>+
zQgG_$CA|b)z9tuL@Y(2q|C+=7xBj~0XLiKjkMaNLIWBQ(J4rF`x)PKf6KN+2gr4?R
z)fLs1z4jR`6N4k4$_>I1{whoAWpgm{5fxS3ONl2O*qU=&>PV<@O>ZfD_k4O<khctb
zhilpT>4R_h5x<>1V8}C|d_eC>Y(BF6_{RgT6Gv&h=)$=nYe~8R)#$Fp+G}f@8U;%=
zxM;5I@J|2Z#;Ust=Tg<#cS{lGo^U)EU2~!NWPjRxZaHEL9~nb>dolN4=zZbg&c<(?
z#l26=6%uzp?@SnKgMyvZZ8`!a<=|q~Z+Vbply!T*2-Y4Nqs5r^d*B{;=rp8$e&)0*
zT4`>11_nXG=Ek{q5Z53(P$Kr(NH(4DwT&k<AC-z%(mjhQ4X+trLF<~F;gFk03(H&3
z>?IzD5gNS1O8SdOu?@g_b>z^~Lz)=bhKCMgu?9e<k@Zr>>YSG`OIuo95g{0t8$ST-
zC~|0%x$Jp*Cq9N}u%dsmR$qO*?d6TW6d7znwq&K+&Lp^P?;j3Se~Cr@)erjXhyMd)
C&2Rkx

literal 0
HcmV?d00001

diff --git a/docs/img/domino/surface-training-curve.png b/docs/img/domino/surface-training-curve.png
new file mode 100644
index 0000000000000000000000000000000000000000..992acbd424a113cd94a35bfe203718bb68b3adda
GIT binary patch
literal 71152
zcmeFZby$?!7dDIt5(XWDl+uk#cPJ7A2+}1W-7PVIbVx``3o=MINK2P=gMf5{Gz|6a
zIp=r|{@(A8@89pbUcGdN;hDYnv-jG2t##k`dV&<>B=2I9U?L$Q-IaPFp@@WZQy&Ql
z*#!L-_yk2<Q3Sl)uve56Ln`Qfum*ncHB^%_f<TZM!FzNh)El@+HxZYBzeqPokWl}6
zkAx(BgY@6`iZ|&0xds^tDZms7<)3Rbz$@ZU1o#KO{m<*ov>X4mn1=lO>YMs$H-EoJ
zHbH#t1mk`gc)_rFp>B_aL`Z}9cSA~%`WF(C2$Gb9=&QFkHj^=Gm6a#1Qu&eN)*kvi
z?nv{x6LixkA#>5|<GdGdt*Mz5eA2v*MC0Q*r;L()K_&C-)1gfWURI?YJpoG(5e>&P
ziPjIID8AjOtTt>vu1@ne_%`jgc~|Zk@7VYK(oXF=+N?X8G(RbDw7ohi9WDKij_rkn
zLWy((T?7gFueT9O$cBuT&29e6EQ#FeN=i{owUXiFqxqWLxp{ftO%3Vq{C(Z;uZR@m
z(TAXBm623h*gnilPj7NP-VQ}#I!eic{?W8Z94@`;T4<=I77ezZ_9PMhz5n_IZAGKs
zplcacfk?h#)DJq~>+$_#RMpWUT7jkz;v0YVjf9d;hfSof)|?&y!T6&$Boqls)|PKK
z{^$eTVilDacK2Ol>34rLPl-+Eg&mUd6Xnn5MXb;%TkM)YDqf!-FxD0f%9eS0p8shh
zf)p9O_=gP)|KF+ndx~tq$2^#F|I@~zKav<TcM=2Y|8$uIJ`VH7`=2)c@38;B&vwl2
zlRqXq;@!9OXCa%7XcT2*I&LiDIbpLT+*V0s_TM=wrmSil&-GHgMBd?kd#I8|CRtAP
zuU!{lNKiEm-@@Quf_RBA;DFyt*FiM@`TaB5Z<^-AEk!I`dat8eCD9Ou>4hG>Cd{N-
zU_s9Q+Dq;~drYIma6in|WeIfkT;ZQ(7-x8^v+2*`@G(IRTRcwM|3#Xt=qXi;*k@^#
z-zL16<OM4y=o;n<3bXb+o8mRbfA+3({um6mTTNDS$oTvFMC=!$-#nT7S)lA4sxoB`
z#O5uE4wNr{H-Jz0P7FuS?^(>GSlTaiE$HEhoR>>({-tF<8WKjC`|UiO?n?4cLccG6
z14(1)mK{ZPVE0L*>1d#IV!s_p#QyH}m9g1#%>}MJ_s1ijB6bYnlo@WPMnP>4RXQ?4
ztHu90FtG36sQYT@S{#!1`msL}3g5G|{!1Mk{p58ca_@h3Sz0{Ci6gCRad$fdr1g7B
zg=ktSO4;RO?G8zqr((?7)|o-G)sDF%BO{qriP{H}6wqkxTLQ^(D6wq1RyCU?A^h9b
z8wDzkS`8j`mJ=nJ%I<Ctt*X~xZ)cKwG6a`C%S1t@hnA!Sz&ZBLoifdlRtTjJ`k(>Z
zOGFQ+qdou8o>AyHp1gqG?8UY-|8$0Hy=tLOvPZojmb4cnnQvsYK-+vw*RzhyW%0w7
zOP;oAFVDgqH~3l>q#;hrPO2t_>ZK7fe9v~$G^<J?fWqVJ#jgP2@8{pzomakp#7h-F
zUt;r|sNW5>F59T;rG~c)U+NjBxWF7YMtdNh_l4Zw8kej^8e<l3Ti5RTC);%~gpGM#
zu6VZf)u2%Ni6J4o$ovG~vCXY!V4WR}E6btAK^0HSFMqv861iP<R6r&ZXI<PPW06_v
zPSx`GU~jo6(Q+h5`M%<t`f|&uitiX=U(b5^cREHIy4PBXbnid{JG+jWZnJ%LU+GOI
z6@zq&p}a}i9?A-@I~>)NTl}8D*^QA>vHHXF>SDL}?dqq1iWq_1vn%*mgT)XGvfV!%
z=Z>p?b=GiY4zG#kwBD&Zwi@YmFL2B4@^~;XK2E%&X-kK27B6VivIDbq7V)#(PEQ`k
z{kT;?rI3T-G-b&hr8Y8~b5B^AL-)#RzwO~swmh`U<G5xv^8t4W@zbYI;|EunoO1e0
zjr(2=d}@OvJne+u)vR{NhAuJbVzBH42V>TN1>f9lr>F$W#!=?p*|B-D5MF6sP}TO-
zPcD{Kz3nk-=#xXArjy-;=?@IhieovRITX5W!yduaG@p_l*5MVyk1+%%GP-a@X{y*h
zM=S!AkD^G(G|xoXEKcErDvD&H22>HK@5D~|dr7G(n(y(Cdz@{>;a2$ui(Oq_48f#M
zUw3?%yn@eOUCfI2mecTK_*O`WQKJ&Mz}m^(^6;uk;=7uL2n~XYLP@##7>Bat!;`|=
z>X;q<h%u|S{W=o)es2#2Efnb{bx9>9rNlZIgUn06FMBFA35kjPT3biIR&8MF0+d3@
zrjl;K4+P%YXw?dzE=JjYh}l*T;zSL)I&#Me&9NRDVVkP&c7*zDDNq<|dM<s+u06#s
zv5k!E8SjJraG#gd28KUK>m}JWxP7H)4V_VIX<7{gHrRHKJf(ZN7e{N1yr-B|&g`~X
z9B?s*<rzq%q^oP)03RuoHLufgSt8J^v`XG88<MLy+pd{9T}ibzXb%g`I;7KRhM7iU
zt|ZxqK`-}Hk1Ltgj=+JW$(^}DCwv;zlPWw*$fjd)0Y5C2cb~^Qs@s9G8O|7|LMx!Z
z8qtnE_PMvQ*8XaGTc1F(OeWXyu~Wxw%dqRXY9*yqRTnzK$i!rMusU#bSm-&;iyI^y
z@305oD4g=V81t-xGiz3SfBv3$F8+HG?~qEKnqHi?(?_>?#Lmfs!D!dkTDrkOG3)`x
zRr6x0YQLACj`rk48cM}l@`jd;RT;XtyOSBG^Vo%8bEd9FlY$1Odp0>XnJ3<bJS1V}
zI$zo+v;nKU1bvrhgbVQ*N{8Nn=)U6l_3ftRH?JGp0w<}x?-PIScdjt*_>)@M3<#f1
z^jVA->2*rU-lJ_%H&7nZA8~fut}?D63Kc#aRyqQEc;JG}q%KK~2}}Ka@%`tV97Y`&
zTq4XG|7o()+y_bW;y7K8iDb_W&Dmi2PZsjiF#)g#eJO%dGtgsgy7ad9_YLsq<jP8W
z`MMoa7&}C}+;-}WCCMD4NwMJkqhE*K4jc@;%y`cGDX)RlnnOND{*~?ia*w@%hB+TN
z&@4;t<v>WdjawdUzsXTDZm*X}<Oy7wvToSo+5EZ?W_5YydEV|hzStR+Va9q2jvee5
zQX=Ly-i?Lr7;QAEOpqMTx1DQW{Y4nC=^3eL2j+E&t*e#@&w8vq@G8qr){NF)VGv^w
z)vcoXpakEm%hPonQMQ7<{8g{x>wf8ais}D)B4LIW3u-=^7ex2L+|}h-wAWyWY03Ug
zD2^gDGMRIAqk&C{ZC+K!B@-`9$N)+Z*FPNH8Nyc2!-{_ay~q(<Np{xp=6u0s$9E9P
zy1USA5?(IqNF<(lv7dTn;d)>0UWxmcWL$2Y7x1USWAKbLAA&lqkazc{<4oj?=2CZ7
z3t^8g8?KJ8w9X87rRiG8-8MP)2_FSjF7{8@MkUGr(lBT#m0uDN(_cxPJvY+AUsLA|
z9r#o~#8=@UdSSm+K9+Dv=uOyumz3-4rIOor-LcNG{%nRMR$Nacjm-Sv?y1=(JfC2Z
zV>qB<y+BBLcEdn|ebw_D-{nD;a23Z!9r3N97kMwdq-~_m1%VkgPHm@5!Wx3EE_bfv
zz7ie!wblCSM`WLzZ&`cV2eb`@a?hBQw9y9ihCwgLOd%=pC_X|ntNlD5ihA1iE!cH?
zsTA^1?B*;U5g9I;Uv<0`lk|BT<IUZ*h&4#IG^_&uvTvPx?p|H!EOqrkWQX*$RwvIS
zi*^Q_W4)N1mXl9w4hChWRUPl**1cHwd!8=I=2A`f?laN-3vu>09oTg2_^Hgpll&bA
z^Yqrx5{os%{pCb!Cvw8W7_BC$hlNi>*-}avI~eJ@V_JfB9F$|dnGHpEvv|}MsQG9l
za&vRH_-9YU1P}VfxBIJ@&AM_Q>HYlnuGcQh_&_m%u>%(tC+YVSUpVMqaSVFYJ{~W_
zZmbNip8|C8qheEuU^=_<M5k@ZFEZN}999h|%RmxiV2IdkJ(OiaQ=hdBVwe1eA-W?*
zSxSXjU5SSJFg!%(t6^tkW?w)WyF$`<*W<jCD0!iY18%Cz5|r-2&uN}%9tRl|RrVK>
z1}b7=0kikSrk<XB6~D(zox(K~1eeo)_D~j@)4-x$wn%m`s%h=0@&H2~<BdzUEYUEX
z*^X^H*~(=YxyOl~aq-bZZ3;o}JA+GjDz(h7;b(+O>NKsdMma8$W!U55;->cdgm;)j
zJBQdy@1M=n;lLboN<5@Qa;M>%d%9^fUzM?tQ79u4yo^jw;!1}Znu3(?dpODQ@Gcv(
zni(6vy)}z5{P@E21zRr)y6-cQvrAL0HHChj7`H3;2gc6~ac(>POcV8a@D?(SajWjj
zs!W~42iU0zUpkiSsj4evEXrm8p9S({-9&-B;p$>XTgO#J<+i%v!NdHwb?>F`tok)P
z5_KGn{&w>o59MV%>`|u}O-LN9XYCosYJ}L@q1YE9p1P;E$kI)6KEo8(XtevBs8?In
z2=CWvbp=NXWXiEkV<sSHtwjsb>#(imgkLJ0k3Hd-0-?h6Q^_l|GMhOu?&Kli#GXRf
zoT+pivrdk9iL<mi4-Gm2wIa2K4;9e@ty)U&h;vS%&Nk;Xr^&Msd<_?Mfwj+3^WNMi
zwZyZ|vrtxP<m0_=w(wb@lb=420OoA?cv{7&;OK#KhIpfV;SkTr!B`l|;iuGj`nfFI
z<>nQkW8Re;%0mwg2L^A4v{_U$6yx%UT4$3yA@MZxX{%taPX${Ldi3E!nz(!Yb2tM@
zE*$>lViy1Dp_=DPPNHul%xea$@HT~q?kB`<b6XO0368pi?#&$x$vZ2_V)1XXpZ#ja
zN?v0BesLGddoGq1fw8(Ib49!KcokKF%>({(?MgVVdoP2P$;9@TVfolPC3)u%-iYuP
zbSAEy4Mh>B<G@zrc&`VpQAWrCnUf*ZTW8NFh>8(Or>1XmH#SR1@ubo!di^eZyT6cG
zC%fxj{rmc>%QLbrrJ!;rMsUfk)Qe5yi)_^WnD~Q@`GY}ne7(yXbplRCcp`NtE^;M`
zC_WNi%=DOh8pL-z3V9%YGMtVwYs#s;!whP2dsUM{4yy`8TV!@9Lmeequ1ufzV&g+>
z@$$l9STpv0f~?Y`RcYw^YX`zl>H@PcC$D7Yr46*3D3yN=u^Dbf#ouH4VqtBV{Mb<1
z8-3r<WK+vw;MumKIsV~GYUPMPtYG%k=S{k2i3|>-c3b;qJB7^7GhMl!9d+oph1j}t
ziTp|w&NwokCX=1PO&0bi9LKbMTC4VF>fEMKiL`!R8hKL)YS2(4Bh$PPVbiH1)D!*A
z$UhXK$8K4lD&xM^-(G6*kyf*H*vWQJvc5FU<5UJ@5HrD(4`a>ZJ1{>#(vA>`2pohn
z?Br2uPde;oCkfE!!wyb=a{C{RZJk2m3hUdtT|dQ@zpi%`JsiqVyvSJT9v<ePc#~}=
zk4+b(qOMJ?PmK$WXzeRxkY-iweL>Amjcb_sVZ%wdT(K_%iogA1gjN0J;(^%4!GYp~
z#kDM&^(CQ^ZVT47(3LD*i0I+QNaX!nL!BDS;zc{u4%D$v{60LAY_pa*@gry%TcHcl
zt}f~mI^&U<O0=&3ShUR%uM?&-h<YJt%A#)bJxhl4c`dUwCOnc!FNO7ds^KCrk&Au+
z)*uRbr?R>^K8E%PgPr$c0F4^jf4WE~{88bYsNtm5*PImr8Tf+KY?r`4aF1;OZrGzV
zE~rLwN~5XilYgW{;A5)|t-rWin`<7quxE(culhEBr9b)+*$i2k=Z$+c(v3t*i-!Ue
zu}k~+mPyOPOt&=0MVp?B$);?U5%y>%EgH&;vgJ0$L0dbkpE$0+GUgcKBT~;gP-N&1
z%EQRcd*jObo>C^xHy$N7?`;A}79|?X)%l9>v<stoe9i8zryZwN(<VQpD*7abzMp4|
zPb@Sltu*>IX{q3t+ONjKrM<Dv1`-NfefY0|fe5qj&QHU`J|AzH$tSwko5d07PrQfb
zXYwEau%B+H3ep@AjkUR8=|o=1`T(~(^+Q6@KwlM))%iY&i}P}qaWp&V<?cgWbO|ZS
zp@1#=?w;u5qrSC|iUbpNH9PfFklm;!H+il5WeP_dSN)_U`^8>$K(I706(KR-D=anW
z7WNP4W!O$UplW+%5;1gHcy>DJhj#F;LQ@ORtFzf;M_Iy~khuLJlG1$Ly?xiUW2N(2
zR9Vx-N$tZ}N2z&`s4ji|Kqo*UVBk4DqYE*ODvDMWgK9^=CbWiZ4Dn!TnD`*q*PUj`
ztmJ>HPBnvD|KimRNc(kXrZ3BWw&18j$`MO0@w?GxHzq9mgT^CT^Rm5>0BQ(iB;byV
z?BNjYe$ai#cOpXA{ZFmbRjE7d2wuf@F}EG+Rq4Or#a_MhPFA03sIE7fpzR3_T?=*X
z(?iJZpG!C72+68;V>e9Ir5vdFQ_ycZs4mXw+9^93XQ><IRQiTEqwdtvtsCk-6w_U;
zGp=70c+(lrVc>rO(pB4ZnO?M~5Xfn#i%$|v%k$!}Gu+Q^i$$lztCR2Z&$0KWWa@k&
zS_SP88)+S5xSWcxZS+`EilEyhfvVPAp2yxVx^0pbQPq{1YW!P^X$q#f+l`J&deLNt
z<n`KHmQm~TV=v*ZhEI+T=fo&Do*t@(X&nrRg(<(u97zy>p7b9)``by5IxWvXlQmpS
z6!#jI2=o)##rZ{{b3R)w6ucVY$VVaG34sig+^+j<wm+hKK2P*aFq4j+o`#QBHXa*J
zJlvgnWIK4fq3hQuJ0uY^ILHCDn>6(vZ*)X6dX+$2{1CoaP;Hp&hKy=hcpT4pBbLWc
z*rzUBT8{1HlD8`aLYBX{#};ULE+9&D{9U4!@P?~1;V~3TH|&GY_bpWMS7w6z_f%40
zQ@=EiFC2u~?C|3XMO7q5Dp@v?t)nJCbS6wp6z_}~n}xz)ab0-1Lae6Nyj!|QL#dZf
zb#WBg&>eVhT-gyCKC^rIJo->R&RKHIf`)&3eRY3r{pFY4m-~TyS7HkV4b5UFn#{Z*
z@j811WdX|7js!3BlRFP7M{gH7FQ-8V2mLHynJ#hDUE(T)!p}HH$|NAkk%rF$m~}fv
zn>bEd#Z<l&`8@DB5o~&cu|LYL&iXhgMk|~YgTp#=kn0B7-9|{X5--I|kC&nL7#zae
zwe0fBp82VJw~gzv8egsL(O7+t=O|#DSx+p`nc-tD3@$lfeDma}yw2;=2!XMvJPR(Z
zpgS4P5?b0eO|X7-F_~lY@Hdv(5iHV)!LehfLEf*(yA%;4OTK|$f<v!<DE$J<TwJFi
zE8?ghvw(l^nF-lP=ct0Oy=4=Hxq}`r_2YUCQY9i_t)}9-TgB0g@5%VN$jL@D1g%e9
zmts`st%e$Y`LUHHJSQxCu}M9)0IR6rs>diFg_(wbSxl=}YJg*J^5;wLi6ZmznlS&c
zX}-I=`igDGgRMNQKu~eI+_c(zJ*C&1`gogBl|lk@E;nY>ASfsa{<)OT)GUoT9~1W|
zfiNecXMC($=%VLEVCQ-a<4FT4#uGMgrOZe7{()rQol_~;iG8nuj>_Z-d@vQ)uX-DI
z+|tcj^L;CPt|lvkE$y<(9Uha#sjk9hIr+xUHveuaC#MpdGw=m@$u7$u)pw{e8GKz&
z_r?17@n5tyY9=oVh`H6Bn!d}4f{UF|F?{L0WNPS>)FCX#vFF2C87e<yYDb%Ktkca{
z91F{It6*vi<$6qrRs~m`HMNZ3$z~gIPVkSgx?;0D*wXDlq-S*FPg6!D=jqYNMI~{w
z+p3SfvrEp~gI;;F5Ii}|5JE9Yl8QD>`0S}-o>1n|RKqaJ^J^6L%H<hB7NnNO;xU;h
z4g*tWD*O|tI{diVUTG@DW%+((V+H)h{(w)SN$y?X(-VX=m6ey+lb}Zva;v9yOCk~%
zYsa@`%T-^$I`1I*Jy)CP(LYEyT!@B3GIf9@4t`?Wb9GVdKj5i5Uq_WSx5pG*MO7tT
zSvOSO;sp>#@`_j;&w6+LB;Vs@hyq(i@?DMD3?dO*<rkbR11lH#*3j;<cZIctW|klC
z@ji&ykA}ltQ@_`Jp>s=w4}^OxKEN|Ei`RPV#-x1y$tmUS?e}?iFmWGrrnB+p77%ln
z*nQC|*YuWo_$rol*4ZqeR>XDetdXy~*k^RQ(=MajjY#LMUK5Y1#^J}Kb47)OMasD}
zhA`KOs~;;uD|vh?sh%E_kk3AS=Fp#g9!F1<T-Eo>Dr0>o?(uI%P$D@t<Q{#CJk*qg
zrCCxuo$f=+*BhSw&;~|(x$WFwU$98CdNE^acamO?ZD!5x;I!GDkvmGip3y~WU8{$?
zy6zULdBMNL@(Erkvq1r1Vi6~lgAW~^^wPcA3Zu!?V~OVI(YQA(1-7sJ%TXStURlZX
zK7_@2xJV!9W^uC9G(An0QyML+7uQWAIIox4PWQTcgypwa!KL4VK8C6Oin__+0wlO?
zRXoQ%L$`H|y;D=@<wHM?p1lz0{N#n=&m}b)=TY~?4F~oWfepdft~&L)q8V7%y>8xH
zEkoaTxyAB2rv}9o)~~$qoRIi1xuwJ#E~jKxeorP=RQ>6YAAnl=3aZz0z8>~@S}5q?
zV(!(hOy$?LMf$+zQ3Z8fnPly%9med&(@j6XHHU@;Tu2X8ctQn(Zeb*fVcJE1+C5vK
zxEeovchadrj|0mt1E`}tGE2<<2aC4)ir|PdSIfct;xG01GC}MkQVSAS1?BCca#Xvp
z0_;^xWLMKDWqw}z0&Lhzy1!<M<i-{${f*_ZPcqSKS5deor-VPIj4cv$N>g)UtO?$i
z2qo+R+|#-$&b4mbNyS$}y}f9E)TqA1qf<XU&HxosJU9kbXL@?U5+|B2=1H9JoNnb|
zo!fD*T5XG0p>vxr&dg?x?ZTOyO79zU8QAi=p*A5KL}r+0jHDn8MV;O7Lljbk-<MKP
zx{{d}4&9NDti5$moIbOj#;O?19ggWb>%Umwp3sKE;7$&kX#0E6hmBborBK}OQTFKg
zd^dz0$15LcvO!N%Qmu{aFa3Mibf+d}Txs@S9BciBybs3cuI_$ho`lCo%4^EB%=mOk
zTbb$Gc&%kh5ytCdiS7;->Ix6d`REMRp*0t%WjzW}lZ2<`K8yeAIPz6$n}0UI@<RI8
z3z4-|*$9)aMgO`w7wIpn^suzzqLmakryMCVva?39ruC@TDM<jb3_@9KRn;9O@Fs`i
zDcPdlKwreWAxy)Ox*na)O2f^`f%jg&HtUdCDNBxXi)Lvo5@m`(CZ4NI1zm3J4s7nh
z-=~3s8A_SlL?MfmPN|j|vk4mtrk_ZTX>0wqM^1V}Z@_G1Mc|S+nuInQuU$kbYChT^
zAEnEkVRAD&mAw```Ih60LDh>PIewP)^$6j=xyiKG=tZ=7ud9QD#nK?irzgknvCT`T
zF!|plv6$TL9Ba6A<VGEk7r#@n{?iA`si2~+X8)&P?>}6PpNNQVlaFrgR81@ld-6Hi
z?Nkv>^M=W(rlLJlYZt#*Ien&+9g(6-^kT}pz4p&vr$Ud40m%EkiZKrR479zJbo)KI
z;N)T`q@eR(R%g*)M5rEn8MYLS{LTc!grvA7$9Zxti*b2lR!hna^4^I)g8w(KTJ?|;
zt9GX9c|q5v1#jl-krx#&V#N~q`N`iFJjIBv9cvlfbkex{H&sb5gxsmyw4d*HdTA&$
z`}QRTdNlG4X8p|m?lPQz&RVc25nibzu!o#1%(_|rOV}9CL@D1P5f-C7(E82dA~FY2
zO8Q#w6}?3WIW|2ZvRX@UCY>qT?=k&F{U~6gyumRLU-4dLWW@M`D#gdU;j6}RNNf2g
z$%*NWB$h{69VCsw5C0>MTP2XFRAlR<G=B#MfBov6H>DRgQ=nCP<h4t|amRrp5=fwA
zJxKTm$(cq8$X3ks#4pr;4p9~bTfcQUvFuNxlpY<hs@|)#|99BG7vld1vz=BFPa>{a
zLI>!xbE=fimLNQfCf}gljh~&PnHxV0_u(v~T9p%{qpviFY@N}f#y_WS8Z(8D(@}-p
zE?w%5ohVuAw(R)X$n~}ZJ3>O_^-Qe`>sV(Lon!>R`eXkLikt7}OzuDN&sfU+ald)s
zvC$pL?y>N>FSCz|0-w!>KT{WL?H7VQBF>?`hpscpi^Wz<q&h~|V*95?AfBp)81)nO
zm+7yqjO{d-9EJkcJ#d9Rx<45Ce|CovQCVi5s$=bN&Tf#H#om1NiQpg1Gt#>)V7yc6
zHlrBGhS2KP)EshQc8`K6sBU59ifUyZjm+0Z)a%wR4cPy@7BrFZ9(f(QqwOD=dPJfg
z*EbLSTZGx~G!<Bixa@2Z7Qf=0IKIoQpZygZ?Z54-FbR6ioWA;0DHPLuPk_!p?Bkt(
zX>BAFOhhN&_=~V}!dF_vR%zDJ|NCl$3}|0#kae)63vQK~61dXxaO+J1a09>36>LUu
zE_~sUc~Y8o+f9Hjc9}SB<k|qq9(iGJ5VI*ddw+vcj5yCTkY7JPM0Rn(&bOXjhIG?5
z1g*5xuH9io{;%x_A~0^3fxCX%K|f)=W>R}P`oYci>GaswPnCW#B&xO-=*c1_x81gA
z{8w7~Iijfkn?sQsc%TcLl_i;xsmWnp-E}F!>)(!G16`aB;yrs6o-))<S7x*~&*$dB
zxzU1~W^>IB6b8X%G}9MVrswsAW27l~_RR^`MIe|yU<%m*B4w`5;o+sc#Xk&;HY+bz
zfNs9%5e>=1YqKpK0WL-17YwF7bJSmgYWk{$FSOI(`!t9Bd0I#FfQb#l@+wNy)zl$$
zie0?6ph)l1!Mrk6v-X7FQf`GY-fBizZ^(=q$@}?#T<+hh1sWM}4wq<CST^UjW4*6H
zvJ1E-a{*KHh52W-q{iX#j9VWei^dzERVV{wD=Cjn+E)G9_IBkX)I#l=(dE9>oxO=#
zmr^$mz|K`{6x6`J!?LqGb>7aQY8LDJ0iGTa3BMNizf&xD{9~xA`^m<#zyB@GuQs1@
zcL443;qld%N8NCTCz={N!PyBs-$;n^f{7S%#|zu;l;6)lYhU<7%^|`7%p!Bu5#v)k
zh3Ah7%PFJs^77H~aTDvh!`Plg?!GV~xXF}d<+P%}2%oh6HW=y?jL+nPu4>2I(_0tE
zvz4BJ1grpf>Qpp?5-xd8VPU24`5cy<ud<3tbqNu(+H1Ck^OCwt_y!jg?vPs`cOX6f
z<A+5E5nF}l#fI)t8>#hIKEQL&j4F#UkEeCn28l7h4Y&W-=)vShKL*RAxYO-@*zqV_
zt<q|Gb682f{F8V<HGW||JO@u1OYk5f+4J(0_ER*&3rdFkmJuAu2YP3lrQtI5W+RL>
zAZXrN%Sx>%HfZNFkLz3Mq)RQMk%=AuEESdXGYVB!rEK=<@>niViqZo<lzP;ejO6z7
z9hyb6|6Nu&efc#S%6*QGhg@rcdTK3u#r9wDu9>_X#+#Il*rA+7Nj>|OUPt}qsMPZ}
zTE5vvc(k%Mu|+_KFd@TMH*91$>%I>$o*?0}P}A~V`*h>xt?_qg<g0lWeFFQk0u<^!
zI_|sAn3$MKGs5^7h3>xewwWw5WxW7IsX6XjA(P3^1%}#ZKqf%mr#eJh!}yl_h_PYi
zU^hRf(L+B!0{i;@@do=xD!HDAHm$;9LVuyXk`O0&sIGF>gMTf1G%kVlOW8sj&30C1
zbg)dX@!DW!h3&#~Qckm1Oy-HBL2D7eBC!udhp>!$lS~81oZ|7`;u>~wr!wnr(k;7%
z=w4qJ3n1K@>TOESzg&G3^(<RHRY=PDyX)cl(l!)dW=N$UUvg=v#(76Y%I#=#jafZc
zwJ>b9J6b2`;QgLvsS)J-KIB<6Y7ZI)d*%dCpoDkT%yx!sSj2I!?-l60Lcbp4R8f0j
z%cQe>prLy7A?g9O|Kit2^l6bpC*~Nmnfck-yDu>s670UL1#XNLI`87@OQ+^&(I~i1
zt5!RR9;ze&Nd+Jbj-hok>sK3BgPiZ)UcZZIly~3B>7<bxkDhm>Q`<)O;soT$<MK%l
zsYKkon|W6w&%5vK0QK!y@4Z*>pCm=EU0XNmVdSveLJ(px*(P~`+D8!%hOpqmb!Ah-
zjmko$+?w6}qZ&zF4)td0mgTL<G1&R$j*3g0PBu9k{3}bb$E$v8;}b%`G;C7zS!}&q
zCHi;JUQ4)l5r^GkH{c;D$hI@Qla(F`ePF=j=6@P-Q;a#A=y4Dg%?Ok&C6XNS^+1d%
zxk!<`vBVwjuu+3wc(AseYm-WQhl<wvLn60zF+ykcNqehmyxawC!*ptT)B1;Tk)_(<
zhb_|mgfEmkL8J!lk{2YFLst1&UKLsI^<O-~O5j5uz*+r%5?a938u!G22QFj?9C$p(
z0~Xz-UgEG=4g(&I`j;zV*tfA`52cYAlE-%mvmE9bBVrQC<E409sFw4sbtE$JT!f%7
zNb(e}eIvZwk@a@+Xv^BF$rm*WQWYuxn$Re;sUD88LtH(f;rhx6lB2B+HkoDTeHf&I
zp`u6RIDVv9)54lp>;$Fkel_4DGr8-VvL^T>+ic8f_1f9}n`sl`DGYc%NaCx$pQHa`
z)fdH4oBZsRvBw?Bn1;P)fvBs}fu_<aN0);hT6|cyK9128Og@r3cYVAR!}!IXCdpmL
zZv9iOj}DGc9|Og-{E|dVmbuLLQ-`fn?RxhrND@!u^KkO%ltm2F3RZ37saPv(=h;ve
zrCnUyR@Oy~5=pu}`QEJlf;cu!qi%iok;_JLe|(L@+v)X_?%JQ8EL2jGjJL+Txg$Ay
z<Nu6>xOx0Dm@$mWq#Vu;H&#&O83{JYY~JohADA>{>XgtVDe{i>^RZ#D25~m{=XqXT
zvO@cr^x4IE(O-Ii^SE)<*WS8V#&|&HMbbhDg_?6BKdmjv{G|Ev@>fFqKb+M$2>WH$
z1CpI4J26sTC33N;6@8l?f@fDW{cQBno;=-Osg#DV=N4AGF?I(~adZdz#tzcSLx-P=
zT4Lv$zQ?s#^G;15ZX+pnFm&LmcUmA!<mP)B=*Y#E;&a`deFC1I=J4lS0L3=SfDe{X
z{7<r%R$CM<i#XiLHUQATkT;!tU9z<CB=z*Yva0G7IA5KCqT+adjV98lFJ-HX2awkb
zySRoxbnz5|;eNi`zOc(z1>8uov*41tvT}?K+gqIF;g;9+6Dh|N{f{|qjlyTRoGs?-
z%|iEMjS>a>ertRX-Z$+hdeL*{?C!R33Tq$>$*TL+bo&U1Cx)m!7U>OolXxramZTH8
zEEDb$vr}G1kA@<YV5~i4&OdM(#srsr=R$PP)2CfG^11AnW%&U!It?!XK-3H&tCk6n
zL*+t&TL@dph>DWq>wFVb1Dsj1p~cCsxeAkkhjhzj{+p{A;c_oh-KnTx)lP@QEhR$J
z`r8|twT@56EV<O1@sDzID(LhJHkO)Z0$}mjxd}fi{06dCegk-ag<-8CNK~=uM4rEV
zD8~CdFYSR|sD1Nj%G#Ns44_!LmnF%r>)9CcuU@_4>5i+tPpg?LC$K9K<pl>S8cV?B
zguBU+a>@MAemmy}GxpIN{4C`JWEGR)p63Q`CvySGX^Qo@0YuskAIdpt6p>{&`l?;c
z^j&tWRO&TDGNRh5rdV{)UIe0&KH#4{KYzSotD;C9cP+Zarndv(DLxtVyMs_`_hzyL
z#xLdSH_`6Nei)REEkqDO(=voA!2<JvkA8g04oPUD=ZNV<z4&UounDA-_CpVaPk!Dc
z1@4dOU0lh-;tqo_-ZrweZRcn1ttU4s9OoD{HHS(H$BpS)Yf)}#jdOoj*I%0scX$@c
zfOp*!3;@F`Z&Z`xp2W;XQQLd6=M>+$pl9c*-upb1B<6v(%~(lTK3S<xotCm*d2zY^
z{`2E<d3-IVFYNz0Z6uT?8n4nY8<$8;Ol1Cp4V>^~^`Y(@S}R?HEK+%abB%K$x<Bq5
z72LVv>XLxz!ej~yIRz8QO^PpOb>rwbKa*U)u;VTWRIGU7+!-$P<s=9Fm~_=+w+VNz
zUqUa9yfzyaQd0FaCl`HKCAu}FAei?Tx<BxIo2u5VSkG;*OS9jl(}IDfZ_o5{7XX=2
z@7;Q?l6#MyMz`0}s;Kbq1eGt~2`ZTB7Q2y>Fz1yteX)*qdD=3WlQyepY9B4|Wa;j#
ztOK8CIL8kcCo*Ux$B&o#TQYsbRuoz6iX8sUY2$oYh{M&7*aw@bMW<Ud8SO3!{!~=n
z07A&}OM1%kxXk6kw3om5ZowI>PWL_cXiLS@e$l`%kuM9})n<eiz3=B%Yr=q=Fle(W
zbC2N%ZlVnwey7iq`Nf#cnotuOjs!O7;?9ag^O`XUNXIk{DlLsjdt4s~_mUV{AIE;(
zA>G)_zCfGWy=mUW$ZEYm@zpkC?MrsHt>lbKnHN(9h~8AsZrsGpY9zFY+RIr-^yl+`
z_IIn@CYpw-CH=GJH<v=Ks=-@#NlYF+e~+F|FvVqE?tge#e4e1VJV8f${`|d(+iXQN
z7Lmf@o?&+qcJ9{H*e$FeOB)D!vX|!#bo+CEolfN52#yEP6%PLl!|%ZR71|m9;|*tj
z5{H#gCpk(;#ns}Wzh76uX^>kgT1e-PZQHTGmQ5kO)29VV!}3f|b4BRp=;Os4C7w+M
zM_nzJko$kg=vyHmY4~xG=$y~AF~p*il@k=S;Y{05)w`UpPSMbZaouQxV?Z{e=2n3;
zsm#mzGO$yxeLr`MK%^)8;q|LZK(^8yMj`JU@pqQGC|O>-52fJX<oI}d?3yLEN`ju<
zp&INg>%35RG41l=(x$>0trYCOKeFQBP^9e*@%4e_WpL)YRS533;2q7l?fjL4fK6Wm
z-7?oLos8E!(n1|s2xclM8p&0i0tlWt8kvJaL9mrGuOYNX-F|vGdaYdUd)pm|OZbcH
z(HH)00I*RVEP@(`Eksn0kX#>uFn>IO(|mKnIJF$G`V;qf7qK<VOkSdAma&C^^SIlp
zW{b-Ws2nYK=Ucbj=-acqT$S5phTD2E?c0x>8Z_wqN!<dxuYM=az(wG5fUSw1e1_1J
z9)UBl8Tfijkc$-x?tg!B1f(ca2pN*Z1khhnh%s0i7rUR@)c{91B^MvSa9-A8Pp%Ug
zP@W-uZ3eN?<3POtN=ic57aJg0o&wXd)f;f<zCPlRRetqq+`O>P2v_Q5fO@Hsy12N{
z76|5F62&NAiV_`lZnQ)fw-9f%l!$$AzSgFT91<0w7+Stsanq9Bw;O2Gi(Ie2Atkcw
zd54?<bQO1dO@C{$-0AA#cnSzP&FPXIBB6jpuLO$Da^>nH2?5|%j~1ikfm}M-D2f(h
z*7wLIs!d-0dm`pboa^&0AqzS}{wWV#S7If?`1D$>$}SU6MbPc23?U|M)TG^YTKph6
z`TgS)%k3Xkz4}h=WX|IUYeUIH;l3@FS|*T5!#HiDk|u4NMpVns+wiGQ*3H~Dx@|hz
z5O7!3^{HQO{AZOS4j?6=DjLx!>gtlNxj35I8jz$Y2SQSAvX_Lcyn8&CCky!q#pYmJ
zD5)jTV$w2A+~xl9(>H!JdKw|2Nk!yPE`slc``dyf;#<a-;&iPzIXNvq_clz8ETCzO
zw2bOAUztgVUdJ1-@7?$@7Vym$OjhOW`7*cD(S-37?@H1_OGHbocR^JCE1-v}WB~!v
zJ%L{${LAqM^PK&Y1=&hjlQ*#hUOUgYWF>?DcHP$GmcM^ANQmVD!oEbH)2V(#7B&ma
z#j-p0x!Xp5CEvM)8#(bSZOiG3=+-E!aknC6HZ_8V(N5032DA3#ofK@<J4A{48z=bG
zDWqJMW~xzTqwH~?(rbC@2bf*1F0}N`oz<(h`<ztsk})<mG>^{h8@gl5_ZcD`{lD3r
zINj1r?86@-9z~mVzjq4Xv5|}iJ>@^@w2vHZFfPc?EoBJ}<z;x1Ben;<BROKV6oP#_
zXq$hh)&Qgf6(Cg~|25aV)r=)v7A^$uh-K9_OVnqZ3O?4mJU`uj#kv#K3|3>yH(qOI
zBV2ZSHOgl+>TqqSoy%hUHIc4+@l@7kqztQ+T<l0WOlciUT%RgfVfsh6Q<=_gUM}sO
z_o*8#yqhQUy~TT)5;9Fa533^3%?q)aB57A@?=$Ip2KcU`4;SiMBQgxN*qf5_10zCL
z_5dH8l|1JMj-!sr^(CRD)FL@5VuQZ&+>1&bN2oIq+FNaxr5IYAj9*~a&R}G6&$<t7
zZ7el-Hvn<vc0G*JOhb?Tq!1a`2CkoV9@&ftM!{W-bdxS{j%gWOoE^&}2E9kdYbPQ5
zDy)Ms)d4Hc+TyWWVPnN%UfJGVhzh4wTB|;;*h1aTRD3s<T;M*@_Gvg<7q?ZpB_}~~
z2D|^hWN}Z0YP*E%)i`XH-}i@gihTA}{caoSy+$DCkS2RZy#;)OHz$=+n+<YUKO1rL
zr$?4CR0dxOA{8EiALeC0exzt-u=dASC$=u2;^Ydp0P^$XksRFy52q1jRhqm3KETLs
zrCyz9O=)g|)29+_k3MC+iFPH3|6S795^1oIr3iUAZDELpee8q9=mkF_9h;>vVvZ)>
zwYS?<EAySTx#z%?$MA(s`YETtzMMavvVX{ZvQ)fgT2lKASAT-5zH&H-eMz^*HiYGD
zZ(9Yr(v8u|V#Qoq2d}g^dQe+j?vZS%Ir&&ndUr|OP4n3+$Ge0Nnoj-0S<t7e9Yeyb
z(GQvZnIDoiv?9@kaIyQBdW|(o-ft#D8SPzsKj-9)Np;_V2PKf9<n^5_FGa7H=*7(Z
z5c9f4vX>8;VlwV*F~J}y6rSh1U$KXVpXJv@5?KEF*?8YPVmqY%fzJyRX$wR=&amIl
z_k@ft<!w4%5Cfj0-scu21z8$-`J~#f2zQvO7|2;TJLt!mWpe!>djtGfESd1eOkL~8
z7N)}5`BCpv^VF?cm;E26Hlgt?F~0EvG-h?G#@?fGWbQWWrfzmKpK8lzWbcVS4|_22
z@uA)2_iuAhmem4-*|Ci+dEP=))awKbr7Lpl90`l7oT&^kr&+YA#you7p%jh6ot<iW
zeOx0A)TE5JLQ({HJ$Al7=_@NKd5ox^kd+hi)&tJTsR@HF-gy<LmPjBDRn9FelMncv
ztw=Ki<LNkMRtoXf{?Q@wsOj<9vtLb4%4E0IYuG<rLp~_w(O=_qmpoeZ5|?|@%eTQ?
zvzi_voA{W*X<V;Yur5zAlp`+Pj>WTI6s=VzG7&x`FFgIUPT`(V8P><qkj|VV;8Lpu
z+o^}8sa~~k*^l1p(~PUgJI#5({f*?N)#fqOAi3^qwb0T*i{ejl(rd_Ra1?NDz8(mP
zaq_|)7oO;Njju(m!H;AnfKaDl>b`N(Kn{LY;;ccq;O3-xVQ9^lxXM-p9=Qk^&@9&w
zRC|Mit7qxwu+cK<8g_Qg!C`l(f?ztr^?4$nxn=B;om$D8pe<~B2y0=$M{h8~mqb(V
z<%fvNV`Wk>OYfE%^*ko?!ynxm?si43rG!^^>$Sh<j-7lz5F`gl*x)*&KEJ+JAPuB!
z5!-(3BKLsb$s)jRYpSwvjee=WF#Sgvo4c|_j=ngO?*k`#c0mp#Kc)Yy=$$uRMBQoT
zPn>=)cd8;Q8@61)6&N}`735I*CKelX@A#Tq*$V|TGwb@<cSA{YW<HI2!hU{{du-xa
z@mdhde=UN5<2E?YiVxo4lx5_}hb-K4k-KxtMefzfNMg6o0D|rPcT&<`gOWzfijCmH
zc}o|C1C?USL!Nc|`kAa}|6NiBZcIA2;dXkta<QYnU>Jq5NANrVqB$V)uf8#=U~9ji
zxwd+f00c0<iCt+gpQe$5XWS55RJ_j{Uc19MpRZq4NI-La&L{=Idi+w0R6<ATf>-3>
zMLlCCUTo8eEoN%JQ*NwU`BvdaiA|<Vx-OZl`W@VbYmhf(Dn?Pbn$WQv##lapA`1?4
zXLCyvRy30wUIao7WhwX<pJZ742)|@&)Tf;r&G2;@aQJV6-Zec00dn4Dm%3#D7KIha
z&HsXfd6|?@@0#Qkk7IJE!dldZ!Y`M(T#sTR&|3X-Mhi9*bu<mFS#&H&yA$09YcHlN
zEE995f`Mm?$yHgh6b$*KUq#-~`Ylh;^7Wm{07V{C;^sLU*<L@6Bw6Ze^ER8O7J!_x
z>2WLoL<=5U#2zSu$*-!KAR#5S*I2OZ(ZuV@t(n_5A`(t{O;E!dn{F$b?ls^D>To$@
zmn!NpVzz#^f<P65M#YB=IvOaa7XHhVvHiaQKI=bl^k3q!_MgwX?Ff?cJIQX2vsAL2
zy!M&c8^=K>JtD^?(hp9l{mN?Ne6|+I$*IC?*m4voAgH<9h(>EZ9_DjvB8-148Wav~
znw_#0fmn`l(|^%)3*3P7o!@yDqch06-h+?!?uO0L%ZLR&ldAOEk9Euj)xtSBId+r5
zmgRyL<|2W2b?@UDqTQ=hZb%|340z!D`Hr>c8|8pdITxnXhfIu&T8;;a=Jls7R78&N
zte-|BW(g9*WWYlZv$;UrJn_WB`riAwqsVnNzBZfow@c<`wY1CTeF}vr$R6^=zAye#
z%l?kG71BVohdODSsj}5&dvnra58zZ4AN^7OwPs-Z9caMn+*eW8vFQ|MY78lg-?p^V
zDHlUh{^j@)tpVm7y;y&(^P#om;YqyG*=I<K5@(gIj``S8_8`t5xK{x{h&QC+(PVt@
zXnoaIWjnF|+hCOL2owJpX%Guc9F~&u<?jD<a|`)vA)CJr^CvD7*ZD)zu^PfYvZ3}~
zmqqC9>e9I8FmmH9O)&`*#%mER6@pFEFC=hD?siqy1o@4xN|!W@N4?cdUu?75Ken#L
zD2adI@~~$GchK${IuD8@>AnrOxbIM{ub{t3%DfE{7P+r}|I0`%Qi0Qh+zC@znmVG^
z$ifK1mN;QhbCOVLKI@BSGycKL=k~R|BtQ+CMCSuV8Z;HJ8<E`(M^r6AwG_hnNGM3J
zGbbh{ZvAg=w(jflK$D{^JmzrCXi~{S6Z$%MelT_|&_&BJ=hlC`AksT0P<=bfD!U~!
z+~5Ct_VTE5>)TDNGW%ZMb_7{6d49S-b!_t@8#q1InyredziNG3Lx=*R&9_T;N%=R%
zC`#Vv=bL^^r_YVYvYW)YlbEVYSi5-bAOTgFl1L$HFSJV>))ddyRU6C@#hM6KjLV?y
z4ymw5Z74Cv!_T;ry+)^dOHSbKDwB&ul!n7+8fSDzUYGA!*?Mn=aPm*q?GzTdWZ?O!
zH7C)iXr@_yp1fvrnWBh3F5Ceq#)soHs%|_1J)SlO`lr%?3`tGpD>u8q-_HV2R(?TK
zmfVx4UMj0n1*CUbTP>*Z>AKIVLi1RiA8uZ}<8*53AX~`)rYHFT^*U69DgydIPd)&!
z-4w;uiR2W*KZ0^l!1M(>Emb>hX}Ilm(rqD%D`S{c`EE}J(WRIGrpRP$zfl6gVs&r4
zf6`mDw`b=z=SSpS8M5pi;32r#YNcG^G{rSW%exi9<i@*+4$`auGUccMLP(7%`ZcSx
z1#3kp=81rBDjYXdKzj9X6QHw2h}t*xtU<#D{Z>r=y^cqgAfe%M-qso_YvSMj+6<FV
zwE_j9a>r_2C!FS^<A~G_K``EC{Yq(=pdb8T!t$(0(Ei)K*5Rj<*I*8XhbR>d1F<iB
zGE6}IQvF(qrI}d?5bc#7t;u^%hVd>FCq6Ze#CuN4Z6!;uGNvI;+vwDMmz#&`uY?<f
z3kESUF-~ilaeRdV7+8j1BhaIWYtHw&^FeAT-ZlR<lOZ@Gr$Up=kU++1y!a@gs&zn3
z<Jx9Yh(?J|AQr3adMT-{{;|~RF1f%s@7C%_ZXBWp%&IOvPkYa3wRzfkcI(GfcTGA<
zB9_J!Cs5lT{{WetDX4i%X^1&;2KX?{G)N<g%djiDY>m%V+<a;g(-T+*8T2XL8d|%w
z%T$-8HY8?CRoQZkhIdneX0Lo3cM}udGsahqN!aN5lN3S+L=bZcjH+!mjEocQ9R8)o
z<1-J4o4062iXGn1h3$5Y4TGTA%}CQCf3O4qPbMFVs)00anuNtL_fP{@<?5tec(c&+
zBA3i{^;5yqoxw{Gx>+j2|4PlqK@GiZY-yU;evmxh8ubRKCkFewk+uFrKQ}+WT!yu(
zW%k~ZGq|6yAm1K2cK=8{l#dQOY{q1Dn_ltQ_A152b$FveEOl@kO}F7*oahH~u@r5i
zsOyvxk|JlPIO&hR5r>84_kHBpl#<x*CJysAX9sr@@%&`v{oJpQ%a0{})ndyw*4L8^
zGr;pGtP(ywFg&4r=Kv6x8is%pEf=bzK^e9x>ICmErhbiS<wlq#B_m^FQ!%PK_iML$
zv4qcU=vo$YbVe%>RmAY)omob0E*!XbrfHbq{@WabogUKfITbr}v71F1?Hi<)=HDMr
zD1X{D=vK8mg$HMf4=FfeYv6t9ij)D>uB<~d+T&OGn#uwkiIo<`EX5hs)sxk&_W*AM
zy#15A%N@k`1_PIqI3C<pO=U6|j|$udN|aS;_5~JpnQDM=I1sX9uwxW1{`89%V1iTL
zPG~C9Yj%ItRG9aYf2%ACPz2w|p$7xLZq*S=_dq3Ox$If=8XUkaAz4v8N^g&+owd(F
znV1Eri4B^y?h9Jyv7JXDt(mgglA19z719`bU18>9>a#*YJ0d1lGOOgsTG6h4MmxP#
z-Mz$7(X9`CYVuQr^uY!{7%`|PUwPbfyi4lQZ6B3E(R@>%!i%_hfE^^2%_A#a#7ETW
zXMl=pSK<9zTXTWHAHiZc96nRqXnNBa>6m9dThCy>3BnQq55Pzq0lDP_$e&kJLT6vt
zJ~sLuk6&!GH~fWD3s*BM$xgcKsfIC3oOQPlXh@??F$ER3H%W!9Bzg{Z5&SmSX^-R4
zMry6u<hXi(m=H#Bcy2e=wetl7pznVDr<2DrVzO@DWPqZ;;1vRp;5MJhhsdvy*il&$
z`N!+B{qV=?Q;Mj(I|?%z)-Xr&N0MQ(F?p-=ZwQ7`^s~j9S?qBPvcFxI&GgftRIr(>
zN6v?2^n+cnQ9xB<V`^YTv8bpcCQD9$P^6!j@B^q$`v<6SGTy1(mlM#TLx>3Q<aiou
z#lw9Q--RGD=Ph0sv^_E$9O*UDcBzGKfOK;H**VY8vQaZV`Bm4jyeGs(p_bR^{%!xB
zzDilNyKR30?gbCMo-w2BA&RvV$-i)FmIQXj)HWf`a?NtH7(5kG4r=bhtW=NcM4h+8
zFC`}ssNrZx-Ch?HBG>w;)h<(uGbx#tr&`WAX6Y1!^<~|tkJHjtY%?i96GpS{52HUy
zsvORt#j@0eeg6H321GI-;)}q@8W9!ZCK=B|OAzv4r*4+YuUh3gn>Di+1yi-WRm4?^
zab7jx88Qp3>q{pFeS*ghq<n6<Dtq=?#<bW;DfGE&3hz0=lM0#_Sg$QxFCs0HxELw)
z7_nSt4)-ZB=?nq_>YgOWj?VV=kzC6+7A&g$7f_h-lj%fG^Vl`|7==+fE>7FEqjJ4T
zCFT*0;_|T~xx80xC|mzM8|GBbi&G!xAmT@7B~6oeYnL6ZF}Yq~(d+t5JDr)DM4CLj
z5X)bA5w&u%wsw=E{|Oesy{ZB`a&*K}@K{B;ccK=^u<%Ns`2hKP%!Y(ahKT4W*&|oz
zZUB#W3nV)6{4ja;$)|$0TVv!!ZR4&SppNBl?G3`sR|f-lJhwy1>I=#bzG9IuyU9;K
zwI}+>rXGHGk5s0c4JY1bKLNE(zrWJK3qq{k4@$4`fnb)I_xzK*CsB-m;LCzd0<QCU
zm*Z~F-&Z1r9$^k*NG-gq+79$+EJ&Fv>R50OkF$R6p+4>#o}Iuv{tn`60q0H7^evtr
zjPA%D`SBWi^JUr^K^*z_#orTn2oE9u8@VZ%po~lA^}D`@!)VeR_2DH7s!E1C78A>Z
zJ?@n>RYwGF1*Rc_L{p0{Kq@tLe7JQM#^9Ia{0c%o85w`|M;50KG^P)4$sScb0u2#}
zpT(o7H%x9<R2aw9ai<yQVxF(p*3H`r!tU6o;3xh{CpK=n#w~~(pcDoAwVcR%!lNz#
zod`*s(hDFn(miP+8l1t}cy4cBs%_g!P~m!L#(y+!u$AU>50h!byE6-fMERDJB!z_n
z^j!~^eV7dpnd#k9-6uO=#g*{glFAF^o7n!HaWIb$X}qv+W9<Az2Ltahw9MtO+Mk48
z{2SC)4F}G;j@mDtL^wy&FMoQ{qW4{U&fN;IaT60(`BzsYH!XC7Fp6LIYjCuvMWC--
z3-De7X{(=vQ)@5*YZjtliR(>Agi6)D2Ruc}TH`p~acq^jh4p-!7Pq+?>dZ9!k3&+&
z2mI>@G@l%K$WWZ#a><QvSpI{NtpX|P?VvIu{nXR%#)#)0eAeQRMMQ;ppFTALFqr}o
zr12fu;A9o#)0YO73vSIES90(9haMiKE}*G3AJJE;$i}I)A}jD*v%5fF03cB3;pzjS
z`*sAnW{p$6Ab<dmz(Y2sx?-3t&rf!p=7Z=aeKGL(v~mN;T|1T>+wL2yxw!C(F$ewh
zoz#)yFyao~)H8-W_&HBXlxNp~h^X=O^CNMu#YRZ)6fk~ra-bwZAdm{99+D}*2xwU$
z5Mcz43NqfIW!Aat7(dL^&-sUK03=mt8Zh+9M^&%5Umbx+a2Y0{13>=l$Y=*GOT&-m
zS47S;#RLKMvu75$JVylPe!l4c8Z^?o2!LgXA;AF_n@~$I3St0;tR#H|C69R63$0v|
z2@t*)bnX${0Wo3O{U=7_07-kz{lmrwPhVd6q6~rdtEj7+#_70b@4FJa(+*YrgZIx+
zg=oP9LUv~`9^v`L!{`_fc&yxOtM=0Av4SU6-~V?GfX8usL1zbdUg80b_l=PlMU*g4
z0i-|>2*Qq5gs&!apohwCfZfOgfkTWF@aI$BsJAx}Sq&hiX?1SqZ$)|`1Yn!J{5xep
zJI3b0Hr<)TYcD&5n*~WUQi91!#pN~cLg5F+u=v|=gTAMubCws}$Acf0scvw!BtEUP
zDxW6OWQyJVnz6k$W;xZXyz8KsVlm#YSnm<yZyoO?_WgD}Rnwg}t;F}f1)fBMP~|lM
zC$yB7x`^eT(VX$L*&N;3r$PgOcpbabrrKYE17(EBpaMt+<2Dchq~9h8VT*Oaqhk=f
zM$HLOcGS!&$qSY%ues`oSq!8<lN;_NgWB<c@^9Jyhq1Q|t8#0jMimhj(jC$%U4nEY
zARr>$Eh630(kU&SB1m^5A>G|6-5}lY&E+oM?_Ae8=g(&E{o`3r%oz6=;~rB)%EJfB
z<&TYzPgBKeMkX=jZ37Ehb<UUlMv~uF5lW$2*S{@D5K9E=%|v#p&kVpjRoIDdXRI`S
zd$Kh?HZ>hEqYYw8HUVHvd*5zp7f{+A&zgkY;K1LDXMvmbnnoYVkwB}C#1N_KsnT~0
zHa}ipMoVH502Z*Jh%1qX#+d5GDKrCUH_5i&8io4@nhU`Fnr~%=N6VXaCu=K=D3&s%
zV{3ClK}Lnq?@C_5>)$QOXi<621&noT38qj}Ou?`bro4)Z@gsew(=dcmeTaHdRY`95
zbN+*|ifv53X97<#4dlXhEOZc`m#<Yx=U;`$!Rf2NHrnjvloZ+?tR9j_5=PNKr0wJ>
z_%&g^rZ5+j&IBA|FNo@$Pi!Pu%V`&;0S>N|008VzY40CS=P6-}e2foTaHIvpzMOmM
zs*$!bXm6%fz~eNF3ww1w=d_+Yhxjf3Ef81UH}h*r{^|n6tPxP3ttP<!tjU+$8s7|*
zQqv%k=kLRJOM93Ou6DWA>YeTLQFs$LzcTq18yzXT)Rl;@bbISt1%1_^=OmX8$m#Re
z-#=7}YO%aKRK1f9IQgu`0sno{wh-^5qy5usM4^(9g~GLX&4%`3ZJ5hezBV?$s7)W+
zK0FkEgIZ0zmd#Ed+yOk4Fr(Zgyq277pQq~`R@F7sYkp|bD?>T-H7T*{Vs38C^ri=c
z>+F$_!d6&Z&#j<h)($XamjCpIr&)MiP0apIrn#cA8UU*~Mae)$Zip0idB2Rm<`Oi3
z1gr>Vv!7i;OLF8YWYAC0EClr9l8=64c$mnt`V#>S;N!!je>fp9J);@GrU=n0`W$_C
z7~446_hP{;ffEl2Y66{G&6dwt8p2S)&_9gMKG9A_P}gw)+rxKP#eUxCs`11wvd+#3
z3WY5COv&|Tf#Srbgh?#XnzA#~wpW3V&F?L)8_qBn6~!6bPIkF)7`#se?=A+CtHe?R
z$Ej`(!aVF^{RwRX!YbJZ_(s(F3+wiH5MQe)dI7d#27x9?cRpR>I{jPJ{{<sQ0rh5c
z1{J-(eV0Be)iEQALLvFV>(ba5$Dj|`o71Y~r&s6yLj#~eB0xt7nr@2gVb>{;ZENi{
zFeHJ*vz~ku0&Q-A+KNg)K@*n(WB-PeCeDlU5=2HoWlhpxKJBJ2ngny%NQw13MwbHN
zGOFz`EzAv_K1_b%R4}qE!T^m%8`N5O>gIN3_ZBF$%lU4YDT^@wL68S9Q-Gl|l4d#m
zF*~*ZoJ|fRvM;#d2^w#9Ewu{g>Kq*vP*?S}m+0_TEZoERPm6(AJ<gxua+vnLFKrb{
z;(HBP5feXDYk$r^q|r7kD?hr{4_LJ7sQd0$D90^Hk?z>*5;q>?w(qh~25hy}nyc8X
zQ5u%!Yx$yuORJf0Ol#OOEiILlP>FoRU`m5_$1;Y;IXPysEwE2dJ5sQy+JB&d`aYeo
zIEcz-<;j1~K<WZBgWGGEip$*!k(BJB{~CH4l-uBT#dCJJD~r0(j~R{^1(8N^PmWun
zYWEUW{$D)U4Dcgj6txpJmS-Ail#<!IFdPY@E)ki}kxKu&TNDFy+)OSd@K{Pn0b4>z
ziOVv`o@4BQlQNh!U*G!q{W0tVJX$y~Vx`YCVL&?-p`&YFmP#U&uutqG@tyhb;huMA
za17vit+_|hskpF)*cvKP8&hmkbd?tc_Q33vdme#{{T<^TczG-Z>=@y5FEd-He@z7U
zg{ZX+`}$24W!8y9lhyv^<9I-`&p4STCDS_?in@wxZbO^tlEnsJl%Fxa^uhR#&F`7?
z0^$J9&~Gx-;R39uUD+AjaXCBu4qB1j;jx%nn{u*EV?o{ZK9lV7Z030RGfpk*O`egO
z@i6pzzeYNU13s{ZPqhvkY`2SJnLY1GX{^L)#jgha%?D=g*XE9SHB4(w1}1Jj7d$?y
zJld`IbdA5<!CveTRdez9j5N14EJO4}-b%;o332!9RKG?B6m5bLY)q^B-v4k<AtOJR
zSude3^z4EVgp&4{cRh5KM_ZpcmuCZ+mXiGYPTlI;4l@I?CY06Y!fxO3hdwp_%xW2w
zP+4t$;k<n_H&AZa<Z(|5Aq|ISJtE_Mm5%y!ihZ=`l`vnqa7BNeSr!D=s>pNuWiO?@
zAJ)P>&a7JbjrI-pf@aZ5vcv>(9ry6@1K3CeGmmYVh87OkSJ9tNu@thm)vy<SO=S!|
zZ<9&?pzkqr92-+%5I<~<`rq(PD+gHURo$J5+Y^|Cl*ytWv*WJ<+D@^fc(0BO##w!x
z>Q3z&wZoMFj4QnH05*LG<&t(MX*Vnop7&pw@?IQ#@h1rAnXgH?_^r)lD=-c2CWn`h
z586!XpiMfsxQ5Kc6~QN5?nzXLrGWd+L8X{R{dd5yL05s+k&DDU<$_X6w`~oF+l%#?
z;mn${(C05+%m5?BUbov;&Uc=LPZ%CHMF}yhQQIGK2?MV#PaR`Q*1>z-t}(a-Q=)c*
z+wF%^sC0RVxx0J3bjSZzN1*m%!2W`^)BtjEW}xl~RDts}(5v7ZZg5pv4Z0Zqm;hIq
zw39Zp>)p7+yD@*8wp~}GW@T?XWn6*F>kz100H+W)>)Oa>m0mwaRaY4hek6Vi+(-DC
zz%7L%e0K*z6iT2=JNilzch721?97;oPBJ&T<BY7XV$~CRl7x*Uif8SvFJGADJL)qy
zIqToZt=E1|shBF`-J05&Vqg9b=122@5#C9W*3dA|RI`Esbpe^T2B4Fzr*PNZ^X<fA
z+EmTK{=mzdcFbm+GWZ@naC7cG#v}l)J$Bx@8AMA$jdAbGQ$kVj-YJlyhoR|d2?>+H
z{2o#RZSdG@ymN;JZ|r`HdTlfC16YjkQ>7e_(*21`X;NK>U-Rxh2c+rSTEkXHFCox0
zwMQGsCMz1P#K0{XLWNjlA8WawBeR(}p@%NL*3R+o+VD&X)Td}Jx6pVus2LLlaBhx<
z-Rk<gz)V#h&u%<DTVpFb@?^hy86{D`t+p?PpLZmqV6Q-}tevTDhwUkwVF1+2Q?Q4L
zF7U9XTn+{JYN#cw|L%<ZGax;q!IDN>V#j9#xKW28aa?ZtxO~0%=D$rlry%8X@{et2
z66nx>EUBOeWE$kaeaKU(l5y;b4r`=Ar)H(L3<TaCfrJGJB>E)Rp$5dyl)siCb4s4{
zyTYmcc>Czz!3&@~sb}}E$Lq;7R?DAXOhaLmil()%21yJpjanZLXfA;#{+I?$6FvC$
z7qh!=e(?W}+5s2!Jg^6#WykX=eRtho<0+h->aB(O`V$=G5SEII1izEhleni>?T_=8
zAO!Ck-^msggtQe@woB8@NThD)_03e)TwyHH3tY3L#6<l@U~z9T&L#Z+7R&(l;Xi`z
z>jhQwA3)MyQ8EAr=4%^w@;o5GP9u~5aw8)d>T-+dvY~c7?h@Yax;)-A0FK#W5YsmK
z!PGR_JAB1%N)yCjX_!f~&D7ePa!z5*mG$*8lG7=*7-C-;_7>dGL|VM^p?%Jzt*6nk
zPuN&j@Yw1);qTid=#5|{{D$7g4Q(?JP-!owL7!tb;NqzSSe}q^A=_3Wz(`8<uE`Z7
zrIvs~;|etU8yXoInXjEg>D}jl(YwFyehyO)9yUE&C+QM49i)U3yQV#bHERUh3ymH&
zrTK*mPU}RVyi4LO?auBbwg|0B(@{|{KdLe-%X#`?ckT<x6cB0xst#!=YG)V^@>-*a
z@~&n&0hgul)(9C|r_M&i1?90o&cldY+CO48)1)2r;Gv|NvVc~Ae>iFC^-jFk)EIjl
z23(pcIW|WITx0dcOKAFPk<dg?6GU95a$ur9I@L2vh({qAFVfk6t>b=S1kGCf4El-T
zknnVSVi;`?K#$Ln6s9}yc^RL16+#_SP!Qiq+kwdZ<5#|sO+=9?_mczRB^hA_D(rv!
zHP^ciHx|LNsZm+cO_Z>Z(eX)?+S$^!@zZ=Z4g6|cUN=D7^byU>K;c=mc<R^N%Gmq~
z@zjdLu(n$F#qCY8BgBRG)&Kt#gEgponwt-ZHdz!(;h3nt+eMA@q5P++PX%17p{b%-
z_D0_DsIN~X`Jibc2S>lUcB>YM6L}_gTE{{0jqw9paN=)YjTS)d2OnRT(hM?l+S|&?
z<fpLMNW`iQ(s5c&G$`|1q!DqMWCK)SIfeTY<=!SF4)xd=#deD>#l+czRI1;RC)?LY
z167~}YZ_!4YOw{<Bd|KX8g(w}hb>@+`sWn{=+=q4HMaX!&<qWG58%i&0bXe%3~jG`
ze57yI(QTulT{XXVl|2tF8pc`)od^kLEM>H@Fq0f-*(P?fM`H%9OeHPu-NOYiEXEk^
z6WWi;o7kQMl4{pylFXk_AE;*m-=WO#E^wWeft{vb;3P+m3X%y$^^T2KTI)U(BbPA%
z)HA2(90qdw#-NR~_fN-$pdmjDWFdh&&Mir;pJSs^v>teFQE9xzIg>1<jpgR-eh);v
zN(9s*;-!wo7T_rCMvg@gYL-KJ`^&Xu=5bS<w`lWnb3eq6gfV6Fh`Y<AL3P61+~TUI
z8b4mW&fYbN-D}9xIz7~n6)|ml>)SWbF%GYfF74~b^)GGm<}IxLQ65A~_!n^6StoLz
zZBGnv&xON_?IKiydLix<e}tK=5g}x2e5q5+OGa2<#wGZ1%Rt!VUZM5@vTY2krR_cB
z`Q|HN^Ta0)pfR$Etd0PNXA~CpD;H_+W<C3GS7_D()ivSCoPkb!elC|?l9^u{XX;`)
zsa<%A!)vPVQI%d6R&bu``%dA*4b^=tLM2IhRdtKugX0i&1)6teK?6y1bK%nqBkDBT
zpuGTu_fBt<SkrX!;N97NNaS(-n^Xffk3Vm){3><aUre_s;nhfb+GgNS+92lFGgRr(
z1&sk}t@pFgsi3at;W@eG7V0<S->2WT&yMe_U?+Y(xBi{TcV&7retU59E0cGj)-IW&
zU+aTxuyT!#7Un(R3bilgb6tH=wnz6You6Bx+E=O33NeXVKlNLSn{n}7JIUc^u6#i<
z4037z8DlHV014clB6HB|!oasO=mWZyB=u{yY>IdKT2(<k4fzi*AFls;I{$WVOK;oA
zq*a#oj(6KGb&zq14|QBD1jVXcj6^riv&9@zNBnbnPzgVVKGxuz#Ol%KPkhe`^Kwc7
zD7WiqZKxksZ`=9u<wDtQ>)z%UP64w+5koPErN9FVbwG`q&8!h<)};@<vzP_D)o2yM
z14Pxt0LlNVgCIXpc7EM_MG(mh`+>e&Q>nZNNbbghjxr=3o9;8q?d}?G!ONjO@HO`U
zCTMF`ZGF;!Xd={rjyG1&Z1>EyyoOb~{*RD;4R>GOrjpRTqPdDR<w5oXl<^08joa4p
zs<`G^pe{Z!uQn>Idu@F2LEB%P;UOOGzt5c}3F@n$Zxkjk-7%h_?$Bp#1`<ulF$iji
z72Nw`&A_sR=uPe<MEc0{l9hL|hvI`m*5vhdY4m~EJ)#@y1^7tTXUqjXkD#3!dZsSQ
z@mVy~mOHa7-;E5`S7-yC?rFT|nSQdF^}O<g@a*0su8n!n&r=&Cm>CQ4lviTx=LPmI
z?&tj)A69u!9Gdg=PE8T2>h@oZ()ndM9Fn0!TT*5SK7ReMknTzU`rdU8zkrwQx{JT1
zWCsg6kLx)51f64jDmmJGRH04u$J4n!AGPvpep^Sqp}0qXVBtdV{Dh{UO995lxUn$H
z$azVSpz2$`(X!XLhztVNXiV5ekplu-7G0K@^nW=FoxA|p{n*B!Q`~_S;5CsH!JN=$
zpGd$dlnEFSA8UQc^!n^Cy<+<=>PKO3#}VRx@G1Nb0Je(-iP2(7$no|>UxnfPMni#m
z`DRV)roXn&W6bUn39E#!S?n11zQhyg++n%;td7g_&`IvP`vnbUIq8&?#A@}!@iUjy
z3gV8i;O}_}lMBkpt{&IwG#Zf%CaPT+zwgy?hcOSdF^?)uvsFvbpKXumqbt<8H`*m~
z7{1pXJC895<o_P_f_WHP@yI;~%e^zu3C!Is0-^P232n$t-@?=9>*Yu_?;0xXH+g0%
z%jdf<9m>{f`;-OB9o54=$J;L5SGP34a#tZ9aD@g56lx6G^{~KDjTc!hZcsib!9!GU
zyJolP9<uTX&U0aIwzKR>#h6>TI^MlFQ6QpfzQ5=UVNBk>!!zez0ZVBsq;q;jl+H7=
zLJvqLWfuM+rHAu#NlM)s9oM!t^ChbXRr<=squMxbJoopB!`KH3n4(*WpdTCRayJJd
z37`e{54Hxb6YbM4MXqfhK70_N%iM+9fd3z!oEg*za${J~%TQ<HLH9<vBxriip|4cQ
zp}$31lc+tFfBHuIV$CLz)w$S&$Kd4Lv)^0)&Bua9w+89h>(ssh=3rS3nb6(YimGt?
zJ(HAs>S8CL4PpXc^FsTqT~0HZ-MSEpfm3w`aF@Hlf@0O7<v3>q93Yi|_^V>C+KJ5h
z0n+*Vj7kAerD!c}!M_+GNjXMH{NYgFr==PDW?bB|{G)s{-V8w<#X1p&ereq1gq+0j
z#I2r_a{dV}tvg4id<^G3c}{2oL-&Jz%ykuLFkd)te~|>as`9vc;Q}g8Ghn~?;|BrV
z1-nqQ3N)3t=k4^_`Qd8ME@)M>(9wUp`q*=k{>3CNL61H$25LF}`fHi&sMg6M{TNnR
z<LDE!Can%`xgZRk+i+v?WO(l+r*qQ9F)qg(4#uFss>%Iv-`&*yPIXqfi2oXx(FDMR
zjFDMzzcP2ZSdA?J#s+9jGX=~5nxU6pgcQza7H;2t{``55ulo)f-2PnL2Bgyjmf{`u
z|EbJqQw7p;r{A4!%x$H#PL_QRyz)KaviN~%SfN6)H6GrjKCTQ2qH~f^43~@&I+9?K
z)wr-G`FFb!BISjVk?jplh1N)p4M#*YO`vu9OZZFxIt8?~Fe@jxG{mG^pCum@jetm$
zasfS(=BwRbINh&5@&e7>yxFIfKg(<28?Oh*k=N8VPO|p<0wQt&a1CdoJ<yU#D{+Ze
zGXG$qgt*I`Wu+zR)?9BIQ&b9S`pFB9`rT&YdVSM&b)VSrL?=Dmq_5A6iJ#b5H=3*I
zYX`*!JKqs5NsIkAKS<BOEHmoT)ZgAI8>plR<11Bq4V7?!w_`A{+z`DIIE&7D|85ug
zTS`jo&*m^9LI9!ECD7nDZb;29FGQZhXC`%@gbeIW(bw`=?XOv`mil>X4MY}FnAFJ`
zlzv}DnoR4LD)k1l-X(Fm&s!~scx>kD9U$Hh_ppI>$vm`iVgiYncP3Cp$$YxJ>$Tv`
zvCmt|LEw2<%s2c?Q&LxO03<vY;tZ%lw|)>)^F=R;kjv=rfC$`y9RGGCr6pxJ7>&2*
z-Ge;EVB1k`jKgg!bHF#F37H+DK9(vko-Zz$FEmk2C0lRR_*tR<xdaH}c<TBxPdO~z
z+?4c}o~tA@NZ#8`A*s+Mz>uzI9GG8BN*zq-^r1ls;0T2>02!ImqoKw5{}iq+e-*C(
zx8M{==>1-OsoZ4547Ag3M%r^}k1&AfgAXKb$MXKEtDe2ETUF*kPMkX_;gSll^v)a_
zHA+(nxiPya4Lq)C(zg3o^h+jpB}QfZ3To!+3a3Z4$PT~9U(RG^HFF=mArE9EF~r^;
zDd4F*IBJe-%cm*=l-jhv!d~&-ciH0HhA%N9n3o-MmN&Kbl@%rAWQE~Oc#T8HG2>t9
zHgMaASWau2C!!!{v8;ql12Q$Ne(Xb=*Zl2P>r<;EQBWd6v{`(dN$n@rN}A|)3zm+2
z$&Hc2?R5-)+H3xkQRPhV6hp5A7}d)=SE)d;y9mgTQUSNiNzn3;B+))StzNehDIFPQ
zWX}(6X(-JcS_$z%6YykQRT|$mz%O5})UN#MT3>LU);2v>e-~QVwD!5yB0goqG|eQ4
zCSSwcV`F52$;GMWCf$<RrI=?;X(%@t5Ag{rf7#w)W1U2Ooy%R_W~hpJ3%aOzhbkkR
z@8v4VR8Z-u#v<G581@->)VDgA8tb{<XhaX&t?bOcfTno$hVJBlS`Rb&iO6us7eW5T
z0`LmS)BGqeDPz!iF=rQ551LaB>g#U>T`Xa!RIHQ3CUz3s%0s?Iz>Wun!@`hJ6tlK_
z{KkN%sf0XnfOK%R<E4I-4j|HxO8rF$gEGl_z$}2Gt&eegZ4OT^`I?aCW3XBXL$AJ&
zNHe75y*Po%^Mr`!SB(wMHz%`&IcK#lSuW!)ZqB*dV+E^jMZOCYXXEv%C=+Paq|F6y
zo=8451R3{hlO+cI&D2-&9s@4(%K>;IW6Ne!p!ZqtU4PPM-sg?ljEt7&oSd}-0o21k
zMbVmyRFh9fbhP%qnm6bnxP(<$^%dXQvV{i~t7#c=J|AGsOZdRJ>CA0F9M0NVnL+d}
zep3?uF#Fbfggsv$-}^^!mW}jb>j8Tbg?H2Eh_`NIChuO#QZduWbm{0C_e$#V=s5_m
zat7_Q2n65+hrL=vAKbTEWc|Xv<*52qKB?B-KiHezcx;~~;+b~WzSV(pv6Fr<_l08e
z)A1q@r%}Ul$o%8CR}by@^mGEXG|<rP2U$kz!!Ti7dXsr4pbIrRZ=(k^pNNh;-w1-^
zwGCoSzK;p@lS0EnO#SUmL0`t^ttK6WZFrWihe5sk8Ls_U*CS82(~D)nw2=a;vNnM_
zN)z6BQ)SMvnqNd#@?xZ@o4K7OT!L^HNyxJg^HiC9^3SejWWS7kPx`1^Cymo*8eJw>
z(cc?_P{VxkMgq?j-xu9@2rqePg7K8&ZaHURVTm+(z_FT(H}1M@%dxt8K((=@tqr$q
z8~i&S^dINl`&O(fZ^=1D?DQ&loR721-u%>h^Q5^m)w&)39VoZfVjHhBc{_aFeabz+
zcGU+pFsiGouY{AG81=*;n}|Yfj^=1su!h@!WkTBem4q(<3A_V)J#BWz=jErKS)~}Z
z-xjKW5fz7cMhjO~II5be8n>@WwF}rT`;O;gt5u~n4W2Tv!vzMX6nMw5a5~-%M)Ev`
z(-B911B>-gT<EVqX~mFd=ugQ~1aA+*kVV=7sj|8T?5-qiPc^Z~hQ*((JP|{Rz_6Y-
z<?=c*0-Ce6)iW<gU5lg_qC(-mh`4Yk<<|OSLKuDlLG3Af&L7W&k505APNE#Wbcw?T
z=vh<uOjt~n`)u@f(dQdxYM*)~NtHhC!S`|`6)v}vSkZij{+mLDoVq{{k9Uylns_Mf
zE94nW&?oY=zy5^36be)&2wGI?=Uyj_6}o_StMt(b90Ve%2v91ulZ_=L5R`ow%RPo6
zx!I8D6Gi1;A*V|L><_+}2{^H34SL#yFjv38)y^?qqYa?SuVgp3!9g$F<WTcVY%IfE
z`WFO$AqA&E{&J&UJ5w$G;f4KfU!}~oFNN+$r;q#IDQkV^7p!D=)9K}|baKy`y7EQT
zs=gYS89X1Euj!=nI`wuU{U|XoE-|t!itY1r;7f>ayJ_M-_y6ZHn%=-h2Ozgm7U8HS
zLdlGpG{Sx7i8`bJYyqIu#eqT4g*Jp1flLcg2%FWnVs>4)nqP08S^hBz;_)rr4}x{#
z0DGtKR$r${DT@zgq~}#%o4tH-NU9nlX6Ix=sSSI5*67Ywsi#(c-O?$0OnhF|%+V+9
zrS+cB%9rL>pKBJAe6zj<;N-q(U|kU5TqVhZ`TO<$^+!l3Fz$gt^%$@VjqJ_hMzRZn
z{>?q_nivMPrF;!=Y)@Bsj&MJ;UFisLaZ@Z}Zsz6%p4do3Sg}l6Jcc2hGUl-3lh1<9
z>mQk*;V6B<SM3c=%4bO=Fivnht*o;(SfAlDAT^?V$HmKe)ud-z(I@X`tB<z%H7<oE
zD>BSQzZc8th4<viKL5K{>tl(-mu6G_hbh|d_Xh&Y8W^VZO%$I_o=7Dg2>(K%v`rE8
zXb^t8LMhB<?8*WskFBRA`Y9};8`^C6TRvVB(!|5*#zK@_$6X%3InsM%+{^m(Dp6L!
z80+$uU#>pf`be(4GBJ$deaZUI*K+GPJ#RW~gwI+V_;<6urFTbAglC#j(&8o$4wLE?
zXP+z@V86<DF}gSm5c4~(ki9pBJOlkcVU)lKmMpH!6ZFTySZaJvY=V3WsrZ6iy#e|)
zu=if$t+^W+6qH4p^SJiznD4V$s;Qa_M7GrXH7``T?Ff&!{Psb9FuW5fVUXad7;yQJ
z*HKc_B@~CT_Jw}P{`iKD|M)QHKl2qjZGkrNs+D?pWVoIVh85Bzi(Z=deSjhNneF19
zHKT3lkkY}53vQg;_4^mK-Xdq>QwV2bPheb0r^&ms1_}`24mW0xy>e7pdt}PQkRyYQ
zLX=qwy<tbb&?h+MWZoaSQBNLH=|^o@Z}rI-!3woYc7A%)JnMh>*^NTH^-bZN)2QYf
zy}8@hsjK;#5L?uoa>^%OgJ=GQu1#YH{xzLK&7^#Ses4}lb+E*Y`Sh4C&+OxdV|Z+r
zJaK#T;RRdwbZHB;<AMWo(vjY(lvuc1C2`W-oBAy1=x@k5(Nh!l)X+Q8JwNBg#$9Q1
z|B{@-4&zHi`E?K>SM*$tNgU?zG2Bo7!lIlPxRdl4EHbDVaGr_!`wofzz0|H2<zxp`
ztfGSbVM?0iTm{~ouR0u4^v0ShN;Wk}By~EJbP){VZtP6Hm<kf6_LEsKnXBAe8g+k=
zvmAD!EuZw;=>#=ly=<xqc^G8+uK)QNN3UHgJ-Hsbke!-qK26dmd><s({Ka-e+4u?>
z&$x@>3a7Jh$&iNmz`MEKPP~9bW<C<XQ%qE|v$_-Y`$I>{9}FF-GszP_EKkV8By7=6
zmu|loeU%mgUzkSu^L5YPH0X=YTE4fmM@XAmi!F{Jd!;`o!N#ayCFir99zDp-e$31E
z?nL-Ut$VFBiuFlGl>nR1pnT4H7k~bToO>&2laGW@;G~a-kMpA-A-spGZQ7e#j>)7(
z>%BR$^yK?^{eJTAu|@k!cwgHUb7zP_oFoHfS#~w*KE4Zi1kPvaL9bLhE0kMj5+y;H
zUd-c+Z<)9mtl{`d6oIiA8An<Fsx|W0kidUKG)@=H)<j)YnW{X<ig!~&f{ih@=#sGx
z_)NBiL1y25kqmcAGYZ-rm0%xc{JzF3x+&d>hKoJR8+|G3qd^hyR1(k2OGt9IHf3_V
z5Pc`(CPu=bK<~BG-Ty2JfoNcW1QKV>2a%(RkiLQ?y8YgeThj09vZrb5X79!m4!`!v
z7pX8PGK4IG-6zu$+cr5z5Zg2Z;%{{H6sZ$$7GJp2wd!Rxorv?J@#0NK&Bz2sI>~7^
zf)kPZ9|<8y=zQ1AYd*w61N`qu^fVW;iiA+d#D!g2*7cb(gYtk9)QQG&pI+P~%qS5@
z(dcu<Mgz|sPErFTSrQvlv${o^tmVB0!;qhZR3{^B)6ieA{jo7KVsR}ODysB9Ysptq
zmXDE*&qlVtfjy9A9{mA#-LGs`+(}ylE2$MHR&eR(iV1?L{3DOIe%0$D&0=nT?F>IH
zb|x$R%G~*onu4O+-7h=Vyka8R^ws*yt~8@7t8FVcO8i#B;^CbCd3iCAY;iM&p@|A?
zPwP()@)b#y&#x7vgKZU}XKLL%@-FrQUr(*TICR2Q(;i;On#J0y4*!1W<lV48E!jhm
zuV-f-y)4hHxFUwej{V<ve*@c&@vtJ`Gu35nB{@qsB)UKE#UoPf(;q5#^9G@Be%hmz
z=H(5PpCHHCBR=h0<wQ8XDI$^F-z##j>(DGKr*hj>d5<qzw_m%ly`;WHq4peh{5b?d
z_IL#;@EdJF5}w3xHYQyr2hz>0{riwg+Iy?T8yT?2Vq<D0YoOy9L%1TNd6t!GUBiFt
zCd$Adi&W|@CLxjg-G<$L@aJ}X=k4^hVs6%c-sY&2zJQ4t|MFR+SV<pqx+pm%0{-k{
zy&M>|Z%y6Qfp{|&iuc!puM}y8ftWW4TesMrFcd-}l?T0T`0SQoRc~VRJIva3(Nq){
z?5#(#PGtqDaCSody7BZn#8jVgw(yxkB#Ds&c`e(mD;{CPrBZfrN(`&+D%`*NHz*LJ
z_2SV+kMn`ZFf(${CvNqwe($w*tSYr;!GBKB&|sQs8mqU5zLk=`tF;cf(`8ca-Kg!{
z40YR-A319#8!0oAyKE6qW7<qLadsrMA(`suiImLypYN^={T2EZdo6KYLW)5}9I8)#
z!Q`EjehN~ZTV8c9`&oTz?^-KWdaHDTB-P)rX35u5T++Ftg@^9*8SuQQ|CMK?IlvL&
zLZL~z2^XKv(v1sAmU|oUw!k{$`hgwyKSM#6kUUMyj8Fh;E|Q#O9P<2&gc#D*1N;no
zvzDAPBGw~D!$FrHbv-qV&2FqxhZPAw>DDAvcjnv&?cgJY479?Nrf<7{;RyW-oxd#J
ze%jX=c;gbvEyAT8pod0i{@*w8lotofT6S|mFf9z`BhO8v%y+W(HEN3TG4m`4Qw8*E
ziFFexF%BBvuE>wV^n{MW+=)!_4HU(b5DlzQD>HGA=&<P%*wvjYD-%vNi0!{U;qLwB
zi!emG3~LdJhlnpN9bYTn&QJE=u8@TSL%?CZ#b;WtAWq5*_C&Cb+8YRUlLG7mw>!SK
z!eErQ!(IACwnCJgK?FCZq+(>dHzD3z9&HFk*lXPk#%4VBZEm^LS3T^bD8VSjllu3f
zv#FeT8;{0IF;Rc$=1vr`@SpOj+4Y-|$~N;fp7?MdG}g)XSPI6QKc(1<rL=C1+JyZN
zAG4T1*FY2E%=rNFHy>fgN23!d+-_(Ojm+>19<r_F8^!ntyIU=OP|-f<EGt||^>8&Q
zs!6xVnX)c#x$fV2$%&fh6HFec`m9k^Dw)bcA|uIwhIn#MQUw57Z&Obn;c(Ifr!K3|
z;AWP~V$Gig;bdp%`l(J6*?Ip16k3FZv@rd>6GG)-v(R879-D0-Q}pt8PPTChKNkon
zmwX}qfnxNgHkIGJ(D|^cq%df-FVn30K!kmJ{-hoC*U(Ape0d&OzFy55v8dPkClFl0
zJ%(5+Jw6G}7>khE)R=n@TmyaJnw0PJpUGKrAuQ{7IKC%H>z$k_HLYr$u5aN~#Kja{
zYdLO_R4V&g#L>wyg_jd>n8I0-i0EU&<38KniuAD$yKQ{@XQT?D<MD!}<1^7|Z7Dg+
zCR)H@nD|~0m*83tX2I^-oQ6ZSF0lZaH=>~c%$XQ%KtLQbf;Sc8YrIXCLcxhs5%{&H
zc=mWv<i+1g|J78wOJrd5FY&X_m5{SopzGeUnxP5vy~j4FcB!U&E7^H_^*Lj0xp7*r
zq1a7Mdz@{7ADwL1&8EF#>6QAh4RSUsu_hLx?9Nj0ki&ns`HTsGzM$Zu{z8ZhEv(@!
z>nxWhHhQNwLb|B#?7KN^;lVwR-A~-Q1^UBp+kc<la#i+R>j$L8S#z4TERnS`=zLu(
z;!WBPf4cOM)UKrlmnh>ueBdi8ltGPyBAqOS9(M>KEry@P)Wd&7>vAcEbxVwpZtZ|%
zb7afQZZ-Xmg6=Ka=yIjekvG|X&La$Eo}&3=Oi`4Q@Z)@QmJVhO-L=!k2$UU9wA1ph
zQq6vyfkvFiZcJY6?~P8A5)cb4GZ?%K^_as2o>J?)Vj{cDY#7J>w-nX=0DWyA_$})-
z#M3q)r0?M$%VRs9h)p&9%7r{6tJ|-*>3>yOQD7^H!UlWt32Yc`Uv5(|o~B1|w~N%Y
zrFra}$*~5M@57%6k@QDOk%M2i7Teh<2;-UyQeros+OmmPGQgk*c;4(}N>1|EUZ>a?
zX-M3oW~T(3&P<$y7lwL>n<n>aw-8HMxP`d(GXZ-?=yoiHR$X56Gnu4ix9Kh1SsOs3
z;liDg?YB1--fNc$&bGqhMwffi1i8Z}Z!B1=Nn(;YN9<ybzn%DLkm(sLy<cI?Q=iq%
z6|jrhoc=LG6^s(WpFh!f7Kfj`ZR1?MMwpHJRtOILhqp*DQPI79$MYZ%fW(6fO*@sK
zrf)(`s{~~i?W6%gljFR)gK+ft`9_=UMlm6CcbA?aF4XulgY#R{`=)X6!kyV6v-(J;
zpjYw{{2{qsK$5D{=4MiV(-Y--_uo8o5xGK*M~x0yf!JO?(W&!0&Q!FIgoAhy;Y22W
zw;_(;uzMiaEC0Jcd3J??yXd+LIY9w{!SfTZAdWR#>;rX+C+pq6N+D-$^u$ie4mFn-
zt6$dWJHq0T>*v-y$4%DEPn6({+l1^So6`bn5m$x|w!*n5-{{%D{@ok{HuU5}ws*gK
z@n>-<V4f*zeTl%CLa7gDl9~#HuT<d3*twjylsT!`W}7~~hsf@)!Md~LpMDYZiTb@c
zLvkFjDWlRo7aDIY?xFG{-s@8=49Hipx$a8+VTKmqEsrPK8suh^q~o6%Op+un9a+g+
zNok*`DY`}T`fKvN6~+SnP4Hy3+w)phaosJ_Uz%7761AL#p<n4O$~^f{><M4YggT%+
zgs}*|vnVi3B*=JxCyMnJxjf<G#yvcYczYNEvx<0(W{$q3F`T<l(iF`YZ^YAZ6e7s5
zoUq=@MxcO8(@;b89(y^yXjn)njLT7?fB!H<Em)5=^M`5ED2ifS!s)}27lRhvYcH^D
zxdEX0o34qAhJxzoFeS@Z=y9r$Ci9_K#CM~KV^l_K!7-+8s~`O@x&wb24#rLitPt1j
zyf=lzH+b)I^i@#9j!2@{6~pfn3B+FROr)O^th$9kwuU22S{8Ku$t2RPLces4q%qXn
zTyfimFHW*N23SYxmcxakB|ATo@w#!Ec|_G0P#ciQ)FHnbb~l^dVMH+f54*HK1q)OA
z4Cc_yGZ+OdPmZxwKOZv{l~lwPPe`EA=}}54_nM+9ex2!rwlUcw1n(v-Ywc|TZ+63g
z!gEdo+mOtBVYji0wfC~VCJ>)k_@9Sev8Z)&&4wP!xMpd{(lMkEoM8h56<>QNv&Pkk
zasmcDehm^?os4Fm;o(=x4G!+=@OuGlB>0cqtqRJQxbi+FQK5?;OG^!ZaaRAgoTFxk
zmdr;ZV=fA?)Mzl2dXsGA^gL1#xxSoBUrUTTepP-vD&zI9ruPabi|bI)=6$zTQIYVN
zvtnXmexn$6SIL;wCI7pNWUF{Mxm3W{#~F>qeo}{zwxh9B$AzecSkOqt;E*Dm^N(Ed
zXP+*@(N6Fm|AdzT`}U?{vIZG^pbg(DN{=HmX{><ztJc<*vE0Av?51cafQfcHc3uu@
zY87sZcuwe23X5on<kTS($a>ZogMT>x_?V0E@k!HM#e{MksL$ex)FUfBPE6o%IH`5v
zrXqbLzslF?iTn9VtTBnY7Y{o{aP(FnMy{puw<M9eo}{YBzJBmom#l%Ku!H(bVQ~y=
zBw@9{bo3Bs$nBG=0ygRE?$`l>s)u|+<<Xio5}6+VY-65D=y=X76zl1^+0c!mX@l$m
zTV)pY<t{`jO@(7TGlO0qHzfC&qmK{?sb2Vg%7&dqaPd3gNKI7Xpl?lc5hQ5jwSuM5
zweCbdsFc1d$T#KT@>LTUtPo9fbaQlA^oXZ3mQJH?&XxY6o3_`gvBS<><1Rxd|7-`I
zfnH$22|WB}mkQ?QFEk<Y_nn!2$o)Ki*9J`RYIHj{P-uK}OCqLjb~{wmm*EdLhvX*W
z!^+pO8aA*>Q0R}=+vBt)U{h%-9kNkl`cD=Bxw#|;GOhsZnQrvU7tc<<TN-24$XAx^
z!NaC2eZI+%Zzj)LJe7E4q9yv*ywhbP1E4=@(`2h24DSaIlSrgf$;!^^Ix*OM8YPI?
zDE+imv2PlV)7N;8CV0<Kx#cR;>&LF+k#`Piwra`9HXrIZ9hK{4K&nJvDz{S3o$|#+
zKY1ET?9+yd<3%|Hc@VZTQ0%9apRmcGPZ)fgHTpOJw?AI~*BZnPlK*~Wzq+-CwX|{2
zNb~%92$HC?;WBjb?-wc&1MH~h;QrPY+uilRomq<erO6S9?}-Oa*LV;}kN|DZ?oGn4
zr^J4r(`o_-2iLoB-M0`oKCXP^iR#(}J(@7^aykbhV+c!07A22cwCUV(T;aeIJXKkV
zVtZuEY&=Unhb?Fvs4yon!0BdAGjcQ1KVT;<<JhXWB(fyRkJak^ko#Jf%)e+lnp!8_
zXIN7p`bcK0<+Js<Vr1U?dlW<)l*xUF2I%G3zErR#bHOMpC>Vf#X=AA1as*thaiF(%
zVitrrtbvvy+NM`i-<zE8j^*xFu%MaRRQgk73ZJFCK84f<Xwa1zH<N<$2x%63--DmM
z7`}6Ic}{C>q>Qc))6&ENhi&5s34f~w(jOupen#J~FGiGv>rGDrV;!AJTtj{+gJ&;7
z1>+puUw~T6<agX5{WS3VENZ@&RAItfmF<7Ff+j<8p@D$Uzq?LPPU4|1y@)?e6>6=-
z&KqP$;2hmqwQoE;JiVzmQ>pkH*czw1Gsd^4mUoqtJsa0SO+Lb(*1HO|W|#US^p@#&
z)z|<9cRh6d7I*Lw*&e#yL2M<%Hx{xoL7D<1_m!6^j0RoSNE<)5t4mFf_Nwm_QJ&U5
zU?!jognUnN?gqJcjjT{<_@bxrT3SI~e(@b2@SFBQS<mk7?p|=877lx}+|6G~Q9GPz
zUMI0wx*sK$(dYS*vphy?nkot2rqe1HPa+e2gDUpchZLb4YlY1F%g%iNPhH~D#&CP}
zIV)u(A8EKVq0;w`{XH)=kOJjqU6ajj8r%gY*ht|;c1Y?^^l(?vBZyr(oD4f$_Y~~^
zowGwipbn4-I0W&2h9Iw|S7YH)8SvJaPZ8q5VR>qxHFB!dD45&zym%up*WBE^-v)ZZ
z+bB4#doUkFBwjC)+@|wtaEftDw?GeAuC6I;xcs2-;StU?h)VBg6RE?7;t)F(yKz?O
zc1~uU4|`=79mLxuQeF9^R`3wKmIX^5v%twBhq&#Ses<G$FE5&H#Gi~aFL21OkuLMS
zMQufdVS6OZ68b@U@U=y#$aLMyEBFX(dS?FQ*ud2;cpVm_BZ8#FOuJ+m!f1)_h<N(m
zK|&t|3i3@7a%v6biOaJ8Ubg`ntbo#!&FgpRsBWFVFlo31m9@0^S3)K|AHnxR12=xX
z4rn3<#%gvsz5}0&+q1@fFZ1i4B-g^T(W28~gxvZT4HxemId_?FmzaAOZsrz{hlb$9
zb#o~`H4ztwb4+FjO-zi&AGy#W=j#Tf1eEDh>Ng=6nvKK|p_KZMGY%w%uLg5481|a_
zC24XA`lgYIVh9RGNcAs`KMc&P&)0*{`UvE!OTJ>%qCS#s57`K%`)6Q-n`eU^<*+t9
zrwV;p94hLM5)YC=niQ2$YK4ge-DkxWW*a>kFF<h17Q;JW2q?ao@VHS?(AKu#w3yxa
zN=*HliAT6usNDjbndYra2E8uY%Lh1Y>e)=$+0OwY3dnk9?I4pvvWz50yvefQZbHJ$
z-mRtOwzj!kk8fV1X@xraMfrK;SDU9p@=ru2*C~pNhq|dd$Uef2+txIO-}Sg3qMIN`
zEic(5e-_tfuVF5r|A*>X#s>Tn_GZ_oPd|Acn@$4100^XZ2N}z81sYWeRiGVe?PlM@
zqZZY;ZXR?eCW3a<!CBCRx>lBYV^+&tcnDRH;4A_g&s-Xf=MO?1TXR<DLdz^5zG21J
zbiU$gRWF>aC34Pja5OwBNgL+osE-jxRbrDqG}!;Px<tZX7a@NSNtuwq>EqnJY3G}W
z-Sl()Ef#B192GN}#uv{&Za+|HeC_e_)G>aLSjJ8Kl2?A(68G>MlmINRJ~HCGTndEO
zE@twIuW8FxJ<TFexS%n0GI=!vFM3i|k~bE~x~&|qWW}spT&Rg^P8u}kd8h5OmvPo<
z%^u!j6X%$V*w~WEOv16IDn}6NS;O&8H}v**3>@V^(5)kvT+_?6{{7#Otikk$u*bZF
z4~$?qY0>GMMMZsD2VB?K_hu)6m)HVCMkbVWgQmIV!n^aZJFW~OT<BZb510%k;zeRG
zhDhx9fTJn(2nYydjE%ovEa5i3Pe@4Uy}J;+>ri<*bOP-Gex0(zHXwL+9k4d^^8oTg
zZ{YD###=sEk%fh6&nIQAVlga*vue}k(;w)2{ieC!eL;3?99ttkYMHw`DSWhsTZbAK
z)6R3$fE|o{7`2#>@8$8MtT_&KG$yWW2NmyjcA)W)JhtnZN@5NEMD^rU&4Qi4U9G7O
z8;b#C@t{lY<Z?uzru%hO>&6h-QTifS{aDPG7FgE`IN}*LeihcQ2}wy<@ez39f9qF`
z?NS^znfP_%v<XJyYbJ>gTfzth{`^E-c#ZSF-WMG3?ks2H;jGoA-w`n(g-VtE=1uyR
z;npG{)Y#I}VZj%oNUr?r`vZE6uQUQsT?`4DRw`ilSWL~Q=`p&6?^~em{4L5uz1>lB
zg|{hZKRVD^eR4bS%Rn4Y#xJQ17iZ@r-b}k3!cuf|JRF#h?dMd87TE#0Id4D0o+C2A
zY@WXz(y<8ro*&6kKjZ<UL0=~`ayua_r}UY70!6ikh1~FkU@ZRbF%63(;T8*xHoxv(
z6=jImX=*ugT;01E4e}V;bbg`bkB&u)mxM#S1*311-w#mDWo~L3+^Vr|mF1UPgp}=R
z?K3TgK07hJU6{-IQ9etk(KvWAK|DaI_nv{C{)G~@Xo-kUMcejh)XCbLwl~rQmzkrx
zJI^2Gv1`#;leDw%>D4=BNuh9b^f>nh4qwmlu~Wy3;odq<cCc>7%aY>-InZA&EPfSE
zOqskx`OaJZ77YwSO*A}Anng!0c@v-HSFJB0X@Z0}#j=T<@l`)o8P`C%$w}SZ2{_(Z
zt48;ym#$#GJoMNNVZPgSk6Ysa6pWV?P}Sekc_dLYJQOPYpZqQS3t#O_mGv#P1&`d+
zD5BYTjOBje^jgl<995!0g*}~GZ=4XwnCk1xY7>zD(oFFiWa!pPMJSUel74K9yu85w
zAaQxw_caKvY>e8=&&+hc(@HIxGUSnU!_IO+@><<${brf*M(~X2Ldt2#ks4&uy8ZCy
zO|82$)}jL*4XJJe%Z5Lt!I!(kAnxU&g>^-j${p)W-tCiSGF(bS<$>oTrS0Ft<#kb?
zovHC}=!U`jB7RGhypHELu5^m>cY4-!vNP2OXWYTo-_EF&CYACy=GHN8;4@uQaT_(f
z3O+Fmc#n&VF$2%;c#{TH$NEA)x%-zHJb#Pq?k~Y3H~f)GUut7z>TGV`eFB=FvIdeW
zpF<;Y*<tKDbHWXPG7C*q85tfXuk#y?Wz^vOhKvji$K`E#Txt&v1%<P>>PI5+X}uec
zZNM5GdiiJDf%rC%W=m*TrcUWH8dwSYB$5Y=-?3->bahiX7WEhOyXZo5JBWHjwDy!9
z_mU2O0x~Hd;T#cjQNtF!^SF72!Z>z6W@Ed{G@Mncb<lAsQA1|TzyDR<iK@-x<RI)a
znXwB*&FA7Ih+-<yhO`=`?|mkV8aV64^u_xT#bMD|J&hx{y0G>}NuMd4FK$Cu{HAZh
zP<%_g>H$VWZMJVQpC={gcyI4VvUyDICmG1K&q4U}q7>>c<ih&N%krW>k&Lvn*%#5<
zc^X=?96(5CZwgZc%7#<$uYUUGB`;N0zP7EYi2Ny1X>D&PEHdL(v@564t31ta2=VaP
z^K4m5$UG1%mmH?pPA8IL^II7pRiBHIq(V5R#5b)o2+rX(d-SDzG&A2!UKL$+b{f<#
z&)qDNspPlWvW~mnz69qKcsr2lN@TU9Q>k)_WCZ;urXoS>yf-q37C)Vep;!_)>pHFh
zgbR)raC2TF{Kvgu*l_rzC=QxY;QefOwq^u+94ix~0|aLi@eV+<tY?7Zl2f7;>i+M~
zX`}n#4}OA@{1fFMslkYO<S_+j(Bi8^v`a7ildK#Da#m8)LE)>ufjv7zvzei1kjGyc
z1%{?A_INCvmI!JbIB#}&Wn=qLl}#8`t<c5dDM?N1XHbn)hXG$96U0}M<<CZD%QViQ
zI;7YR5A({Zqr-Mk>yMhif&t}BVflG+YMKh`Pu0DD;cy*2c5LAiPGN>^6pFh<^*C<<
z=&s?cejs0`asIQoVLCL!11^18`qfw5`s$i;El^(tg+0I!($U0zTqV@c0_lv!(ryuX
z$=2>3i+3r}xMoz#cRv3j@w<8FIS2dpi1`%V#{=pMIvs%?pMLENTg92dQ@{jty$`ju
z_Tk7o?c&eUH@DM%T`RrBhu@#!IVmMP`qH4%IJ};i)qnKMNw|hqwrps8xBUe}78{E5
z{vS9b!wG$m-+|;hc+H)nXhM%txoS4E5{K8@(JXHB%lg@%xjiFfTZ{b=dc3c+mt_0P
zYokzB_8Bn@7yV1J#ia^{A^4stKZSLk7vjeUy>X)kL<nE?SY6)F9jdaeMGCCPo5dfM
z!CD__VNK!E$=!a8rA^k@hcSun50)gC#M8v3SMDqKF&&I-KUn>4yoR-njK<pgFkSrf
zLvb$uf0^$lMNsQaLQ;9izb^imu!8MMI^YJbA`^3Sd#0ew@y8#&*?^8Z0#`*)9O@pU
zeG2|s!W8s0k-P^_`Q;~6G`5&o<S4O5mmZWy+o}RERjsbD-@XG>yE$XxZL69Wi**-*
ztBHFx{kpcy$TsCNPJv(V$WK^=5=ogYlKwl^xhOeJorsu>)-I(;&6pl-k3$1XRlK6~
z-z72w8s3G*o$8KrWWm(r<VJH3u9uH2+xue&EKdaQ2l@J;AyY&xvewJ-(!Je&JXLp;
z+mjt06)iQiI7cs~HMac2S@Qi!NzqjMo+14nx_uEg`H2*seHJCB<?th^Gf#hl5vF=)
z=_Yn5$+yR}4_Cg2A8l*?gJp#pfR4$a)2gTgy%$eB*o}rx3{QT6Muu)||D$(6mRUPp
z*t1{!&HcDP!hE4*X14zHx$Q|5b3&s!&j3^caX_V&-ulXJ`YbA6GV_RHqS<|WUJ_S4
z>7coWNwpNs9(|mH+N)iJQcn|WoN1u~eIgE?DG<%4QSj$OZG=P2(ztZS=tq4|wFjkr
zI^c1zmRh!l7>H<(VNw<Idq5%3<NkcbgXvjL=%2M<85cMzmd~a(`k~(mh8!5DB!dGA
zJ56W1?Y8R!W)0C+s56@MEBB1$>U#20t_x~<F{<;{dfC?H$=)m>;cF3GuXMg~yvg~Y
zNlMzlOZv#e>O<<Ai*HEIq<S_f%awgJis@*lniPdQz~Yfu6<6rs&VqKwV_`{Q!aRDz
z)yxt3c|-LtO~KPT@WGvjtER>47ny$u9x-uUSy?AIgk`rTH*`a!^>$K*sW1%X(&@`r
zwjF`bumoO>hm9+8DJ3D(l_8`b=;<PJ!*6f+ul5LPtWCci=h(RP30hWL;)|!$GaFF8
zjKE0`;|ynZ7rJ~yA}du&jiXP}|9DuvUI<(lk+n)tf_43LV%YTWn`tIX!zpRj%VGZ`
z>V{*&kQ$KaJRj&SC~~+|;XBNyH+q8#dzi*IKWD^w=8#wwqmr;+f1Xg&U(39Pd6}}f
z$4a7ks44bR<BXB9;VJoqgqq`t+TpNWj6)&@W{uRPuwbcgu>{0TFhrnJb!^FxOe&Kd
zCs_#B&U>fKkl~f9Zi$?gAYteUlBrelw;CEWv(VzxeIh$DUqmK%8N7jEpsco^`Ypdp
zVD;37vSHD}LX7$fYx3KlfoG-ZqtK_qAiyb(pfYLhdbyZ!aoA`?UtMn<7=-$y@6uPI
znBq$ki2PcoAT=;L<%=y#Q!^!C_&{lx1dqcUo*S;1#CwdS;7Iq0Vj$R>5EMl8q@J15
z2}eXRW$v9Y$_&yNYbT81lg}oO5Ul!NkYRO@{|Z!X$X4RZJN}+gmu<Sa@@G1PP(g`U
z3^y$(Wt>F`LupX$ltkrCTHHZ{+slHgWmj2;a^tBK;)QnJ$Eo%lM3sodtq<c2A4p-3
zKjMC5@~GyMIPwq%Z|^Vd>hEthH5x>C-g}Gh{I(Yi*=BlmWrct~iz1aP^3)rfLF_tm
zTzUu75Ed56BB^ut<wU=zia>aE>L784X=0ctq~oCc?A!j&kJI1xpmxJ{8knsFE`ikG
zy;tDBsJUT~?3tzTE$|--FbPtXhPds1%!!{MP;2O~vIvX}kou@g<kFHtan+N_$b?_E
z(^J)Ns~Zv$fMF3xYM$9cfjN0lPqZdv?9V+Wws>*Ff!5q`Ih|e@)}Xy{+4`a?MB_}X
z!H(BaNKCgJryVUGc@B3PkJQH~N&7ADVQs{0_a{27g{!{mHpWeNKx#M8T(~k*Q@}aP
zftMH#<+5#Bq$>V!Y|oy-2s~U~f!Gdo2ALNTa-5y?KF3uQGcLK!({Iu&^AJ=Tp~4hv
zbWGsq<rLgek5fLSJM6_{;6{sN&Gnnzne@j%$Ng;2NXaU+(yG3lGPw|wI40BoMUe*m
zP{(GdzutiF6j4RyNymXRm(hNx3_Y&Z(V3@H)Hp$)JxwCL{tVG_Y}31z#5mvIy`0Kd
ze^{2dPN)dbDJOSU2qtY53eddciI*w$s)wXav*K|V+(K&{Q>oq;?Y@Axgs~f7!JSu|
zPq~&kI2#sp<BW>JiELECQj?SpYAET)5?i6;-ni;<(X6FjEM2gx+f`etMZWp9O31wS
zjH=74kR-0z(KzaeYD_{uUOx>fsZN?4jr2_uoFR4utrSn>DfU)W49oF^+p*cmL9#KM
zV%cM<t~bOIxULOIAC@dXFFbD4D*EfDHz0<$g12IgPv;u{EQ3)G9@a8mR(U~eB%jom
z=8RK)%0i_0%Gk|v4)x?gMQX$?Pj*9q(Qk6RCS-7S9os9eoDT_nwkGf97!s4DljWKv
z`@b!vqq+_dHhhRO>&{*diNQ*Jv-*nKb}Y&i`8$eqx}4&T)3wX*E4NuX^WRvoT<MDW
z2_}A+l)lUVsB3s`u%qOhmvP%cf94M*`GYEPS7-WH`Sm7m&{!VrTIu+#?o78azEi?v
z(0R1Bwyy19*jp9(Q@?UE{d?~!C;2ikYq6fyJ2%ZbU%I{^hV83rLN3IqIrsFr7{bm(
z*|szO=0ZO0QdwC#^MzE45-XpA3nGwiZ3VwG{e#yuv@Ap%26tGWno*HdLKcB+byd;&
zTVJ0!qa#+KuRYqEN3aOPb#ri2=gR&$JdP=}f-Vi|d1MIca}rD5<v!zuwT2O!b8e$a
zl8i|CGdtfer|olMmT;NgBlLG?!tNyuH+P?Yv#G{u{>DA!f3sMZXjt;if{E%hXg!$Q
zD85H^OdhK4kwVEuG`i`mKYIX|(1XAPX$9ltZ!S1pz=GYxQ&~_j5>fo+vwgo7Yx&8Q
z4zKPNkEVx5hqZ#b%JNp@$L-Cv&2a-@qS5|(@KfCi_Yp$RdK-aZ{$en0!lQLp`trvL
z^|yA?wSFn0!P0B4zugubkD3m>Yr=rDgJ@xAYw8sH%nNnt(Xa%JLY<zkMuz_G;P317
z#0Q?;G1mmk|A(rt42ZJbqD4Z6hC%5Vh5?igN$DOC5LCJw>F#dnp<578Kf1fSh7dtY
znxT;p81jyu@0@$@zxn&T^E^A&UTf`DB>)4Z+<d5qZ$QP`K%+2h`$=DZ30M0YeF9DB
z!VitRc&v)7QE{OX3JU=?!-CoKVTb9q+oKY9cJ*@uop1i$npFvIAIV-e^vT!a>%^SR
zJLc)Vc1I<h4D&w^$(-n|qr#t%M1+2t?x%yG?Gb4}aVhl|mR7~T^zLDq4)B>zS`=Dq
zkAjZ$b@uG$RTeKZDvuYeIJ4CqQDt=V{-CZ9S6U2+JEHE#x!wGJ&+@m^tqG~oOgjZ?
z@jCFZ_zX!ii9gBL=guaJrFrwo4fjL8h0_r6UIaDRr78aYmdR{uaBRUp#7yOh^a2oP
zlZ1A&B!I{0@R)O$Lcs;-`H(@FRwE6l`fQPO_EcmKvMgEr^x3&V-*ubk5mUMbvm;zi
zB0HW@Di3(XK<#`Pu~RIERz&8teR}Ye8+A?!I#fi9h|G@H`15ZY>w}!C>vf^d4slN{
z;2It)Y}op9thW52`=*L0z9u|;YMV*sV}h+6#Vx|}U1qq)()r}au!b6}-z6t_<RUeY
zWdhQOqE_RzN7WR2{{Yw)@Se8^*U6PG=~hdpu@f_|=wk9J?7#Jq(tCsxT9s1a?-%^p
zT}8Kf+EoX<jW;`?74#03eDO8X?FqHbg5E`)n@l)|J-0FZ$>uIuK{KV&3~CA8>Uo;%
z)6W{b^<w{2h^~(%tBc8;gqp~gV}`B>Yx2oti}shhUC|F+D6iY-AF_cVErEY)2n|3`
zn3OgcUmPgURo1hA2CS_w?+qX5Tu-vfHQXVRUOPTI^wi`%@73^F)}ST}w7jyQ;3}L3
z_uFbWNpkLJQs~B*oNRd|3MtCwfCc6Cldq~|^$-%`CRFzhnhUVU%1>{smRspqtAQUx
z6fI=w*}ulH`+Fe=#Da;#07R}O&g@D+fq8)?ktmXhBxk}LzL;eAgU|e1{kk8y<=GeT
zejB!@I5M18C4RQ1OS-4c{k!J2mk2aC3!N8(y>s7LpZ_MSxK58yVzjAFpY;A#i`I7(
zJuG-iuIr%yr<4eRT`GN~8?DVSHPiezIZ?2nNX9&0up2Nh<#Jd;o1VoG08UND8ZLI0
zGFlbykj;ST<0T4v%z;)*d50JcuD70@A3uxKBr&vwF?NMM@W!t>r_nVt!}T&H9+Gy&
zyr%h+!`4C&!><)~yz*tg-|se;R2~*F#<(4azQd=_>Hep4*R+xAfPg-+^w}?Y=z#tY
z8B#JRj@w=RIy}r7U%AnP^@seSWO%0S9)bR)q*8GgP?2G6i+}d$>WJ^C0}>ocSCm0k
zKIyvI;ldHjm-oppZSdH8c4p;QgZ}5CNyhn0qpp$G$N1b4lXo@R<{V}+DBfef+c!*S
zy3sr1Yz9HK1J^PmeKXLEr5uP2{YsGe;br*41J5VxH6r`h(b|Bg_J8Sg#z0WlN4ooR
zJAK;bZ{=S(1=aF*_=%3ici4>qk%15U>aU3cU)4J4o!`j25a1E$=lMPIHkBl};L&e&
z^L8_j#Vt(XI&1$r6n^k{s%I23gd?6*zpw7`(iEX5;^&g8+3+;86VDZ?;eH>&VAbqL
zYdFehSbvu}E5BD2+Z%sXRU|&L=gC1ih1Zb~lk@T|_HdG!K3djHOoCdx^oGL1KzhX^
zWxF8qa(~do)8gMHMgCpqjS4Fg#QBiI?q>Ox&rK)JXURodX5VA=WENS-(Ul)tW-KgY
zlw4}3IEm{S_71yWCT}>TM5)9hzIYb=iQ1B2^s0Cc-lSCg<{=R1y^xu}azsPE#iwV}
zrYL?0gs~~IJz^0IPk=@<Vo-{F?V>LrS0OHZ4K~U-DbzC)1k>tz^6R?$HOe(>e&H-<
ztD*0V=m|mOvtaDPd<l}+go)4t!r=c%VHXi`O@80k(q_wdZ>>Gbf`ZFUXU?FYIj~1p
zv?iZXx~tQ2O^Hp3>9VNRAwWl8T9X*gFscuIqL|SK3>$jDx<*lPetgBwd1-3>pr~&k
z-fNm6V;#_(w#2np;Je?@i7`|3<PVTm?^nnkF`L}S$L~H1t`ign>^pJD+Oius2pXE>
zEJrRPDD;Rw_$`vU_yh>n*wZ~Y7|1TYO=QxJ9etDJy)kgQf_1~@@(L;C&yh^xWQaS=
zx={M(Goi*FQzU4+A+2ELJjPys9#o`&1_sQFV#LgI{cP#Ey_+A6SQ212(>shY=0q<H
zefT!;Fm*QPCmio!#%@8s2Qf^Y6^W=dn~|YC3S)TmJbM3Az@a;$hDr}S0$!s2#HTzz
zOj>5@HGDD=c_K=FB!znkXq0;na;W<7J#)#ZE|`>i4JiVT^R}R!1MxdeJbKiRM<zZr
zr}=C+^`nW{0b`B4eDGUEG8x})oi;zr2`=j3i2iDuFOR;7DFl`m-Sk{y?w@QV(C*f&
zO}qH-((ShN(;NeW>(hEnKd1pa`j-aHvlkyKt}X}fll&r?Piv`${cp6%_H;Q+!=*J4
zf3v-D2zWK0g%RpI8Y?2>DVNsNU!J>MvMk@zIWTLJinHu9v<{rcfR6rt07(se#G8Jk
zjLZHELj5JVjke`9cv6GXeu=)!CqqWpOcB?ofv|<$g?qx!8Gi_MZkE8d7}nisi3zOH
zr|?db{HpWdkHMlYVokato}<U@qvLDIIAy62(e%h>-?D7T>`C>n>a;lqHpOY5`RmOk
z4RS75XaAYxiKq7V^uCipvfsdpVrhO-ZkxiXZ1FhtrErP<t?Ew>1tU?4mU%YW9s%bY
zy^6DmKb~A?#+n<2)38k>zn2|YV~C+AJxv<EHW<MUJt--bwQCb2Sx)#{b(#f9?ub$X
zD8XD6@BiV`v*)!Q;ka2es9%$eM80~3CZSs?#QZj*%zWn4BB)ZQrzL{ip=mBCwi{RK
zRE{<U0<DR2M}pOO)0CZv<0IJ^0cz*K`!9^D&Q{;6(ij0>U7hN5O*nOjs{(;M73!W3
z`vT?`PmDwD3t)XS$GuBet7B0KSlppuPG^WDhtLC=b({MdxIky0SZL884#t}+GTE;Q
zR?_-WT<o!@NT~npR*su87Rl(lrFNzRu|-j_Mf;*bJ0-@0i3msQYrjE7-XY|<T%Mw}
z@D#Dr11()-wkBABkp6=M6jjeW2|fuy>sk3c5zlWaukF{eQRLn7J44tkBikJ@dP>0-
zO^u^SlNrURInS=-yqJWjY7!^}UPBb(C)_#4SjtT^FfcJ6Dc3Ls#@8~31W8Fo_^~TG
z6k^V)zYh{t^JiDgv1Wo4bc@SKkJ7pX{GNw&Re9ipz?m$5Nr(>N&d=MN&JXh1cvBQg
z<e))Q`un>=dd|%6`;kYq*In8_weCL87Jp7bn9-aJ@HO7RNS=Qi*(eJkSM#O=ERG^>
z7lm4Lq0O`~jKsj(>~5mhx6alt9E4;WYRmV*M6>R~{I`OlyV&V=u5s=X556zbZcu|*
z6!N_ApC;H9(<$IUv6H}i<*luN00$2*_B_#{PIV*gF1J<55>})y+tI_=o6wWMSeVnW
zFm&lx;4MIN6`odPX!tRGZ4ZK7vgb7)v8b&mtxokd(wU)WP+;YE!iKhKXF(98?t0Av
z;F<(7$r+8_CTUMw34p!XerxF~1#a#!M|G8Pco7UnV~Q1<rau_jO%tW&CZ7dT5932W
zKwh{IiZ@Q)W`qVSP{_*3PlQ>#D#2TA%OC}YcM|1$CtnUY*X2PEHh{LS4$br}YA)g_
z2HBpAeHcjSG@gm|3R3%U(`)qwjND&XE5I}3Ywmbq*4Nyd?@8h}^lS_a%AGr$+Noh5
zNG)#~6~q+YA9M!dK2Qt5tooXzEjlJ9I=RN8g@q=pvEp%-N6l^CfCY5&ihlk<kDsB-
zNuo`WsZ6Z)ot9DPl0Z-5vV6`^+!ZM%y%@$f8qr*A73cchOL&qaxoYuA?j2=Svh+sg
zl6GEnF<kRa6}P4-+`8~k5u@unk2dRMrvjo`HeJ^*%IG0TlQo9SA+jf!Tq_W>47`md
ztwe~Q*8t8P?<?^N*Hj2B^943kNl^ia4HRj-oDm}?P2`~0i)S*~xD}tZz8usbXHvx4
z?m1g^O}`6RUO`O{lG$DEuO{8C{l0L#5J+#BjB(=JkpE|+60giy%Ko-zQ9;CAeUOnh
z@`-FXEx`#+YPIH@`%C*5vvd44jIY^G^ZmEX37AZWudFNo9{df}quau<^i0%f`e=*-
zO{#-uL@R-w&yk}N)#l@OoKO15(RN}MK!I(2VP%T<6wsb3%gj4TuL%6#xV|NLYsXti
z_B!S0MYW+^m3Wz4Sq>4krv^8i@)`M^U{^nR)pCkyDD2yLvwL@~Iw7J<8wrdOwVV$#
zIV(<OjeiaL$E;s2CKM8J6ps?er2-t#rRBh~W%CK`!D0O-apKM7+qxJy$(?XBy04M8
z&#wr(2&QzXIfmm%P1i12#>Hd>8H$4|sXYmJ&p+G=rr+!~jAcr5<$=mhS6*#Ljj@Sx
z(R0a0<v34RkVcqyNefVE@;t0~FvCu??u9N_aBkY+a)~LT^e4ma;%4s}y#Ur%*Xt&w
ze1Avn++T@}U&vwp`eh;GTiU$QXDmt7NzWAsV!7Qk<vBwnZuT7a@?Rs@M#UFM>cG)q
zQrIEn0neKr2X~=X{f-p*o5e*?GsaMti@S(-9<31J<W@%7Qu_1E<d+lNgrt5*V|_5>
z>Dd3QKBLg)w~-8KzhDxBK;mnNT)X{Anb_I9kSSqAEg<2J-=k~xJcF5)`hEG|=zH6%
z&u_N)zmZgNEH2HMML)|5Pi*KY;9C5l%(+sj?*_Y2j4AFby*;<gJ_U8YMTgoBi8JAb
zIs|@-?zgUB%D=RpQ@B1Z(7t&Wqg|c&-TOb-L>K@x_v6o$=wys(yJPO+bF%n`rU_+e
zm&z0irFD8|UvJB7%5^=JJ4|#(o!BYl?8%-*@R>7#4tUpPgS;?3fcSCKIWmo7=S&nv
zGb+N!(6hjVg^9A1#Se;7EClHPIMNH`#$H$lW331HjTYo&`x-xy6U=8yZyFGrX2oJt
zza;ftLNdoYw|LkSGwc-*kTFU;P(~XFl#)yYA{`Rk>BaOo_xHpj-Wqoiu4GBBOSUJa
zfHW*VZ^@%6)TlO8$K{HtyQz{uFeeI;5)v8q)^>rMh(2SLjanFK^Rbj|q%tmHX>E!h
z@GgE^ndfx#WQAsAveU+!KK#o|+aiR6L6mpxcv6_{kn{-hRzGAOChD4##;Ab8HyH<C
zTm2=U8jpY^d%{*}*{FFZ!XIHKRuB5rw9W>Nt%(j!u=7aKnvan~v&@(0*!uVtfzT)0
zq3$D%lTNupjmkh!%6))T!>?{U!eo6{@szr{@G^<5aYnodcylw|F{x}adMP#-?;72+
zJFytvm|?O~2bkX5o(Q)k=GS)f>(RXVd;)E_`%O9WGk=AJP+aHycXGuY=Cru;E2<cA
zVLA!$svJ2cfSxj%$0o1gXTUe7wm~^-%tz?kkUrWK+Uv60X1~IbfF>4G`rbQdM#4b2
zgyE===)ilFUb~ax220>Hd7o;7;G6!XO8ZGwAd{&dmF~YP#LY0tv*8U@BF+zhpKEwa
zPde|W#XE)l4;2I!XJ?a+*xvTD&}XE}KF{Z0S4@|{4zNgI)ah{8$(ogh#(xn!oZc!U
ztHUQz$p^j5n>t_zOqcW2;T%yz5`HJrE%*V+$+zjv)56lo1mWHX%Apm>LfP2+d7Ykn
z*Jksa%K3G{c8cVpfk$?(*XCGUejrJF){t~3BslL}dWZDY0kyqT{+NR5y<K{o8?uUe
zx^o4_V>^ZZd{Y7b-K_l6z1ZkIyi$UJi^V;60+$JJU>#Lw7~FS8S&*`l?RNpR;z+i2
z?pUknK1nX<tIgx%J|7&AUde7@CZkf%Rr^~%K;yg0jyASe>!>`_MY$sW{P+81rKYm~
z5UZR-&w98nPkURAk4XuNC01#*T#x^mF<zHa*`j{5+a1-KRc0+)&#ETeqq-hSN2d^)
ztBY2B9KGrGpfQFe9JuS$`|tKfX`|(ACa@sm5ppCkP8>B+h#Xr?B}NElx?+dlz^+LS
z#J%Qdg0e;4ZJ4-3gPg39n8z64e~|=Qqmoie^m-XxWI_<zo#)T4dh7MM_VLI$W)~)e
z%|XItBG!e{XG$)+_SO6&X9?n8{h61{h=$M6_)9Ds`kGrYb+wWuO8^t(!R;*R5gj-E
z5ha<zjuZVACrTC25yB76IiuAB_&;FhNuzRRgsg0x{q0)OYg@=T<Ac3&`^YVQ5=~yk
znQxjy)ziydT8ufR=1A$e4X0Q=><4iK1(T`sm38ai{PLlT0{r@NVRSpC<gu&EHVrw{
zIo16gW&x~b_ZLsC%_yPke5{Y_(b!{!v4+nfM0PJ1cH!yE`BZ4=jf;EBwPuaBfv71W
zJ~|5`_rM*-z4W<kaJf;f4Oa^Hu6~D(0&E76aEVoWZ=PS4d{96j`(m!WH_m0DKPDoP
zugQ6K#m(6V2YDLqzfJ=xWC>B^$+Fa)FI_b>S?(2r75w9h$ZYBK(tCUo7d6Hp$n~51
zX|%?UTzVSMj&lM?1{C&;Aui3>Q>W%>O?S|1cYdwKB}Y~M6QZ+xb*S#XQ-WU!s653E
zaTq1>`;5bT!bQK@@LtAt*j+(4y~wl4HTYm7hdHD`a^2-ufn$>p0XB9c8QHt%ROs(X
z*r@76s{ekUWb)t|(%b=Uu)NOv+jNgNDFug9MADilU{Pr=a-{P3!xE*IF5A2n2gA$d
z2zcjyTTk;AGK{G%Qh(9hI7C_pod&16v*#LSzcL??n~*!#u@fr$OK!<%ZX#6P0LBia
z4y36pz;<?Uo0WbFY?fd0@6zb4-=gvJws<R%C^9zaZ9UwpoyZAW7<sC0t~|!He@_Jf
zt+%R={4P-IUJ~QN2MRPGbVs_L(=bMpESlp;KUH!dpzV<CEo2=opgEqa#wJ!HerKvg
z+vISqEoo#bxqR}r;Wr_EwM|%*v$!wau2%*BRV;X&U@Y2KU&A?`R?VW|{zhxecO;!1
zQ7Qpz->0lj`uN%GKZ$4^0P-5l+C!=N>t^&@oBDMIC5!x;AL_f?%mP*h_{jAH`@4DS
z(t^AAA9m(YpliWqUw&^Vp^m1sJq`I>dQ9m%P7!^Uuk16Te-a;1l$hC18kLrm?IZ~|
zM+E_j-;rmzdei_?iODr=DN$|-nHw|J87a4Q{r!&{c+t6bh#0m7$)5FXMx10b3))#B
zz#Q7wv(WU9<tSj;MO)R~CK}DV;G^ok)(#EDc?}j$^h=h<H9^g4qE&ID&C@1?qsDug
zqpC+zrmUT$ee&^{xbDbXr_OtKK&ao4?hfT8T|7G7mNEc#AwTyfu(N;%d9af2_@d&3
z5f2VON#;`hR6VFa_c}(qLh)<ff1Zyd@gvBqT*Ei-^TdDdUmuJ5W_pjoo`p49nak-8
zJLn0ltls|E`V8kec68u=xR-AY=C==h&ZB2P8E=1hVYW<G+_s34&@b^T>(QV_)&cld
za#UNGy7VDIe$O#XT(}OM`fIs%L8?_nkZ{Lz#E;w8b6(F+Cn%EIOaip6C1aJ?gGc2g
zDP*`}T*};>Qx1`{)oO?et8x|Rkh@jJWxRs@GN>tW??Ek2pRz9fvIvEh!88O}AdYT(
zb=a36;ug4BZ!~fOH?{usrl&X=d>M+EuKle(*YCdCBfQ^D)Ix5!VnAZ%JYnZ~<R+ZQ
z_CawS5B~0jnW&%umJ)0$u^%Xb{s$eCeZtzJZ1*rJ>RKRmB}jQK7=}ok$W+~V^K0qX
zZRw#^9o7$vrG#`&Nd6sK_S8Met0T4O3|X1~@|b(&QDfcaM>rubiF+0u?R3}KEmSO+
zEK`~El-ZQWkADQwwAyPQ5piYhyF_v{N{Lv%^X<<g3H7_kTp?sI1Kb7^m6;cunQ4=U
z5r<S=;n@q_zFB&=O`qOb*4BrG?|tCZirfsL%*6fqv}gZijM00C%GT9%kRl})g$I^R
z5*0keYxq}aj4_>{<H%DWml;S)>?=8kb~DUS)!GCW#+lw8ys2)TwBg<WgFMgvsdxjG
zzV+Ub$C*=T5mTal2q&uuCAeTpDCz|t&G<Gl`UTqhd@1`(Z7<hAFVWb4uJ4_ec{9Y;
z;6FM3jm6ic&C#YCDG)$z{HSk~y-Xh#>+FXD(go~2Xq$$lew9gIDSP%8xvJ!lGnJD!
zNdP745F&If%_Bkmfsmf|Yu6`xTe*;#e2HCM-O{4)r)H9|MT>P3L>)0+mayAMedD`_
z$If&QZ6;i0rO(L?j6)Q2ZXGF<9u%nH$;#1zW>=0+41{)FVyo}(b5wg0&~g|TTf%)R
z=4X@cl=>i4|5c+<kf6gwG%abG!Mjo@gzuHwgzvZA&@|$0rcdt(%mgYLh6{~^jSCfH
z>*G^`gVc=~WYCD~3S`lUF>ivt$C?w6HM05m5J>Mn=%}<S^WMF;Xl}kMZg-$+QtF{~
zv9ZYXHl7P`{hp(<Ey@W}3Y#^0J7o>cvt~Da%GGwSEKia?nnAVs0aMxI*Ooy;HJwsR
zPSJ$(vq%|kv*=KF<#RcpN~fCPjX5tT*fb|RaYJT;fZ^At^tppR<)h)sInn9~ntIu>
z#+Gn+^qK{4UAhbPBhnsn_Z@WzGVBJd&n@GSp^;#w<MH6+>x6$}K?w%6FY0|m+!Q$!
z>@(P%8Za)#1Xi3)(Y`1O{Fft!>t4JVk<(e%CjyOXg&;w$b=oX(1i#l59+^tjyaD)C
zr_~C@@YP;j=A3W5whlzr1z(wrPbO$0$C4{6fYp8^Z~Xsd5#$hqhDV^n#eF=07$P<*
zc3YI-$x|+oWrm5zZ0Oear_dgNvIKp~R8}66IWTx@$K6PYXdVah!94@Ki@lXCQoT{4
zWPQisMK0ggajpDCg!KMOv)_Huhjl<tx@<;`PN|~!2c+qf>z`p|C@#D+<oOGQFDfvB
z`~yjP+acf7PWU(J2=dqYUqF}viXX9)X3G=l76}9V%6I);W4`*;$EobXOisTMeElHN
z7#Ki!i6M#v9-;ec$Kb%c2B9=ig5AoaY+H{sLg4GKq+rmpfTzxPrliFpNHhgYb^2NC
zoDU6U(j^w{110iO#$}`NOk~k(uNr?1jw-Bwf?KRJ?@Z)%S0?Fo9cKV_DoA5snc)wk
zoU2Qc=}|eB7Y;@CC<f+-kMaMiqMkiNQ3M8H+yHov|Jj!R(JcizIH~X*J`)b*%@=A`
z=z+GzAc=tQBW0>6O3brqTm(YS6&LH45$Dlojy~mQQOSP7Zu>v5RtvZD-FsRN5OlXd
zSDvo$mGpi0ipwoCaH#xhKX4N?*IaW*AcW+Ma5jTbej<#<#mPL50El^asBS;opb5Zf
z?30-*R}nPxyWvPEs=RYG9r9(GIF%$p^w!{T6fyvhBtj?kos!F%tS9Y(BFfiR5pPKs
zB{f&qnTy?)KRz{RNO)(^QD?=dd{5`?c+Us(xw@}guI?Y}aC`QAyK}=(Im>@ozRu5I
zzqR(mk>3~5QtR^U=jk%>(Tk1`s;vo6f=iCPIFdYH=atH|1VwNWwkRj`cksVo*Zx>x
z`)@0Nnhf(sD8>rqhu2zFC3agR^?*Qa5qe+%1*T7#QNRez+<>iT(si#u6OXij9$v<i
z2P$)-u!BV|0^Ql70EWGA@Ds*mWzHJ_2}y-=3}xjVuU=;*SLMFAi^l>il+UVeM=`gk
zMGa&_8#YP@1q=}a=#{k8s({1w#LV;?*20OL#wgVYZgZgJn2AwOH8UpHCV`Px>w#+i
zh8ZIuk;_f$Y%$Vz{8$D!>y(TM-9hO~>+ZG^48VsN#k4Z;Z^2E(^v(GlxtW{F7h4*G
z^KXqFFYrC6+}zDqY$%cqzn=C|cvjx3S0T;z{9NYh>MSLf#;4PH!;9YeZYnEuR{1hC
z#Jw~;F{t1F^VMDQ=93MVZh#N+LmjJ2S*O6FjD?$kGxs~RYMkA8`E1=jc?^sHs;5AX
zn37%3Vpt1hbgSZNcaH@Z7evMp>Eo_eQ^Y^%lAdr0TQt1Ej@~yEnP{6?cC5rc(BXrd
zE@krS);rN1Mq)3H7sPLb;x;^1&QR+qLN63wXQx6W`G<cAB^E_3*>d=&-WHpKfid&t
z(WKwdGzl1re-tFPiZ#l@$2hOoVRlE}HW|RfG9<I)oseh4wC!}s!}qD?=c`&PwP$_>
zpWLGk=atr@obkPYR2!!`xt0bUtCkkwX?$Pt<|M~3P@WF;3m@D(^;C*&l;k|jKLx7d
zUtOh<Jf>QsNb7O{<e?Mo4Z|QL_UiCzDfDo~1Si{#QHR{`-Ww|ccH5%AtLwf4E9@9f
zjv0$W1;W@_Oa{(A+&B!z(U*JZb~Mp-F!~)?pG6Z4CD@X8YpdRGZsl;)IaJ+Y6iTm;
z38X;Y)^Jp)vgw=!1~@6+3kANam>8!|cN5d7c`DebChMG!+TQ)}GAjGi<YDLom)1_E
z`~M7BK1u?LmY*K_fnLYh?f5qE<cL*PLkju8>kMV@C7PV-7qNzNW~9V$!Mw|D{!+Eb
zlLoPE<fp;0l7Y<a>O6<D2A$vAM%1ih2Capz<_@qKdhJ1h;-0<Gl-!rK-DSD)bE?VW
z$`Ok3iS+HxV{I1lPiUw4V?Rki&E*{4=yt9fo)-l)h!_>s?iNVQiEZ<eM`0grSwka*
zt15d_=B@M@;km2D9af`x3~&0A#J(zOV3d+DlEAFHl?2A+3t#1btFc8_XyG04*2cz(
zAsrj!{}|du2&m{L*GBeaZl*;c68G_*9jhIUs+rP~R4II0+@@>oxTJ2B$g!dp^59)Q
z)|xaDY5eI32a*4%Tsm{=NI=FvihktGYMZs)^RPi>^+nN(_!qjv7bo{#F~&y%2Ku|#
z`>h|5=9E^X45#xZDt?<wa1C2`f3U*jwV|lF(wktN9|B9foA(r1coXGC4!^MD5><H?
zH!m6L2(Ql=3JZikd@qqL(;nJu`-?%NAXO<z*w-Q~&IYbb;)oeFDs1whM!|)|pUhM!
zm3Ru<UVH8S*oT4cHtYLLUU?f#o||uS9NYV?7s92~vlcHDW;RYgIy+867wE73yMLhi
zYl3Nv*QdD{8%6GTWD&0+vhd?`Ea#-dn!H1-6`jP*Aa-}%&6;*6i_DTQ%Px+8RD&s~
zb|L;)JUO7cqRrfS;My;G!n%&!FWBm4DoZLxyR}@9mVVD+ko#g;!hb$zNi;>V1P8>T
z@@xae$9T1?!GaSeAqIbmvQ7FybG?c&i%SvcfR9*a#XuAa(MK~^s*A9_8S|`J^GbNr
zuPo$*WrDso%zycmqX;AhNks_-4chBQBOcV4t}N*d)R~~`XPf#<zCgnnsoe<`^g)fi
z!lQi3^P96~LjQE3JPF>_g>svGM;^KR8!EWSY39%gQuZcg1NWU4So0ago$lZCj$$mL
z)Y|rhBI+W0uQ(#UJF?n3k<gH6()VC~2N|d2f8MK!^9R{E*@V&f9cR-D_bmlJEc$N;
zF};mCZQ5Gr-~L?LX+rj5Cc~2O)ibG{)e>(AZ=3MGQHYOSdYwl<D?x2&T2WGmY;e<5
zO;@?`kzG+z3%lzS_U_r-`c>0(<3j4Vt&=9T_Mle<tE|&xB>8*ofpx<&8QbZ-fpn9m
z$Xx9UBGuHssEzd{;JgG3c1eaM1!&cE3CN)iO)RTg=JM}ZzeM=PwOb_8*{YPm&4Al`
zN@n&PhQx`=MVLFj{yW0liJwWo@=J=EJP+mPcn;pG^OTp3*x=5Au=A&~IdN5SZ!>ni
zh>wDdV<_iv^+}oHvS(q)vKx(l>eOVYfc{Lr_gDW+yubA_^8c%go=2i_r~s(Y=VyPC
zqza_mqq@oL?0jEg+WIMXuRw_K%~hp`wk_q4H_(BM(tKI)zV**QYu0LEcpLP9smvJU
zM4ZU|{J#Ex`cPyj7vl9QXv~$DK?7E&uyg5)$do*KT=xA<#m0n`tx<u|(VcsZL2_cm
zxjAhAb)Y^q>B)5x`4%4U0JBbSj)lhd`x=VD=*s>5H7wdo=~B{iMYm6}jK;jp+=e@)
zMh<GpT7d-K10Vd6;(CeMe&DS&te=A^w!J-(ZMINgv#mU0U9nH`NGSg3Q(a!QSjB;u
z=nSipfv$r0_%sU`^7eoz^`5V{7J|I!^sKtC;d-I<Wt>H_`PP%+oE9*C*w`;7v-)&o
zF5)acQHLmCDF2_2b4q=<xSv)3@}E#6Vs#RyS(x|lqa}r%bp6C?foxH#iXhKQvGIew
zT~{fY3Ok#+M-sYY6GTM!yoBWPvUVxZuhDqW`H_L0SCos;9;dtEhDT?r`$-q=Z&?5o
zo4cg#0^M#;nl?MMh_u&dZ3J>e7VYYp_H5xVIaFpeWCrDuq|!{6hHC7eAQ~lixXGgp
z2HJvgYJOu!$QImS2?wikS}k}*d~hAv*JpR8L8RM*wFlK_u+7Yb-T`R?X)2H*^m{VY
zhJM&(Sd&TYzPaZxyyd1<7TBwG3mU{{*7BG6ems{r;~)|WxAOix<8WcOEe}KwKp9x^
zIq*&+H1H9_bn5s5mCqxELew(Z-Yn;kW|@s*@j}^c)=8}S!s3{T&wESW!YWc@e^dcw
z0TtbRlDNJN;#oXu)1J>|rVOuIygEQ!UY03#VRJ;*Juk4FS7h>=oM}^T9UEr4MXwxK
za)~eu30%N>ksWH%VyPhptR!p{DI!V#Vq~)NlJ`_>`=@#4kin8uAhluV6iI%zYQCfR
zx4HGVM%5&=^tE9&HWpuz1C*beR>4{l$in?(t1-w4%(m*D)VFO5<-DYU$>pQdPZ6;g
z`I2#S8v$mc+Iwi&4{ElwsUy-?6JV$|)rq}P2GbzR(&IqJoY2|ROufC3gJ1oI4I4QN
zbwp)}D&0Ph(|RGp1<@~+TUG1>&1abyWTve*C6m*hYDYrm*YK+9ApLSgecJFidZ^#m
zYAg8TKsw5UasJEH2XrDZCi=I4QBd%6$2`lzWb4qg!SBQtKzbmnLY8<VJePkX0Z+U;
z$rYMo{&Fve^!IC+V<0<#VKxMG7_ykoG&#*b?fyXm|5*t0gYO^`dHXS0whv+w!Wm}H
z+Cp_L82=HfvN<(+)rh$J0(RFKoL6chO$bLhIpNK?tNa@saT3rBFufVu@*HG#xt?%_
z?W{QM>3a?wd{D@9->XYwh{DE1^xWUYU7XMJ%~?Nyt|GZ0#sM!Yya~ry%%Wc4tVii9
zS8XCmATq86DlO6jyVky8lKR|u=K~}+c0U;O+f!f^g?;4j+h}e$&0KC3_nY(bd+$6b
zl>0Qg9h)3d<7lAiO0d_!uf&;~LQTTc_6ZE9C~!zjWtECOnA;+$IK!+aHR=V_WRk#;
zB^AQG#5MG`0u22i!JNJSx$@X38BicL7ew1fXU3|vng2s5`h}^gLXK%iZF3^)Sdt1w
z6q8N49b$txrO5Loe3_t#(a2J-@inTm#tkom|If^(h#2&|mr0*WOQ0C0lHc5CXTR@+
z^AA@Y!|}sV3%>>{<lRH|f4u-~3yIO4tT5Gkg<t4SCi-Q$@9{CzuMqM+Hf=;YMFtlW
zqMwH6A36qHSa`-&-+jHc@yu;p0+BEF%+QfwVx~!GZ&-eRm#v&leHOp7LzdYU=+zM0
zC^;u)h<Fp?^hCO0{&;+(<dvr;oXzM<W+47iuo(mN67N!D^HFFi^AOb@<!lT)b;)Rk
z+t<*2us*@Ko+>4;$Pw03u3YVKJyW!cRd`tp2ufMTcYpQI8F>?bkYIF1=Q9-6e9OWf
z2+D=_dCgwV5efH3Hv0>zmOY9g4{7NuXS<w28VNjPM2YXVs+m>@4a--o4}>oygMkrC
zuY?sE#JvXyy^JTuum^<AGxw1~HsProF*>M3`^wGT;XhXi<;Vj<la|-G_&J}6gb_;z
zCaGw&k963{AUd=jV><}B$KwIe8)dcoB#Z7xn9K9tXxY-$PEk1qI3PaOlp9!J1iq$7
z<#Dm0Q6v#b(;R(FwARcYu{wz4Z;|?GHTQD)G2oF&HY;qcLNG$G{q(n190|^F5@|wO
zQIq|!sFAZsul>Z}`)?*KuMBL8zdB}SIybPT1ZB$d#+(0=qtfIy`gQ7e)k2?DlaOyV
z=I#jJ*%lH|f~_TU>u-cAM&UcHC*)sL$d!ySZ$>C;Nx~<96G3Sb#Ab;#iB7bgT%4UW
zP9eS^14h(nU_QcDdyp9U&9hwANWj_QgXJ?gg#uXVG)GpkwmdX|nDu~GQ~tm`ORUmb
z6p!jSm`pwPtdpfl6>K`K8+9Vyaql-t$)OGw&{f<Q`M2_WL-y$Lk04)=Yaq0gGN}7E
zn?ZYgSgNN-77_nkrXh84k{QvwsKs%k>;ZLYp{rHwrdLs?04+rywswnvQo6j293xJz
z_SXd4o#uq{{F`a<^tvpZx~WRuC`lS96!)CO#i~edcS<JoD@hYBdY%5v6CHIV@0fTe
zO~uP1iG;R+oY{t~XKgER7+vi*8Ub4bm(t8cF)7#dy-v+!6J8_j3pa#pNByTf*vk#`
zVLYQY=$PV&WQ8h9mgIUU0}*?`qWks;Lwoc#-J6pM2V=p9y&Hgo;a;U?%7#N&uBA;R
zJ*Su`3)HT==_BUMaSPw1nGBt|chG=W7FZn?s;6~n$1uz4^4__Bje(KSINuZWRhb%?
z7ht1|N!Emi_-wZDs3K*hit$*Z>|w<RKU5_6INya{VlgEp-@vX$E~>w<{eLm-c{m!B
z)WOF`<w<}zCYnCFGw1#mZ4%zvLcQ&`_fCA~=d!}|Q494kdednya%AwDc}rN7-l4d;
z`3+%6e|$&}@XpD*52cV*r5(*kP@b;$OG@)Ww($4Z6G3zGE9+O15rtI<7jf<VU#8iz
zeTv(gP14`jPNU9*nz&K|;PrmT={G&2Cog;>@Dc9DsVC)%5Ve`h(e2he-rUIeZ046l
z{VW<JP*$F&g7QgsBji6`pq5dSe<GHq&u6BVWQ5ciujxegv4~Da`|fh-0h6*OMXf`b
z2&~c|)t~rNF!3L$v$B=G>WAjoXQfKfKzq2;6EMmGnA|Ur^IsA>hJKqH?fjfPu4Eew
zS{2Zq#(I=g(MVRgaVcsGr5J0=oZ>BD4E-{x8{q+{Sc}m0bQdx}8+JP!gvUMZyWR4K
zkPKxM$k}LuHJ@m^Yy9_R8+!q+(s+fHzf54QRgrA$oC1IiHQg<O3q)tR_WJF8c|?o5
z3l;V@F3=)`Hr|qt@-;C8vxm<HAA&9@7q)p>ZdDbF?3Q%F*?UB*jX|!N+VYVh%7x|A
z>yeuoABsoN*n(hkS7Je6p(&92XecT13CV7)Y*}K+kb8Z)+|<5x`G;(Ju4&LC*vA4@
zSdZxId#~LV91Wd->sR0AJb2CMau`alDqndTt%N*PsJi{_MB9=+;>x(6IKwg}oxQ|T
z;p{*HX`Fzue4TqEY#gn<nfHUb3)4&nSTmr(C96S9>muZG^^c0pXwJ(CD+8!2$%^g(
zV#(Q4$W0=TXd0~xoq4z@5S}pG1wqdoy~i#7BwO}caZmK$$BP=+fzVuLH3f`GMHON~
z-K0v=(<~wW@|y#AWpbk|68ON=Uq>1WGl_NAvro@*OE)7?yNnkDTLl@j7}?F#>f|@=
zeOb+g06jAMX@6x5&G0+}yk@SDFfq&MWfI@V@lrBJhOKt`nCJz?Q)`wdW8vB&iLO_q
zR$|fAd*>WB%kRH|uB0{aODPOTzPaj|zA%K>7hi5ViaX`XK1Sb*M_Hqb^bBo;XLIe$
zBmlIb*IW6Abz#4<n4C&)6wNJA^+k-vk~W+~3&x#9QzJ_eNZhps*v52NHdrqC)Vlcg
z-McKRmn2n*n3xqY9Za$N-8F+wNIRL~yKNZ_FSTdRMo#^)z3QePe*5((Xvs@70o<P7
z)7VAI#cJ~8O~~X2!ii?P%I@oP+@2^gnSMg6;rvTL4MJ6NPe>m%vGTHu%}UXxeJ=Sa
zb(z1L7FIA$j*0Uqv-L-dxVV4Y98+1z7Qw6lF`onvKRmTtMP1kWXS+pYw$DxcGH27E
zT-sH;g?uh@ci%CC<#1jQ0RPHh4H)Ql@oZ&si2A8`8nF|{07N_{KOyS99%Vu`*R4og
zBXeQGxo|Sm6r#1~=X65(t};)&vv9pT{kmCBda~@23ugUolOGD>)BTBK5{YGqsmBP=
z*CryJzjfZX%0zJ<J#9IaTQS0(Rno2kExp{U@(9-#N{)IS?YVop^U}SpsF&u)oQW#G
z45N)p`RhJ;-HThRNdIo828PuNREAZG_HSwTaC><vUlDoJGe|@ia>DSn^_tyv(^W#n
zGgy(oxYDH*q{I}OK1AYV5!MG&Z&mXaSBa+`Zvj?M$?DDhW##|zbp8;AhV!%pI~YAP
z%P`HK2gYfAJ<BT?SzpGWOYi#e<TexkltIhHiRhf~y70hilK8V#Xt1P!4$w29KAvHL
zQ7P<aq@Sfu3^J9;YSjLTf;T7Jo$Qy|Gs<61C2BrNgyZBslqGb_O5WqA!;G|M(wfa3
z>{8}Ra%hj|wW6+9@x<ZE7Pw2eH)U9lMW)eZ${pK0gOPm+9<AgjC7u3OzeJz7BWbLm
z-F^_KFPFy>0%1`m?3aVy%o@>FbL6#aLa)B6?%OWu$hBZ7;q#DL9UCX@IqnM#-XB-K
zYOY^LhM3RL9AAc4jIuLaaM(Jjop+MX1oKx}jnY|(Z3>G*H=C?((YUD!W$j+Pp5_0`
zZ~sGL1T%tgQp*zD=22$%hzkI3gsUZf@r1W4&b5?5p2P;f67Xfjl2vp^z8){R&cxnW
z?kJ*F->R)qx|zr{kMZt#rz(3EYz@v;E)BALBWG>E+INTE`x=BZ#&M$^X@Ps%jV#`7
zHCnWk?D-XZ>#dUeQbA!f#kMYR)AbaJgXkUg>U~2_*b?NzJ+Np+-KbEIrj;7T{lZ%4
zDx%Ey8z(J(!`V`H)+;@GIiKcm7GD)NX~#)>;yz3b5~t$WMXsnEH4W;HjA*m(OfmZB
z(7*&md5-ef-SPgntX$P3pksO7ZK+2|j3=i1vA(lJ-tkpGY1{dgIM7C^^k1$SMqf+;
zQEx1~uHt{S?JG&lor|dQv)g(3><p@!x|*3k0#?`b@`u2i_^eisL-%emzn;{NhCEm!
zNA@vLb;WWS$iVqUafEl9s*k=!yfAE^afzz3UP{(!7XNe@i!ZFf*{XHba_3^{dwK&t
za}sNgLFiD}@E5LstjC56p9xAT2ivi^(OR9~8b1#=G-F4E7ULCRXyO?WS4o?SzMf0<
zh(p<;EsD<ne6JaJcza%zCm}v6zn8tc9aDS%n$ndI9RkH1T5tlq_EGhv$M7%=Gnfdk
zLxv@uDNYYH=qoh=2AWsaR>B}A8I^rU>C;!?uh`73iJWh?XxM|TDugcRj<&1ZC=mew
zLFKXie4Dd!lZ-FNa@mSBh+ffdf0*AIokDV$37b-18+Kv(f1@}`#}|#ZOy?YdKl_PI
zkIG<EW|uPx^SkSQy0-MUU9b?C=eMRZik`wI7+7T;pZUh{J=qT;C}wC_(7By$<v7Tx
z4rXx#7BP!Jg{me2@kW-BsXp^>_<)+}ints-l)-Bqn|;a<m)(k)xMOIPq{okHt7hGE
z>`RR0XX+j)LsVkl*iAMK!+oSmy`uYKTDU;T_+x9AYuAgxL#Yhy(X-6aM0@i?c#4$6
zZ*~kjmlxiPcV4Ji`g|lsG~#mn;B&QS&?4(QoSL;s+VI~}dL>_@pzT%UPUQLyvyeaD
zJWhXLJx>N5Uy;nF+m!W@(Mn?u^3Hj#wxd%#_hYt*-{UrB<j95prvL_JCev`)H;Dhm
z7-j}^H|)N021m+@yE7|m*Wu6E?+Kz7v)(JuW>HUDwI*V#;F7+ptd`vkp(L0cv6x7i
zuNI=lVxue+{du`?&kz=7HO5UI8sT@V^>XZ5&UL(#-p(@&*CA!Hb!BhqaskxbLs1vF
zQP=#~CL$p{^`$Zeq6mx7r%JRu*HG*HOnbT~)crH8tG>Jk_8bs}*Yk@d+BN*_NfFcN
z{z%igQ3s#MAr_PtieWgSAUzA2TK7kii3Jih5gzi{(6_$>43XxCWzM!w>LTm<o981g
zvM1UgF$`>Eh+snT4nmBm#4+`JCU(oQyMcxiO;!?WM>W~Y|Gk~qH<-7^boT-#49S|j
z`DM^Me6j4!7L-SVeavj$cA<F4Sa8Pj<p-v>m5bIfC}+7|^-HX<SC5HIy!>{O^porg
z4oSI=1oJC6c#bP-(rR-n3!gu8itM8ll6S{veOf@G*$T;IKZ=HxHMKa!DYrcMT^m-D
zE$pIv3({C)v38SfT{}L!T}^_}?hWK7n0B&DoqnX(eX|$mPAg@rI@}F>ewjI{(<}sJ
zK}45^zDhcnYQc8F@$6yEMx#N8kpY4Hom}Z+H)bE)%l|*18Fl3oQIE_LHWnPqp(`-a
zHJ*f1D$IbB2{ArkM6SC(`#3Zwf;~1zw<4eFhSM{+Icx0(af}5}3O;rDZcgF5_v$+=
zt!(@v_7C5g*_A=Ic_(Z(#&*=bgBUvGj+*Y^#IA@h%rPjJEMY{YIv&FA%9Qu%mT{&c
zZ}6edY&;Yl*%u?2pHPl2@BD{<{a0x55l}?@Q5gcIfW75gJ5P(5D=Ie!se#ytb>XOu
zCYbmxz4OZ7?kX6zC-N60L8Ys7lmI$(>Bo3Fe;k_mM3!Pd0>X)Zk|U%~4~E&1+UoHf
z*(Qd$;dKL_SbCWhkVo9{=8*AF9;Z-zlZ?10Oyhd7;y>xj46eA980Y>G@ihD;{$paA
zKJuceXnq3+14Zf!`7f#Ok9NCB24z-7%<Xwp_SkBo5OZcI)#GrTEEEtAvLa68siQ?<
z#|9q>Fi(ub23yNV8R7Bjic8udb^br{_+&m3#hSeQts_ljHT|m~$c`a#n3MV|NeGav
zG}|sQ(f6{x**bp40*5xXH*(|0py>NHd7RRZ5HnB&C>*d9TraQO<d|~b5z`(ibeeo_
zN+1ZIf)Sq#d)jMXt%8Turjgumg6VJ~teY-Tl+_(K!|){?tO{}KJP~*I%;c~~hl|;m
z`8k^b(BowvMYEPfrbkqU2|mO@0ur7yyL0<)u5YSaN9(WB<L`QxU=uKCqGU>G8$IGj
zt%bt1twCZUHpi~?OkeoB%ub#_7b(lJwoVBYucoMextZSd<xosRUCmhd8cTIk7mR~k
zs4tIe;r)c|@R<c=MPgd4|L*wcSm&XLao)im|8ZkCbauXIrh~ma7WPh}&Y>Caer-rA
zfyrJhi5aY5xUiJr+@T5&?)&kBsPOgmX;?dXB7ruV0%Ow<XSL%&_;Bx?f82$;KDyn3
z8CW4RmXQEx1Z^>Y0#$%rY^qI5DZ0dgqV9OU3SP{#ZoD4&;WtEp;b~0Q!ob^wb2ODt
z?V|x3N}r)tb4NKU?Aq)Y{<msCX<E53Imw{OpY=@BARq$Q{nGKrH<=z^`a~@l`9fB^
zMjj_2z8SAcuN0^0m)FPwNN5<Q?Wc0-eRrPE9bFbv+lNE{p}gGyw{p5v1RFE--T&b<
z66!Q6%BEr3TzRvqQ6Yi|!_p*X?)Y4$Pi4}_T&)D2yug$I6meCRL9i@Neh;p<Xm_ie
zGO&MgUIDSeQi35CqxsC2;cp2+j<B)ed;PsCjV+~h?)*wM^zCbQhNAv)x$kvJ><l0)
zYZhofjZ4m(kyDoQgWGoV%tFvI3gSZlr27AC#@V2Kq&+3s;z4gCxLl4s=1ZPqYFVUD
zd)KbaMl(cFW}Y}9ezu6u%RGpbk~XK`k8jgylm;l$N*~B{bl_ngWY|-`|NY@bVP}40
zw=#(Lqv8zTRl4R&V)3Te@g6gS-MLRH(h@9~>h@$CK7=2!tTYgx0LDoaekw*un@v83
z5o-LLlrX-wWzAhU{{X|k>~5+q@Hj{po}WBcOm{bsWkrJu$e7Ki)gg#?Ljs`j!;Q@Y
zil|(~DEE5se@1S93=+}I56mx}Ltm5g`?qtl=r)Y=6*B6R3sxAy2)`yc3n?>~n_GwC
zfy<1X$lDNc0(11!p%qVH+<RcJC@whvq<|^CVjhQUxiih6-vCU0sAR_Qhxn50cc;4)
z^z@Nk0c?GqzIIun-tu_glj>&?@9+_gkHh2ycKh>hs17wEHq&hx-}>itt*2DJ!-tId
z3x3w3Hj@yVo41>ABh)f}clHfmh*vZ03ARfD$T}t7Z15E+*N3-+^aUsj(a-Px=ZuUJ
z)rx{8rRB+`WVM&x4--dGC?>zsjSFH(R94Nj53nXx`BsnOZ1`(;g8)4Y9QvR2YNEQX
zYxn+Hp67vaeP+M!a(6NN2K?rK8`jLB{6sE?=~FTO@5EGmO@h2`(oyrbjsYH@UBAjQ
z!r=xE6F>aTzCWiF32*v{1(|F1<HaxZ>1Uj<#OJDeXZIpXYJebi8jD>OV#-)AOXxXj
zF?+rBi}napXCOSP<3ZF@9QlvVzDB22rX42gQpuy`5&qwE1b+=jgJ+gMZ>&`Nsu)ME
z9YY*NIguGYiDuH`I>39D;VvfIMUYrA!EVcgnE1lS<$n1YJ#?nI<8s1*+m)TfwtAC5
zN>-mcwqxgj8vaW$_<O<EvOabc4VcxojHw6r;^dQHd~*=wRahJhf-eFW<4vVT%_H9Q
z8WKC1dpZVYy$VU;xxy)@tqKbMr8tejJG<h%1d_HWuqfhM2Q`T*YwfhFTp&-Gq&vdL
zA1bzE<`nb&8-8;?1Vz0NHZ}kbN~3&yu=ZL?{xenh(~`bY#5CqHox~;ATxl_4(t(+y
zC6?6elu-_mZX;=xKdv4DXo<fF_>#qGHuCdf_a-%_7B6*(_7O9{$k=18?h<sV3|<47
zd*T@x<8yReyt-hV#H>wccem+>+yBg~SRS@no~hVYFa9bKeyqt&t=SE%`i0~dRAbEv
z<N;)QE(#m<n0i0#hwk+|snO*4uYW)C{T+lyR?D9%NF-%4`&O{|SiL%$cnVji<g~sg
ze0%j4YEfY|4%?=%$p=;^fxn=<4*!SRC^4Y*B$Jd+86v<C9h7`JgD-z46ch|G%ut(k
zZvRp^HjtK3$kEbf-EjZwDAdMNapyHfUVd6dXHQJ?P-nl#UTq4ZYMM+?d5E~&Sw-o&
zitZZ<rX)>xwj;7ML-hR+4nnzLne-0HGR>d&GPJ+RUr+GOH;scg>hmNcOrE+II?NIB
z4i8%y6qkjqZ~x!BKh1zvvmKS%&X?i_ebu5KA6$H(WXsSi1Sv}!{zt0F9JVv*2eCQm
zr2vei5@gC^uU$zE8GB!!COz8su*vadgy}NUc1awFAAt9rIXf|-pMC~*I?o)#ky*Zd
zXx^Po13%^Y9tq6SBmiA#V{5(QFYfUy;@D<H``ZFhMv#9Oe*Bdv{3qX`gI1$bnaq)Z
za(fCHTWaZ(jZic&Us>vVIg9gb(IpDU0K#A)a3L0325Qu@WFBwMk!uC7bx1~Ogx&SJ
zh2$MlxK1UihyMwjwq!iJTc`Zoh+6T%X|ynYf$PekJtVP!#`sLdy<T{7$_69;K)F={
z3>d;_y=dhx===#&%*hQMWmcqG5^*KTnOMBYssDx3-QqdUew!&!-PcsP-j>hhoTg^P
z<wgtkXx5gHN;8+XmQ@<l;#LqxPM_v>W?Z8j{-Z14<^PeZ{?TTA=n|3++Kly{O5{4E
zPFq?nTNuI9T1>p6-EJ1cj;jvCC#%ESDz-d4A3Aln{NUG303;K@1nRt<O2ieI&SZ8e
zAoqckmk0^UD@VKr*NKNii07t(i1RYe&Ep9}OX9SAbJe_JSKMHUfJwr}fIQ@P7lx<7
z#Vq=yAr1I~?NSU59$@WiT<828nP`fnNsT|!fPXJ#@bi$MSK4uU%u;$YOV$RI+0$fP
zNDa8Fkp_L50Fc*p8@R~7%5F3n9Hbu}mB}cRq;&15$hDmL54YfgM>cj$^MW7k1C=_5
zN%KqdPcL~b(4Zif4l(Z{#w0(Xbf~iQF7SgJI;gO3%*gQ{0}T4qwLi-Nyqb2O{r64$
z-GucN9i~0>EOy=j7A(K|7!bnSt6P4wP(MdfmB5u{Wmq>w78uYZ=3m0j`GIjRxE>EY
zoTaw2^EjH%sAIJ(i|fbYc;%tzxC>{ZFfMywT~)VqRN_IDQ#6p|F^SY^buTRn<|8=M
zQr6R7K#R|{W;$I7FG@^eO1cQ*tS+dp8%MGIuQ~qjH~I5vh9tWZdNQUcU6Ys{1tb=h
ze6$A(+bjW}AJ!_Gk9;DTEUHvF>aD(Igk8h8@N(WMZS#bl{-5UFGOViaYa0bb1(B3c
zK{^c(q`Nz$QA$ARO?QKIOQ%SeG;Cs1k^<7rCPW%GUD9Xrd;TFl=i52gd%b?rC9E~)
zTw{)Lk9&+YZ_K#+LS!!3Q@kIpJY83iX(g<eFC6`Bu%1z%OVQ&|qoS?xG_wegQ5g6{
zb{Ty{hEJpjY;l^`@O*Sfg+H_wze7^yPRoCEmoVXe0GH28{+HxFxQb{&UuwF!SadJU
zR7Lt_`?r2r9Gf~EtM|Wx&!~=$&NO*A=3HH<HQ2Y-)94Ju{^Hud+jEvF^?7awg?ij9
z*Hmc0k1^tL^$`&rX{>N{_tb)6eY2Y*T%PQJ3XMazEb;u)T^ZX+0+|;S<?m}Lc0$S|
zdB`Z@*!8CVE2&=OC5U6W_q8ZVlQ=7L{P*r<_s43vXoY}XwR@-OQkkQ2Eb;@!X`hSx
z4z(pt*o!ADnV44bjlpb#S_@7}gK5a>TZ78Ztqsx}k6Sh$H|5mlIlge3k0EQD9L+o`
zsL;$@f7;c$Ot<2&u+cJYT}AU^rKE$%wA%{@O>{;p`KgrChp{-a)y(|9k_l@Z{r2yX
z*ndcAb9$3cxW#w$+4pC~=t}&ORU~s)7vUJJtXD}d)I)0*D0kmPo#eJg=My^^{#}eS
zsc`21j;4TW`}If<;U?arHg!(_b%|(RlXRI()VQd6r{JBupmKs4MMctW+h2;>#6~Z)
zmcy$DEglIJ%rYAam5N7<<8LfED5wyLYO8z14=?11N)WKOCvr3ykhdkx@hyb@N0VHz
zkcON&$Lmp*HYS^)S)!4(Z?fdu8S>2hamIAIPn9m}vOzezibA|GhA$36NSNc8>J2?j
zJ?98^nHY@&TFxb+I}#aI1L983@(T^ZOW`!9qy=xx9`IE4?4n9LNpU`nX&SP%;7zLc
z!n2{U^W@wLK1zj{rA_Ute?C&ZM_J8}OGNP5qqANBj5c&*-Kj9yLvTgmDbVOuOOg91
zL&&IQ>^Nf|FZY>ul2$fh;HSgmas%4vvclrJZna`9leDYzl}&J*x4Gmi#I64<_|rJA
zj!YBASN489qXp;AN84lh#??*c<C1j0grmrnPv<XPPoS(G&qdYl3yG`n&p7{#8F4Sq
zePCCDT}&XTsPGo;^b?a6{tR=LvE=1Im7;A`mo0mv2pqv#^;+OL!I~n9!oni;$gG`C
zvjEKMa^0uaX`hy-T{T8s`j<SfaMDnfyGD`YqdL<&6puQ7h-DfQnu&gsb#N@O8J8Y!
zn{g*{SlHz-pLmDOcMwVR&UX37t`Q3m9;Ow@oCC)QI}FQloGgITaS18L79G8A{ae94
zk{#LE!zVI{B`==n9k(HlmUSmk?Y5AakXtKosDoz-xy!-#ikDpDSo>r?v(B{-pi^d(
z>--9TqdVS#BY#y~ld7#l&q2q2d@?Mvr6KQxwYIYI#<jJm@S;=AoE4qF$cz#ZnG^ab
z<uHapL0@jX{w0%sMqd+qTT{sMM~T~;ez8^Gq@{5UPEb@jhil`1;r1APb=Q<>p^9rx
zy#!i~CIT003Xq<-7h_O}=Ehi2a*?hs&+a>v-uUUZC>j|AI3S*(mY?9imS;<YL!dNx
z=&z-)I+W`m{2H_B!JbI69*>>b{S!rsjIF?{PW$MXNar4B*pY39b%(Da|GI(wc;9Vp
zViDrO4?^+cbmcMBP(yc|qD10lfpz=nc!<*#KUXZ92Sj!AH8kaXD(r24##rm_Xc0@O
zw=$7bm~)T9-T;Bj%IGU=XkBPQa(>4Sg9(W)*XMfu&e(vjT-3fAD2{7DZoNV=Q+!g2
zF7-WKij~hrr{{S9-}z1}dZm`mgX?z$*<bE$<XHr#fojb5jrb#>k|X_(_0i+#o=7eR
zMLYdI-9es2A}^q_CM3Y!qZVu6+ryG_@+}UVl0ats#cPUZb4s6eMW@5OsralTLniIf
zzQhcM!em{mqn?WOj)UJ9+7lRkU>BwZ7N=a&lPBXx8`)+*rX?tu0%e0<(b$h`RaNAb
z?{A)4k3X!#nA3T`Qd^u509(Tp#_Pm^FfT4KwZEQu-Re8FelJmg!ddkgixBfHV{<tY
zM<0DUyRf8j%H@FGP2{!i@U_A=U7Fhvspq+MVSawuwJfnb&n7D|pNpM4j4)=-^Xj6p
z$hMEGbE>BL9$FcS=zBq=e<_iBi$91QmraZ3*xZ3gB}m1zXa5w!_>gXv2_0=1bi}Zm
zL031QAc2P|v_KLc7~q);3Hlx4kc-T6R_$kd@L_Z$O7Z5yGrL8)BP<?b4whIu=+$dL
zFN@#NPhp|RQ7SCJsoR80V_pl$Ys{|Zphj{iP%=%qE1j*9Fp=?0>o|_5AFgOuw&w*X
zFZ_yGe1U<i?k2vdkZd9E1T;ADgr9p8kKy1#%@bV<fASiy%WG(BL!R(BR1)bpecP&8
zz#=nW04Mb_p8Nm@oI=!foU<`WI0qOTf9s#^MtExNHLCdj_q`fFM0PgG$VhKg%V_xu
zbv4*bD7L=k(y>(lRb^^=ws^HU*UMf#Y3zP_0CiTqwb{6|fmqaYol1GZbFXE$aGrcD
zLz~_b^o6m-C)djj#&~{pn<rdnv~DTbrt;$}mA7wE`eknC$l(uBkNHmiv$CX3&EwuI
z_maTn{2pT3e9vFXtJlTz4dl0HwaVj_`Vhr9{)$JJLV=t9$xxGFTAt%+Tgci_w!@75
z_~gOb@Wq?&Av6(V;OuCNXzjLkes`O*LqZ<apqD{qiLZ>_0<qc<RjNPb<S9^2W)`F&
z?5K|vFL10&85MEsg#C|bVz@1}4B+gRm3=%gc)F{at57z(3cHzJ(T>tZp5s$dA2*v(
zZUGB9irp#&PBQxk_Zj3vyP|1u2}D&j{M}oWJ`pp`5i}^SWKbj1m#C{)jmF_$V&W3h
zJYT|u*^Ikcn~LbLWk+y;4>dTdbfjP6jz;GxH{&_e(D26>&wWMsFkrhGKDZDHWW!xu
z)!PE647*+&jzfmBq~74>*!b05A&dmfj%Qs<W<1U|9p?Rr#2rJlJ=gPdWoPhm<&u}%
zHO;f{ym#1~8g;1SZz@zPGHsqkC=nrtEhrD+HQIkNb$jGS-2bxpwmE0GG`I7F6}MK|
zyj@L4RWK%zQV^CQ`bHrXe++}GQU0uW%(IgnK9O@Y*kCFGdCfU#6HsMwKVHT~e`5@4
z8-Dy%E#XB)tBYNEzx~9T55%sVX?wE0?gMBuhk0Ttp6$<gSeS>{+s@KuA6oEHS21^>
z7cywW6E}lZ$auBW!{hMWbFtRQdZ+9eUF)ph5no)+U_CS?>epV~5Eaw!leZAjM>r6X
zP@^%$N{rK>e8j>3xb0rq9lkv~h_yC5sq?nFt8cQfdZyesR@Zb+RTY9zAcmT;byDTp
zYSij~a7ZgF4$F~6E6d4h*w84c-wW2|+5hnhBfR(>8J@mn(*?B4HplAVWbI&Bp?Yfu
z2JZ+X-sG7(2?s|<4>Q6L<>#hhQ(NODX+X_*1_;m(<T*^#a(a(ignr@AIzW5Zv{t6t
zGhtQdHp<PO!f0U5sogkXW#dK@ckr0wd*&!Csjqr|GikB5F3mmNQXM{+9a3fAAB?B;
zME$wr99^0ny4EL0Q@y4i5*QHd6_X10&gBu#0h7|+{W&O;<GMZ%w_{bj%hB;5S?0o+
z+pz@E4wb+f3D#uU;AYbg@!EZu8v>1E?oB)bA8D^L%sTz|Xe#qlXvW3~M7)}&C5BY1
zy4}-soM^kdQ2R72L4bQ@yEbCz;|3x8wsrwxqD3Zf@>Sw<Ukw(L9@C{!b=||m(c6kM
z_cEn)=(EK_2v2Z+-$RW`3+F#IW@^JP&@uSoaga$CI~~T{RHP42B9b$hd(3iKq>;Qh
zjyI4@?9Oe!z9?WN^;yvA798fdy-1uZX4wQ5$KMMxw<msk-tMHp9;{GDySY=vhQoNi
z;isat=^5&=f@(_}I85hs>@YdOdX};}&E5u?X*zc}=`nn&IMaUOGN`avd17ufbz*Db
z>z8}#E@RC)W7Z2Unlss&-}ibfj>-;I&MZ7r(?k=Lb}CI^!%)bP%Q52{in#3;FvBJx
zhmELFu7$q6jo;$LCo6Qsuf}SXjd=3wl*jyuC*P;SsM^-sqUGm*wO6gX4SW$QyZeyW
zsq}DrZtm``)4`ww#yUNHKu2_!CKT+rGx8_>e8*bQ&uxiAXWLDd-SLiQzjx**_mtG)
zm~<`N&(z?hdacs=@~JFn&#jMjb-y&E(Aj#y;<FafeeWV6IGmxTxWw`SW`IfJl%_fX
z3*ww}@<PA$L^NuqxZj@uGN_esjC|`(7T$>PW<(#c^4Org5y@0$1?G%>KE>jB4Ijb&
z&*+-FG92bTEs<#ZdTTH{o0hNc?7GkP)cjccf-3v(BQ|m|HR>8+7>T-YKN~;dXF1H4
z!RL9qW^b2Whl0`B{eF$}S`IF(I=xaeFL&g3`1CSakcRCW1&T2E?lqD^1krFjTXB7U
z_SHw<^P22sx)7P_X`7G_ERUF=eVhxSHBUWK5<8L@s4qhwz0%)^ME?%}B_Ak9i+Rvh
zMxkDOl=+Qsq+<Z9>YISD>jHOG7=bqhXW%-6M$nP+<MFXByzwxWq(wH2P5OzB7%cg(
zwOvVU_JaL<1z%UZNF9#CFQFai<!kORI1KLaw(|v_c_f&JWYpuH9CDfp0S!|~ogH-d
z3EYq_Z_JvSo~+=F`pYh8M_Q5X4D*%l!FweQiNI{6szzFxV(PYQ$(~`S5`XG2reu~6
z(um@Fv*}Q_Sy^Nuf2=2uH6vuv{u9}e(O7-GP?cDhBs_l1VcCZSiEg*URi$)Tf?mE5
z;nB-~ItXZ(BdbSG>Q3?k(`*&VMNYMvNck1ux^_M<7}vD=c0N!|Kdj1rvz<`?f#RX$
zw1?Ev5}s^<>QsJovwG91>KV{tS#0ekb6HL25;nO!KRMd8^n~A?=luQiV+hoCLN9>e
zg)2B^x*msTr}@@})(5d?O8>%GUe{sS`wmXvz05lZol+T<FDBZgr8Y{_Cv8jZRr!oj
z@y8^eFk|QNDP>o@aYA6k3@Z$>7vNZP$hwZx`EDmXi0-rTV<vXTEv;leS7?>lcpuP@
zQR2r)`foak<0ZI+6Bt~kB&K4}uhaR-QNcD&+vCfA=nE~wcG@xvMA_c4MBvf`lMIMo
zvcT$>j|d=z)a#$O{i`Lk8zaX!m`WntJmb~;OPqg3`#+Xp+<;oYYKu$-F@vyn);JFh
zeffwZ<GtSRaa0lYHkIGKE^WYhbE*nzvm`wyRwnlzr6KBZQ2xR7$@>QsLjML`|J>{F
zLFB`Y(b_P+`3&yvR}M>a%i~STRA%IDkI`QS%_NpQLl3S*{;xeEd3WHxR*?|%J--aR
z3^U8~ZNSy15zM&vAKZF9%7Hy}Uu#p0>B#iTVLE2gVSc<#ksA-i$+JSiGXPFo;o>>2
zem>PQ(x0>Q611fp&|4}WQ6Qp-<JQOe^KF5VkjEn#wAnb9R)q)x1q(=L_!H>+F1!S4
z*>@NIn2lmhzLqU^w#ls?w$9X5ewcJ(<(Cen^QVv4vhJ`?<CtQ??-E-)tUf(z@!y~t
z;qAWo`i+f^EpdH)ec8Gr<wF6gVQ<nBoQ&Vish~66`cFGnVe#Hw=*)F+QDvORmBF;!
zw{YrBVxzU5t+t$7cBde4IwBq`;b`buAFC>~-yADK=`TG=Q*ye+^RC%z*^`;T>Gp)x
zDaUPM`FLg!+jOhKffs)W{$)AhKg<h2rd(Zb&8BLh+N2lq1>|S5ckl0~PxnXSpX+En
zYfC{RMw(NxP;x?Q_E7$2b`3Bxi}YI+A9s})w06DfwZtXvql-eYv!^`ouNm{f?2M&}
zl_(9*k{rUi@n(>`@zBZJq*2A#{+t=utWvmwfiz-K4*yO+{$*@2k|2Mkq%XK>GdPmL
zNW#w<qALHJDgX0(Uqn&R%J75~G5>|?|7A9UN~mbMX*gZ7v<BC54t%?9501XwCDYiR
zZGa#q%8sf^+<RS)5Qzhd_mZ8@Hmf%~h%KK#pNM^BKJgOv(+49s4k(!YIbK#iWRPC{
zaNL?UF-o<M<2yrCG_uK2o&jBsNgxno;5cm^Z~?T7s(~yQhc^`gd-}pOc(UJZm`w+n
z4Vv&*`X~4PoM@>l8hIPu&&%Yw^Nx_xPgD|9zz-vE=sXBvQm#gQDJB3FN4W1N&s7GH
zd2n9eu*`gtCC&NrWCcDbL8kpWT_&FKw)20}nSVwLiwBE@e7PQ;XLmGd<cgbf2t<~q
zXyUc>kLNs_j>AnHES$Te<b?vmh&lSOIDdL&CFk<}IZZ+G+h9FWzuaMFwf@zXGFDJn
z2sv0CJOU14awuB{!*?Qy%?!~+msZ(H<~eO@YHEIUaa=tt&(pgdJ^cC!@%+Ta*>OoG
zq1esMMT1Ljs*mEhFd%|`SR|<;7Tq>VY+0|jPMWY?6y?PY5*kEC2xIw5n&<2yGi(zI
zM`T6I?+FQHY>rfs`X<yv$UM)>KtyweS}%0eIi%me@wO*=QLi&YuLlaegHuq5*>#}f
zJ6+9$8gydKJ=qO@R{Zkz9w+z;vPc`|Fi8cOs_g0m0AQO2wpF)LkK=l}mb(dL7(QCA
z?Da5J135Uujq$2-w-Xy(x3%o}DPQylh`h}3@>|%v133z57QuOvKL1UwfYm;Y5R8@Y
za<*Vu7nd;QL>80f1)?J6t^Sx}wYEml6H2EY9F2NJx^88vs?H<)+snz8&~4AFGj@|9
zYNlDY<zj)k)3IvAtn2pSCoHD=ofcz7L|xr0i<!Dn3_kQSkBieu45s>@AN}lB2fjrp
z60@1AAR4U`D-3)0CPnUGLmf8Zny$wbG%|w1+9>W3d`CMiJd<N4a;OIa=~-9EN_(iK
z^NWeg)q$8R52PH3iRI&rw7_;IiCx?S|Di!>CdJO{asFGsJ*>}h>Ps-QeprVFycwv2
zT(s)u%BPxtkEEbg7%fmOG#|_mgMR2CRQ8bk2Zr+j7%rmb;M{^v=R`qk8P_(w?Qsk<
zf1|;5aWGuudc2iW+vBktqC4Sp8v{XhIZsxPq>>7xX_7^!4I=mq)pgygX6%OL^v0v&
zA*pv2HUpT3q8zjIyHKe+4k)rEkj~^Hir#kk4=)vtfz;-aLF?LHyZ`V?2xe|SXD`)H
zTp6z|1&_T4u*=hp&EE?V$1QZeq2na_b0>X{Za7%IhcwuMLyKL}7g_y}$47JIEVf@?
zh%l4}bi^<`znl*s)3Neb;5&>Va=m@~Qz*eRwFI94<LQKsd@-1~cmcuJI}r)%5s%r1
z#KWJ@B&!WPd{l;Nnj)#Y%<sy*{XrM3x5B%v>DtD@^RmP)vV41JAZD)T+`q@Ex&c6Q
zDEQaO9pVnEIQ|v!Cqg<@&P6VV>q{#*%GGU=i`g!~xDds#t-YDs*gS}rx>;NuF?TQ@
zFUFsE-ho+o2m9o0WjwZxOT&NM)R^TSfc3-_5I6f7mTiH!hnrE#jqYa-bE#3q+FTlV
zlW0C#GQVkkDvhajUIgbT0=J*J6I=C)U5l_d>X|_+O{S@xn0d`0ngMaMzd;!*Oty}e
zeJ`#7tQ`MAy{MXsub&}XZG~@w@W@??oZ^sR3woxv;7j~bwnnp=I(vTM4AFNW=(SQ*
z^c-%C8;!s5zPSMc1TC5XhhAc6ODlxbb-SeO-HAzG3K#z~Zt>@V&%~t7Wuuyvk6VGt
zV$PRGsS$OY>tR8@Wau|`aUKcqJmRw<daeE^7D2&rdDia+vv3X~c=rPPgO8Xmu2t4x
z7riN5!}yC-NuVLd63F}&_<4~ImY*L|g>$x4+peTKx|C>DYJDP#RmmOM)f>Ec^Cv1o
zLLH(K9B&gZkL{Eq%`fr1*l_5giT^E?2&~+6y_`NGzT+(rT<Q(y-TRLCgl~9>sOUdR
z-yrxYvL9(upHQz+Vbp)r!ti*DA7(Hc%xiWp(G!5YQzY_xC@c;L4FOWQXqg@i$H@=z
zOPlSqH-ca(48%aZnp;VtZ;*NLpX%tZ_j1e~6qzE%1TGf@rnpv8l&$d5vp!;Re+Mct
zQ|5s#Vw$`FP0Bss1`2rP@6u}S_bNiibzCPgIi`O$Snf4TkvyIHrK+yIbjzV_Q-bv7
z)6TAgi$2r5E%44~wN3n|t5h2^u<8gG4fXxuTm=sIv0`nr^^QFHfRlN|#+2@d0?&>|
zM!zGH6X0WC-z>n>dGy^E>Hl22<}E5O9GQd$O1iCze*O<tQ<)Oj!?|VomUE3c*nho9
za}?B#Jj7Y+<joNNA`(gX>+M+;#{c!>Xl31v0x3^!%wYfptk~P1a!XNvhD~tTIjt8?
z;r|aee;J{HQJ*iRrKGAFYe^g3MLkFY5k*7IdFy{D$y|tz=Vj^hl1lD&@q`tsv~;1j
zy>xH&l;JIPb58dMXkUi~ls349e+FZ6Y@cnVLCs7}Sw=@kd(Wgf&qUFFB(e2WzjFJ%
zBRZ9d`)6=vl|WhOj8NFDOdZa=7olP5l<e5*(n?vPu@lO+U6@~Af$jEe`^VdkM}8cB
z6P7N=4~XUe<p7Bv?mziNYGlsI2TlLb7;r1dF7BKEut@_{)mJ0QF4-E6LHN9s*+3k=
zI%trztE#e`&-A8#f{^Kr{;kvFYUEpY)t05$_bZZK8HG3bh+uJLCF)9j_{`6qa_J84
z^C1+#LiHlxUs<lbEpgR6YlLWNIP2OF`;AwE<oytA(TDW!Z@yO?$!LH&<mxIhZy+W?
za({67&b>D`pa9r|)WDFU86khc^FRtOoDgqe!oQx#3anr#O9uYeP4t0*>wk&7{nr!a
z!1(^Z4_gqPz+3)U>K%xAP^Vq}t2MB7<Pu({e|kYU=MI?4qY-5Vf}+=ek(GF2uQF(J
z1K1&X=g@%(MuMCWzUh6Jv@~AVg9~0Ds*xmY4m3TFPB%)7BI27^oOXVmKZpLOpzu#x
zI3tTh5jTLTQDGSN50(T$X#&6kNWc?^gljTm5m)9FWV&V$eAHtkL1OI-GzvX#2bj(^
zx(ks$y0JpdTQp!0fmg!{0uZzD;zOTRP-v=Tha?&Pp2?9*uE&X9F>CfhM%Tvd{UdD#
ze>K4apj^=LaD9~Ba+Wvx;Zq4;$Ygo@wY%MJIQ|ToDE)SNxM5__8GhKKq+kXj(mxdw
zpi7qzdyRd=q5m6}fjB=Jk}iVHH{#ktU9gz1R4p2mRg=Z&=Q!(}TLd((xE;3;8@;JK
z<Fju2av-@64kP8`wy5^u4dSvFh{5T3&yvF!rCn#Y>wG)5>yMQ58hZH%)=TFFDx2}s
zr85RS&ZVKS18CmWI=iBzprG9SXtI-5E@`o+m4G3y6O7<Jo?<77ZD>hmIgL!*?{{QC
zFE<qP3HJy_WvN2h(pZtEJ3KfgPcqC@`P#AR3>>Zv=S~za-ZWKM&^5^3Nq7ATg!LLY
zOs8wCXQ{Hcg4bR0_7)-p_+7KAXI*++i~TW);|TPgjyx;@^_UZj@V(G5G68;=LRl<e
zBAqR#GQ>j60TFf38UIDuvFmIL)-TVyE4ndSs9rV`vh27%(gkD`4K%AQv<JEISwp?r
zDx%_cAP-xWj(sfiv$_idFylK5nXAg?I|iOLGO=r=$PLj<SPhAq!CU=@=ch)BDBB)O
zY3aPYc=es?pU^RquiQW<+e>QIjO+j&tRW{C=DzdoF8llMIG(J%uX0WeIzkyZ>wG7!
zvEQvaUZZPmJo8i>OvO&+V{zwMVUF*h1d$+`9)9vGgz41wsd3il`#~*0Pt;jaLqRo|
zSn^$V#VvQqHQa$793>Y3I}axO<I3g*DN||KQ?SO33n#2%#P<L++LKK#7xUGiGPxTu
zLh6dpi>6cP3x3H4PZzm6*Zx|E**V__XPA=_J)g^Vc>-wU#@OE36V8{_zE9Jp<Gd7`
zVgsS)^}x}$-uIP?luO|pWbI54{WA5OBRI3(Nqa`4f>9erNRk%Xfg5QxE^(BO&ESkO
zkNI54Jj$q$@R}tT*KGICWR248^p7@5nxkY-QJ|iLm~+1QY+ZDlIQ*H+Q*4Sc$tXO&
zJqxM8+t&CHCE+ztS~~v5BkX}=$>(x__J*tHGoNcx-5m$S#30yEamqQEw~zCfOmyv~
zVsIIpBx|v=@WR1*9%<8T@DMI7;cEQq*n8pNc2@I_@zx!z=axpM@uGrETrrD!v@{Ax
z#PXj_JVGX{_&>d`E}MVv+)?F^*{LYSh)paX{-G?+2b1WT0woz3R50E<Jg(p<dh9sS
zH>_O`vg`AJ_l@Qp%IZH1DyTz^3D1qFf65(ElV$MC811h>OM)WzYQt(t%krsF#&=&l
z-t~J4X@K$E>RDX6C#AQksylOsIGX=_N5mBnO>*A^_M$1dOur_eercJE{woLrXD#zn
zC+{rUH@3qkzu!V~-KPdHKf3HMhi*dvK%G3@%My!TCUV`XU2#)sH};sWqxBlICSYcE
z(Hd2ycZ~(^tO>EJXWdD8(`iVCBWXW)@Q%SUUU<+GFt_~D>8Mdr^?Jbtc|RWy;_STz
zG)YZY=HdB%ziU|*vtF=Ajeh$*rW9%P;=$3|Ll3!ZvdBIj0pOOzYrEc^_m13DVxjF9
z<ttjNF4jLeCBPB-062oXlL6a;Z8y^DQCnvV5=rk#GJHM%BJ?ps^`dT%<|kp@>q9(q
z>3ioC%rp^i+zmR<v9==C3pUcH2B-B-jb#&P?c#FfQ6f-p#32<zz!mr$i|hxkWm@>)
zi3}n=u@3mOBAb@V<Cs*(-2GA2;vMGbc#7^RI4RaN7eC_dG}ouQH*S%o03rd(g3XG*
zQd=uEulio|n#f;CXEM?^?x$y$Rt`5DzaRz(uV&sHu`>Vb$A!O^Z(|$Xd`_hxh?s;<
z%C*#g0u{k8<sj_-UvqPiU6<tK&?;+dYfl<=PqLwV1KD=AE^AJ-Kf%J&#OD0m&q-5j
zs<pln&)Mo(@84KaLGsT?q-9Qwi~nv(_!qw;=?5;Wln&AVc`rcG)}arum09q|{Qcu8
z!1-Hev7+6)`4<bYj&&!m@7=siZVfKCeHtSFUpFUjTVEDo`0H{TSam!J>5n(wC=%-W
zHRMgk&i?B%-Zjo3VZi)rgn7w=@VXENND(SaVyHXV$;*ZpP<F_`#?r($kUYG@AYy!8
z*Td1ciwQSMbGPZ<uG?UA1#A&yw%&QHL|1_Lm0_s+NslfRIJo5zvm)YO4C2_l&5cM%
z_@h#yLMp&lL(w15;<xVrx#-F3plaE8l2c_lH+wYa>B+NMIcz@XejGMq1h9d_elG{y
ztfaw^%>CdKCK>;9M+BMWwJ7^$Stn`PS!LS~5DF)+&ik(>Kv~~3Udu*mP{uqE67Q-I
zOj`el&>V0e77d4Lt|v?Jn;)_HOV`&;L2bV>75J60T!plWXHVnR&8qb#0llzu*Y$YD
z@w$hD-EQ?Y&(Vaz=4xg{_)-XFr#?dx!&%)@oTfQQO0OXuF{jm+YYOa&-yTT7I|!I_
z63Ptzh}UuHx~{D!3wSnqE<a{BPe}DRT@&-%D4oxsmw7r-W)M)M(=a6fKUy7Zor1oI
z5q7y8wY5^_y`{eP^*RSQ^(Gf0$BD#Ur}_vPFV-$}p~t-cxbFOi5x>E9)0M{{D5!3-
zpB>~1Z1%JtgAfV@<)^t<V?;Zk#l(D}BaGc?=iM17C71yM{`r)e4C6$p-bW%;ymu~W
zuTM|<JuLuukwtvi(3>wPEG`G$#Kpmd-gXW$mak0rPu+j#9n@}3=}Pb%(3!uvKi<JC
zzl}lYlstZMa^2$sAXE^anoL&=62aVEQm1B+>KSB;hm}i^xmT_(#Tvj(v!-2@4w`#Q
ze_mIQO5B#7>3FSqt74bB3=BoMf*J88gXDpFrMr7+bPDoYLg4h%by~hB+pd1K|4hC3
zK$fNh)C1C%c1G{8Yk6fE`|FUp0Rrd%MHV~jR8Hx$($(d~Ch)gVP#s%DL}0G`%ySfB
zcLQ0p3-Q)yVf@v_+iHu{x$U&8leA4hjT#COPgo&Gm9xaX&@@bDhwSxhmFw~?hz|pB
zec>J{7)?bfXXJa<w8#AegZX$t&_@sp`B*U7+Om~J9{+s`eGh{{CMWW72hRd&Da#fG
z$I$|+xWfuC9=e4#_2rs{@Sd+4Wi2ZqXMp|_N!za1pR6k~St4rsE!N>&!F!7iiv_IH
z7hTWa^8p6ToR(@G=wm+sjhtdV8({uq)NxkAQ{a8i;KBCQ<#uCl2xA@nLZg%WAOOg(
zbbq%1c{kVQ5H(B14S9qS|Jh{!=|;>&mrj+6j&?o?+~PZ5{>DUa{*Q)tU2d2AX~(i9
zji>9O`<$o4j!ecZTi?1@6p~mOGF>RxeAUgM^PQxwKWuq_1yZp9-RHxHho0vE(ZAoE
zbnZr(Mc@(lW#rk4HHlnz{1{!cmWJ1HTvu1d-Dv%c^6D6!xcGsIkY4G0(;2L(hZo3m
zUph*%pMw?>!;bJL$&Z-z!AV(fjFE`3sJnI7Ic5)eyK9I(JaQ)s*j^rpLg@4xZ)M9z
z^Bx=C{=%zIjn;-6`Kg<JKa8!I(K+!4NJlQPM=DK*rCY3rr07mg0f{Lr?~Xnf^nlJ#
zOT9L9e`qlV0Ge_4VFA>d>cT`&arn?DaOKvYpMV3t3paB+QSdw+M&X_RcHGCa{UeTR
zJA%mNE{T<w=Vasyt7oS+V<P^)H)LvQazMfZzZ%#Ox&-WvnK=9qz03vzn|2+F;sgro
z`=4_g@9yd9b47%Cnvl=_B2}ube+$FpQ3x|a)Q#Nnom@I@Jm%BQ+5TkQ0^r}ej-xgo
zp9tT<#yne+mb$fWPQ31*(JfHZ&A+r?+1(>40Zfcf_q^jq-9~W_L#w~7RZLYnk#4ye
zpE6g!#aEnYq~TYV2Q+Wq+-0aJ_kH-$v)m>omJ{V0%|<9oaY8*WijDKLCCBmah07dE
zmtzyS_7Vd%S;kS-r+`h~n<2PHvc`&q8jd=s0D+t#uGuEHb=WG=XU|!6YtmGo0t$tT
z9TBB?UV8&@R9O+oxhUXF0rwyM>fABmZQEPwvF(1_-Eo~D%0Hg~<%1s$VTTt#H(*d3
zb4^IBtHL^uhwamGr);bz+!ML{hxsVb2G`@h%O+U-{mGvHahzeq1O}6q9z%|~Q61i*
z57<ev6_4xcKGYJi9bNCi#%GX9XrsUZ-<pm}VqK@dFXmmm>=%HNlkC?ph}w_#NRCN~
zXU;_<?)#=V=0CCa1`5(9F&J@oPONamw{on3=59Z#k;fA_uqs)UZx2(yTP6}E?5O(P
zbv#T{AF3ffJXV`KZDt{18HqLSVNy;V5I+cDM8JpQ9}8;AzlF-YY)AjecRW4JL9q1D
z;|FUoHDk~P&vyMn=Z4=Ij(SP$@)|O98Xv*7lh*?sb*Megn%>V}JS}!DTvL8-a-Pt^
zDBc+u+f!Ht-?>&N)_0O~i>3C>L?;d(%5&A$->Bd33n|LlWz)~Mt~!Napabid*8J?Y
z*CqChyCjT>2T|H;v;MA?i&J`bpvJW!K@o{H>|$(NT}#Ovl`o>5FS>nHffAb{4z-<d
zTKuKBuZ9T+7cWWI@KPPk$*TKl=a@hvW%k9ez=bmVj?{1ASc&J1et|)X$hhheyku72
zZ9&CO6FIz@qx-r?Rrt-NytjKz+R0LpHBTvhJG5OQl)$-s#C7Ox!X<G2>>Q*cuJr_y
zud-~M^;*5Pwv^_oultAkQ$RUfo`OB)V<rNV@9yfAuF&g|OhG=vjO%$CPt>)wcwQZQ
z!aXm>Jx@NfrAtT{V_r{Gcb!M*v)BcxA5lg7QCt{RTks%-XEjLwWBmmJpK&-%OGqVy
zvaY9X-oU{<u8lj~$s5>P#L61)vi0?bL?l)HnrR8>z7(VC<GbgDjbXWEk{pVwGsK8(
zm_0c<5q%DqtH8f@atyrsJw2b0lw(w!TQ4`~E?>BMW6t|w8tpE9t+CDk5?~CuDZg@&
zg9zjHFQy}1@j!rjtIlC)PZW<ffbqh9<ujLUPO8(dpvfL{G$wP-kp9NQBFhtohpdOe
z8l)b-=V>*!W*g4vVy7QpuRs>&<X7SzW7o|p^`q*BiE!A$8=?q}GMErQ>SMuXu|yWX
zg0Xtl08$rY=<VLZjOqYJ>5Nq8<;3NTvkyHQ-h#H|UJ#Bg#<lr%-ODB2`rOwG7H>By
z@xwATj%TmQ0#Bz=*88B{?t{<=^BN>6sYm77LA1`_VptpB=pkZt5g*i~FqIi(1epv+
z-Q{PAY3x&@cSaM2{>R_w#?MICOAjnpeD&PU$rmaO&zYY_MBvrYv!7u|ouIHTuL2$I
zZd^U?tW#$*!QxpcFDP4HK1UheY1c%I?5)($tCdtO2UwsUT8Fy)5XMP5J@USwQ&TO5
zY&4E^a=0k)b1C&`4qr8(kbQr(MVXyc&o=0bDPh8DuDy?3M<2xR6;RjLwULlkI`2lA
z4e8;@Jh`oHrz@~q+UDir<hE(1+r#dKX{5+%EVi#!hjWw@4^6^W_I8`be>lm-Dv~*2
zMXq?IGzYu4P6dzQ_^R|k_^Z;ktY3<{)o^#DR2bDuy%y9tuf~Tl__Y#Uewl|o=K^|4
z611BAbkVREirF06!T$Jlb)sf913ezEy36>Ftj|Q@@A={d8zCHvpBJGM<<LdsvjwuN
zRTyzC7YYVrF-3jpyu!<Zp6cY`Zb!wjtnIzP*kn*y59&J0rjJeXb!xges>n_A;O6mW
zXV(&FQZik=dh6{0y1rPpo!x$V-<6*19slNUt5|!&BKKL`xwU2A`!j9~kq%xrtk5Dv
z&()E4gR;$Ln^@Bsln|W-*C$2<$}e}>u&5dR0?uCNaA-<f=%h!+R^0!%dv>@HAKfob
z;a#WU&wn`oP1SW=Rd-2`Y$C+}ge)Jp+_RB(&YiH?2b<s<HNfthTi96HKB%0!t}|p9
zJnC6eRd=_(AF5f0*<<^~V(xS{Rxp9r+2-q`*t3hT;_Lb(zo9C`W81MPB)&vGig|e2
z7Fgm}4LeO&XHAnCHqN7}x?y)ZUX1z^y?CpF^O?Vy7c_317x<j{{*O~WOwP`-_#B*7
zAQ|IgLbatQO?z8n&@m7-G0I$%f%oh*Ear;5P3g|`bTW3w!_bOaU4^L?PefVgRIb2<
z$y_zhm4?MH#k3__n?so}8fAK`UiBqf+`bYEraB99X-sFB=70u9|3hYd-=>yT5uutz
zP|qAJ5$EzhU4Kew&O725&6SjFJRWfvf)PNrUIqQY1%1KpZJmFdMYHDyNT$uZ0>8a$
zgC=|B<hL?&31NLU&VHvwlm3SeXU}v`g10@Wv3-1~Mq^9o>sA-z_yBspE{|F^)zVaH
zUN@OP8?@&5Mk{lXhJC16hzXPG;xpj8wV1mDBUmcr2E0lBFSyItV{W@Vlc#s&s=jOm
zxhyL!w!MTZ&a%kGKR~hbGtv(8G%xVQF?%}Hd%*ea;iG5DSC}ILrL=l6$!8o*CvtsQ
zE)}`XmWxN>{wi1xxC(gWRB+A@OFXN%EKu|@SNqctT)BGeyxz^bpytnpo*%iWTE9^Y
zJCC=p>m!sC=eGVOy8W#l(0C&DP4#Oz$>(3}#8Az?yJ>QTRYit%OgMnG@cYuO<4?-S
zRJTfbmFWqGR1>5=r9gQ76IBo4cGt!IxyvJt>kfo_!uc!JKA>cq@2Y7#9h4}XAHY@A
zLW+mpJ>1jAY|!CrE`arxIQz1{`e=K8Aoexs{s5c&`Fr@hBLlY{7yEPWY`UNce)rSg
z=@>}`;=@Cdb|%uX<ZbxKDhpDr4iBqI8pyKA%JkSSn#qn!qk_WIj5U-Iqfuy*_}_7s
ziesIAdRJD$3SZ`TK5<M*<9tn;kQu>06`}dMQq(^6y1wiF@aI6%^sj=F3+nr<$oaG3
zCcJ<O)b+eLbO1SLz<QBG{>eCS(0bAD_?8n4^tp^fzhCpEcXw^m&a|MO4RsW5Ycje|
z12bsL3K`{22ZxjGW|;U!`1Zb2U1DgW$bSwUi(KwaUV*6IXQdZlj5ULsW`dJZ9jBKQ
z=+u;R<qs7H4ob7D%iE)P4d*+Xv~++q5e)pSZn`KUy0wRgFSEDqyZzd{=3(}4x7&*Y
z5-5U-UWbDiV(83#R6i1!TS1?AWlCz<JPC=t#j04k$JL?vm#z4613ud|4#}CNgR@L#
z{lVr}6IQ8`GK?d(-X3S88#Hr+I2j+G>O#|&YnaEOt}PDe6l!}7W^4pBEeD^1d^v6x
zE&n2RPQTZY9^eZa(^<~qWGoF1j*e1n`?`*VumT3-cbYCYnoa{v_i`B4<mL&~wXVs}
zBY4q;0)w!udt<vcC3|S+F#IJm(OJ#GdSS__u>$~y#ZBtv_J_UWBwXu#18<eUk3R3N
z+Vr1}j%(JkEn+CwVvw?5dh^;8tz^u!b>*|G1v?5vJ4JPSNQS9b?VEoE{krz2iJ`@b
zacxKKJ`GxXhV!H9Cdf4|&>i?ERqMOo;H3JJtic$cho<4T8a9^m$RQ%~pGdJ}IZo3y
z^0tNuPyaM47DwOYQ?@o5Ckmcwl9^cXj?x{wjm^+H*SxE8t(7eQ2LoPO4ZTX9J4#U<
zNvk1mem!%dlb$T88e<rTw&w$+{&v!TgE5|no#cBgw9Q{;0pFCNuB4v%vC%N-WsB5a
z!2_9!f;@N~1SDciO>Cd@V*)(QIq&LKObZ6G^}su%Zj$l6iC(@ME|<_{S%2F<>y@CM
z5=jYYxL_-j$2Y1HUV<dxG6})n4Mr0gEeo_A#ZhV{{qa4-b*TYI0*Qi9!jJa`J%)tB
z0B}+^T71rp`VBJ3iv$DP6)A2~nqGpzAh<BJWSjhT6KinUTIKu4>)OwMd87a1!}eYl
z0g;l_xI9`cknNEv)?Z}iQ*Y3AKLyAa<Lh!jbPA>LSE-k_I5+0<MG0NfSISE+iM0zb
zNollPy-+zDd@Pjz&gy!CUr^9daUHrDawk9~A{lXi_y6s2c-}KKEp4C$Oe(Iq_GWEL
zPWYW;@JnrSFaV2JcmHZZ@N3>8)cXa9GJLb2ynn7IlJ^L@iVxbb7-gGM(<sSbO@XK=
zhe0owaSMH6ZCp2>_QGG#ol<t`2i2Iq1qad1sUR#+vID`d!Dk$U7H%3O__ch>Q0KO{
zKVGZ|-yFw`)E6bOgan+g299s<blx;HC5qQwl(IyjI5JDn;hS7!DGbfHKA!)&;3t>L
zUG<Z7{Tb5)g<t5?syE8Y(fP&2^6Ab=e|+w7ZMFjksQAGiQ<WxS*jl>IgHImN=P4=i
zfX*)CA=Dez2D<amBm)N7%1~dc++0!eNNTWAqF7BZ{@Oq2z-<3l|0R+)H6SmevZOTs
zVkxD`y>Ol(<Eh?Q3S=~DDX`tB<)&`3)a0*Gz%$V!^KLG;Ah{IaQK?Oa{;m$=fikeP
zmngvvV+5Ur*F{XJXy{+hy!n2w$N4m(`RZ<VQcr#Os#6sb_#-7IFIp`8&gcID=86R)

literal 0
HcmV?d00001

diff --git a/examples/cfd/external_aerodynamics/domino/README.md b/examples/cfd/external_aerodynamics/domino/README.md
index 21dc5adf1c..92db82b656 100644
--- a/examples/cfd/external_aerodynamics/domino/README.md
+++ b/examples/cfd/external_aerodynamics/domino/README.md
@@ -300,6 +300,39 @@ please open an issue on GitHub.
 
 ![Results from DoMINO for RTWT SC demo](../../../../docs/img/domino_perf.png)
 
+### Example Training Results
+
+To provide an example of what a successful training should look like, we include here
+some example results.  Training curves may look similar to this:
+
+![Surface Training Curve](../../../../docs/img/domino/surface-training-curve.png)
+
+![Combined Training Curve](../../../../docs/img/domino/combined-training-curve.png)
+
+And, when evaluating the results on the validation dataset, this particular
+run had the following L2 and R2 Metrics:
+
+|              Metric | Surface Only | Combined |
+|--------------------:|:------------:|:--------:|
+|          X Velocity |      N/A     |   0.086  |
+|          Y Velocity |      N/A     |   0.185  |
+|          Z Velocity |      N/A     |   0.197  |
+| Volumetric Pressure |      N/A     |   0.106  |
+|             Turb. V |      N/A     |   0.134  |
+|    Surface Pressure |     0.101    |   0.105  |
+|       X-Tau (Shear) |     0.138    |   0.145  |
+|       Y-Tau (Shear) |     0.174    |   0.185  |
+|       Z-Tau (Shear) |     0.198    |   0.207  |
+|             Drag R2 |     0.983    |          |
+|             Lift R2 |     0.971    |          |
+
+With the PhysicsNeMo CFD tool, you can create plots of the lift and drag
+forces computed by domino vs. the CFD Solver:
+
+![Draf Force R^2](../../../../docs/img/domino/drag-r2.jpg)
+
+![Lift Force R^2](../../../../docs/img/domino/lift-r2.png)
+
 ### Training with Physics Losses
 
 DoMINO supports enforcing of PDE residuals as soft constraints. This can be used

From d8a490187591014bacd1f36b483a1a77ad6ff438 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Fri, 17 Oct 2025 14:18:27 -0500
Subject: [PATCH 91/98] Remove unneeded plots.

---
 examples/cfd/external_aerodynamics/domino/README.md | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/README.md b/examples/cfd/external_aerodynamics/domino/README.md
index 92db82b656..c20e00939d 100644
--- a/examples/cfd/external_aerodynamics/domino/README.md
+++ b/examples/cfd/external_aerodynamics/domino/README.md
@@ -305,8 +305,6 @@ please open an issue on GitHub.
 To provide an example of what a successful training should look like, we include here
 some example results.  Training curves may look similar to this:
 
-![Surface Training Curve](../../../../docs/img/domino/surface-training-curve.png)
-
 ![Combined Training Curve](../../../../docs/img/domino/combined-training-curve.png)
 
 And, when evaluating the results on the validation dataset, this particular
@@ -327,12 +325,10 @@ run had the following L2 and R2 Metrics:
 |             Lift R2 |     0.971    |          |
 
 With the PhysicsNeMo CFD tool, you can create plots of the lift and drag
-forces computed by domino vs. the CFD Solver:
+forces computed by domino vs. the CFD Solver.  For example, here is the drag force:
 
 ![Draf Force R^2](../../../../docs/img/domino/drag-r2.jpg)
 
-![Lift Force R^2](../../../../docs/img/domino/lift-r2.png)
-
 ### Training with Physics Losses
 
 DoMINO supports enforcing of PDE residuals as soft constraints. This can be used

From 01a0c15d9f717411af967dd18c3607a8bc104d08 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Fri, 17 Oct 2025 14:21:07 -0500
Subject: [PATCH 92/98] uupdate r2

---
 examples/cfd/external_aerodynamics/domino/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/README.md b/examples/cfd/external_aerodynamics/domino/README.md
index c20e00939d..ee456cc573 100644
--- a/examples/cfd/external_aerodynamics/domino/README.md
+++ b/examples/cfd/external_aerodynamics/domino/README.md
@@ -321,8 +321,8 @@ run had the following L2 and R2 Metrics:
 |       X-Tau (Shear) |     0.138    |   0.145  |
 |       Y-Tau (Shear) |     0.174    |   0.185  |
 |       Z-Tau (Shear) |     0.198    |   0.207  |
-|             Drag R2 |     0.983    |          |
-|             Lift R2 |     0.971    |          |
+|             Drag R2 |     0.983    |   0.975  |
+|             Lift R2 |     0.971    |   0.968  |
 
 With the PhysicsNeMo CFD tool, you can create plots of the lift and drag
 forces computed by domino vs. the CFD Solver.  For example, here is the drag force:

From edae4366298caf7aff04a07cf19ed2060a39d9e4 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Fri, 17 Oct 2025 15:18:29 -0500
Subject: [PATCH 93/98] Fix ruff issues

---
 .../external_aerodynamics/domino/src/loss.py  |  14 +-
 .../external_aerodynamics/domino/src/test.py  | 177 ++++++++++++------
 .../external_aerodynamics/domino/src/train.py |  22 ++-
 physicsnemo/datapipes/cae/cae_dataset.py      |  14 +-
 4 files changed, 154 insertions(+), 73 deletions(-)

diff --git a/examples/cfd/external_aerodynamics/domino/src/loss.py b/examples/cfd/external_aerodynamics/domino/src/loss.py
index cb161cb06e..3ab52c7903 100644
--- a/examples/cfd/external_aerodynamics/domino/src/loss.py
+++ b/examples/cfd/external_aerodynamics/domino/src/loss.py
@@ -227,7 +227,7 @@ def loss_fn(
 
     num = torch.sum(mask * (output - target) ** 2.0, dims)
     if loss_type == "rmse":
-        denom = torch.sum(mask * (target - torch.mean(target, (0, 1)))**2.0, dims)
+        denom = torch.sum(mask * (target - torch.mean(target, (0, 1))) ** 2.0, dims)
         loss = torch.mean(num / denom)
     elif loss_type == "mse":
         denom = torch.sum(mask)
@@ -311,7 +311,9 @@ def loss_fn_surface(
 
         # Compute the mean diff**2 of the vector component, leave the last dimension:
         masked_loss_ws_num = vector_diff_sq
-        masked_loss_ws_denom = torch.mean((target_vector - torch.mean(target_vector, (0, 1))) ** 2.0, (0, 1))
+        masked_loss_ws_denom = torch.mean(
+            (target_vector - torch.mean(target_vector, (0, 1))) ** 2.0, (0, 1)
+        )
         masked_loss_ws = torch.sum(masked_loss_ws_num / masked_loss_ws_denom)
 
     loss = masked_loss_pres + masked_loss_ws
@@ -359,12 +361,16 @@ def loss_fn_area(
     # Compute the mean diff**2 of the scalar component:
     masked_loss_pres = torch.mean(((output_scalar - target_scalar) ** 2.0), dim=(0, 1))
     if loss_type == "rmse":
-        masked_loss_pres /= torch.mean((target_scalar-torch.mean(target_scalar, (0, 1)))**2.0, dim=(0, 1))
+        masked_loss_pres /= torch.mean(
+            (target_scalar - torch.mean(target_scalar, (0, 1))) ** 2.0, dim=(0, 1)
+        )
 
     # Compute the mean diff**2 of the vector component, leave the last dimension:
     masked_loss_ws = torch.mean((target_vector - output_vector) ** 2.0, (0, 1))
     if loss_type == "rmse":
-        masked_loss_ws /= torch.mean((target_vector - torch.mean(target_vector, (0, 1))) ** 2.0, (0, 1))
+        masked_loss_ws /= torch.mean(
+            (target_vector - torch.mean(target_vector, (0, 1))) ** 2.0, (0, 1)
+        )
 
     # Combine the scalar and vector components:
     loss = 0.25 * (masked_loss_pres + torch.sum(masked_loss_ws))
diff --git a/examples/cfd/external_aerodynamics/domino/src/test.py b/examples/cfd/external_aerodynamics/domino/src/test.py
index d00d6dcd8d..4bb32dc6cd 100644
--- a/examples/cfd/external_aerodynamics/domino/src/test.py
+++ b/examples/cfd/external_aerodynamics/domino/src/test.py
@@ -120,7 +120,9 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors):
             if "volume_min_max" in data_dict.keys():
                 vol_max = data_dict["volume_min_max"][:, 1]
                 vol_min = data_dict["volume_min_max"][:, 0]
-                geo_centers_vol = 2.0 * (geo_centers - vol_min) / (vol_max - vol_min) - 1
+                geo_centers_vol = (
+                    2.0 * (geo_centers - vol_min) / (vol_max - vol_min) - 1
+                )
             else:
                 geo_centers_vol = geo_centers
 
@@ -160,7 +162,9 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors):
             prediction_vol = torch.zeros_like(target_vol)
             num_points = volume_mesh_centers.shape[1]
             subdomain_points = int(np.floor(num_points / point_batch_size))
-            sdf_scaling_factor = cfg.model.geometry_rep.geo_processor.volume_sdf_scaling_factor
+            sdf_scaling_factor = (
+                cfg.model.geometry_rep.geo_processor.volume_sdf_scaling_factor
+            )
             start_time = time.time()
 
             for p in range(subdomain_points + 1):
@@ -174,7 +178,9 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors):
                     sdf_nodes_batch = sdf_nodes[:, start_idx:end_idx]
                     scaled_sdf_nodes_batch = []
                     for p in range(len(sdf_scaling_factor)):
-                        scaled_sdf_nodes_batch.append(scale_sdf(sdf_nodes_batch, sdf_scaling_factor[p]))
+                        scaled_sdf_nodes_batch.append(
+                            scale_sdf(sdf_nodes_batch, sdf_scaling_factor[p])
+                        )
                     scaled_sdf_nodes_batch = torch.cat(scaled_sdf_nodes_batch, dim=-1)
 
                     pos_volume_closest_batch = pos_volume_closest[:, start_idx:end_idx]
@@ -199,9 +205,7 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors):
                     else:
                         pos_encoding_all = pos_normals_com_batch
 
-                    pos_encoding = model.fc_p_vol(
-                        pos_encoding_all
-                    )
+                    pos_encoding = model.fc_p_vol(pos_encoding_all)
                     tpredictions_batch = model.solution_calculator_vol(
                         volume_mesh_centers_batch,
                         geo_encoding_local,
@@ -213,23 +217,23 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors):
                     prediction_vol[:, start_idx:end_idx] = tpredictions_batch
 
             if cfg.model.normalization == "min_max_scaling":
-                prediction_vol = unnormalize(prediction_vol, vol_factors[0], vol_factors[1])
+                prediction_vol = unnormalize(
+                    prediction_vol, vol_factors[0], vol_factors[1]
+                )
             elif cfg.model.normalization == "mean_std_scaling":
-                prediction_vol = unstandardize(prediction_vol, vol_factors[0], vol_factors[1])
+                prediction_vol = unstandardize(
+                    prediction_vol, vol_factors[0], vol_factors[1]
+                )
             # print(np.amax(prediction_vol, axis=(0, 1)), np.amin(prediction_vol, axis=(0, 1)))
 
-            prediction_vol[:, :, :3] = (
-                prediction_vol[:, :, :3] * stream_velocity[0, 0]
-            )
+            prediction_vol[:, :, :3] = prediction_vol[:, :, :3] * stream_velocity[0, 0]
             prediction_vol[:, :, 3] = (
                 prediction_vol[:, :, 3]
                 * stream_velocity[0, 0] ** 2.0
                 * air_density[0, 0]
             )
             prediction_vol[:, :, 4] = (
-                prediction_vol[:, :, 4]
-                * stream_velocity[0, 0]
-                * length_scale[0]
+                prediction_vol[:, :, 4] * stream_velocity[0, 0] * length_scale[0]
             )
         else:
             prediction_vol = None
@@ -283,9 +287,7 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors):
                         surface_mesh_centers_batch,
                         s_grid,
                     )
-                    pos_encoding = model.fc_p_surf(
-                        pos_surface_center_of_mass_batch
-                    )
+                    pos_encoding = model.fc_p_surf(pos_surface_center_of_mass_batch)
 
                     tpredictions_batch = model.solution_calculator_surf(
                         surface_mesh_centers_batch,
@@ -304,13 +306,15 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors):
                     prediction_surf[:, start_idx:end_idx] = tpredictions_batch
 
             if cfg.model.normalization == "min_max_scaling":
-                prediction_surf = unnormalize(prediction_surf, surf_factors[0], surf_factors[1])
+                prediction_surf = unnormalize(
+                    prediction_surf, surf_factors[0], surf_factors[1]
+                )
             elif cfg.model.normalization == "mean_std_scaling":
-                prediction_surf = unstandardize(prediction_surf, surf_factors[0], surf_factors[1])
+                prediction_surf = unstandardize(
+                    prediction_surf, surf_factors[0], surf_factors[1]
+                )
             prediction_surf = (
-                prediction_surf
-                * stream_velocity[0, 0] ** 2.0
-                * air_density[0, 0]
+                prediction_surf * stream_velocity[0, 0] ** 2.0 * air_density[0, 0]
             )
         else:
             prediction_surf = None
@@ -433,7 +437,10 @@ def main(cfg: DictConfig):
             :, 1:
         ]  # Assuming triangular elements
         mesh_indices_flattened = stl_faces.flatten()
-        length_scale = np.array(np.amax(np.amax(stl_vertices, 0) - np.amin(stl_vertices, 0)), dtype=np.float32)
+        length_scale = np.array(
+            np.amax(np.amax(stl_vertices, 0) - np.amin(stl_vertices, 0)),
+            dtype=np.float32,
+        )
         length_scale = torch.from_numpy(length_scale).to(torch.float32).to(dist.device)
         stl_sizes = mesh_stl.compute_cell_sizes(length=False, area=True, volume=False)
         stl_sizes = np.array(stl_sizes.cell_data["Area"], dtype=np.float32)
@@ -443,17 +450,29 @@ def main(cfg: DictConfig):
         stl_vertices = torch.from_numpy(stl_vertices).to(torch.float32).to(dist.device)
         stl_sizes = torch.from_numpy(stl_sizes).to(torch.float32).to(dist.device)
         stl_centers = torch.from_numpy(stl_centers).to(torch.float32).to(dist.device)
-        mesh_indices_flattened = torch.from_numpy(mesh_indices_flattened).to(torch.int32).to(dist.device)
+        mesh_indices_flattened = (
+            torch.from_numpy(mesh_indices_flattened).to(torch.int32).to(dist.device)
+        )
 
         # Center of mass calculation
         center_of_mass = calculate_center_of_mass(stl_centers, stl_sizes)
 
-        s_max = torch.from_numpy(np.asarray(cfg.data.bounding_box_surface.max)).to(torch.float32).to(dist.device)
-        s_min = torch.from_numpy(np.asarray(cfg.data.bounding_box_surface.min)).to(torch.float32).to(dist.device)
+        s_max = (
+            torch.from_numpy(np.asarray(cfg.data.bounding_box_surface.max))
+            .to(torch.float32)
+            .to(dist.device)
+        )
+        s_min = (
+            torch.from_numpy(np.asarray(cfg.data.bounding_box_surface.min))
+            .to(torch.float32)
+            .to(dist.device)
+        )
 
         nx, ny, nz = cfg.model.interp_res
 
-        surf_grid = create_grid(s_max, s_min, torch.from_numpy(np.asarray([nx, ny, nz])).to(dist.device))
+        surf_grid = create_grid(
+            s_max, s_min, torch.from_numpy(np.asarray([nx, ny, nz])).to(dist.device)
+        )
 
         normed_stl_vertices_cp = normalize(stl_vertices, s_max, s_min)
         surf_grid_normed = normalize(surf_grid, s_max, s_min)
@@ -468,7 +487,7 @@ def main(cfg: DictConfig):
         )
 
         surf_grid_max_min = torch.stack([s_min, s_max])
-        
+
         # Get global parameters and global parameters scaling from config.yaml
         global_params_names = list(cfg.variables.global_parameters.keys())
         global_params_reference = {
@@ -496,7 +515,9 @@ def main(cfg: DictConfig):
         global_params_reference = np.array(
             global_params_reference_list, dtype=np.float32
         )
-        global_params_reference = torch.from_numpy(global_params_reference).to(dist.device)
+        global_params_reference = torch.from_numpy(global_params_reference).to(
+            dist.device
+        )
 
         # Define the list of global parameter values for each simulation.
         # Note: The user must ensure that the values provided here correspond to the
@@ -515,7 +536,9 @@ def main(cfg: DictConfig):
         global_params_values_list = np.array(
             global_params_values_list, dtype=np.float32
         )
-        global_params_values = torch.from_numpy(global_params_values_list).to(dist.device)
+        global_params_values = torch.from_numpy(global_params_values_list).to(
+            dist.device
+        )
 
         # Read VTP
         if model_type == "surface" or model_type == "combined":
@@ -543,13 +566,20 @@ def main(cfg: DictConfig):
             surface_normals = (
                 surface_normals / np.linalg.norm(surface_normals, axis=1)[:, np.newaxis]
             )
-            surface_coordinates = torch.from_numpy(surface_coordinates).to(torch.float32).to(dist.device)
-            surface_normals = torch.from_numpy(surface_normals).to(torch.float32).to(dist.device)
-            surface_sizes = torch.from_numpy(surface_sizes).to(torch.float32).to(dist.device)
-            surface_fields = torch.from_numpy(surface_fields).to(torch.float32).to(dist.device)
+            surface_coordinates = (
+                torch.from_numpy(surface_coordinates).to(torch.float32).to(dist.device)
+            )
+            surface_normals = (
+                torch.from_numpy(surface_normals).to(torch.float32).to(dist.device)
+            )
+            surface_sizes = (
+                torch.from_numpy(surface_sizes).to(torch.float32).to(dist.device)
+            )
+            surface_fields = (
+                torch.from_numpy(surface_fields).to(torch.float32).to(dist.device)
+            )
 
             if cfg.model.num_neighbors_surface > 1:
-
                 time_start = time.time()
                 # print(f"file: {dirname}, surface coordinates shape: {surface_coordinates.shape}")
                 # try:
@@ -607,15 +637,29 @@ def main(cfg: DictConfig):
                 polydata_vol, volume_variable_names
             )
             volume_fields = np.concatenate(volume_fields, axis=-1)
-            volume_coordinates = torch.from_numpy(volume_coordinates).to(torch.float32).to(dist.device)
-            volume_fields = torch.from_numpy(volume_fields).to(torch.float32).to(dist.device)
+            volume_coordinates = (
+                torch.from_numpy(volume_coordinates).to(torch.float32).to(dist.device)
+            )
+            volume_fields = (
+                torch.from_numpy(volume_fields).to(torch.float32).to(dist.device)
+            )
 
-            c_max = torch.from_numpy(np.asarray(cfg.data.bounding_box.max)).to(torch.float32).to(dist.device)
-            c_min = torch.from_numpy(np.asarray(cfg.data.bounding_box.min)).to(torch.float32).to(dist.device)
+            c_max = (
+                torch.from_numpy(np.asarray(cfg.data.bounding_box.max))
+                .to(torch.float32)
+                .to(dist.device)
+            )
+            c_min = (
+                torch.from_numpy(np.asarray(cfg.data.bounding_box.min))
+                .to(torch.float32)
+                .to(dist.device)
+            )
 
             # Generate a grid of specified resolution to map the bounding box
             # The grid is used for capturing structured geometry features and SDF representation of geometry
-            grid = create_grid(c_max, c_min, torch.from_numpy(np.asarray([nx, ny, nz])).to(dist.device))
+            grid = create_grid(
+                c_max, c_min, torch.from_numpy(np.asarray([nx, ny, nz])).to(dist.device)
+            )
 
             if cfg.data.normalize_coordinates:
                 volume_coordinates = normalize(volume_coordinates, c_max, c_min)
@@ -633,7 +677,7 @@ def main(cfg: DictConfig):
                 grid,
                 use_sign_winding_number=True,
             )
-            
+
             # SDF calculation
             time_start = time.time()
             sdf_nodes, sdf_node_closest_point = signed_distance_field(
@@ -647,7 +691,7 @@ def main(cfg: DictConfig):
 
             pos_volume_closest = volume_coordinates - sdf_node_closest_point
             pos_volume_center_of_mass = volume_coordinates - center_of_mass_normalized
-            
+
         else:
             volume_coordinates = None
             volume_fields = None
@@ -723,10 +767,7 @@ def main(cfg: DictConfig):
                 "global_params_reference": torch.unsqueeze(global_params_reference, -1),
             }
 
-        data_dict = {
-            key: torch.unsqueeze(value, 0)
-            for key, value in data_dict.items()
-        }
+        data_dict = {key: torch.unsqueeze(value, 0) for key, value in data_dict.items()}
 
         prediction_vol, prediction_surf = test_step(
             data_dict, model, dist.device, cfg, vol_factors, surf_factors
@@ -771,9 +812,15 @@ def main(cfg: DictConfig):
                 surface_fields[:, 0] * surface_normals[:, 2] * surface_sizes[:, 0]
                 - surface_fields[:, 3] * surface_sizes[:, 0]
             )
-            print("Drag=", dirname, force_x_pred.cpu().numpy(), force_x_true.cpu().numpy())
-            print("Lift=", dirname, force_z_pred.cpu().numpy(), force_z_true.cpu().numpy())
-            print("Side=", dirname, force_y_pred.cpu().numpy(), force_y_true.cpu().numpy())
+            print(
+                "Drag=", dirname, force_x_pred.cpu().numpy(), force_x_true.cpu().numpy()
+            )
+            print(
+                "Lift=", dirname, force_z_pred.cpu().numpy(), force_z_true.cpu().numpy()
+            )
+            print(
+                "Side=", dirname, force_y_pred.cpu().numpy(), force_y_true.cpu().numpy()
+            )
             aero_forces_all.append(
                 [
                     dirname,
@@ -787,8 +834,12 @@ def main(cfg: DictConfig):
             )
 
             l2_gt = torch.mean(torch.square(surface_fields), (0))
-            l2_error = torch.mean(torch.square(prediction_surf[0] - surface_fields), (0))
-            l2_surface_all.append(np.sqrt(l2_error.cpu().numpy()) / np.sqrt(l2_gt.cpu().numpy()))
+            l2_error = torch.mean(
+                torch.square(prediction_surf[0] - surface_fields), (0)
+            )
+            l2_surface_all.append(
+                np.sqrt(l2_error.cpu().numpy()) / np.sqrt(l2_gt.cpu().numpy())
+            )
 
             print(
                 "Surface L-2 norm:",
@@ -819,30 +870,42 @@ def main(cfg: DictConfig):
                 dirname,
                 np.sqrt(l2_error.cpu().numpy()) / np.sqrt(l2_gt.cpu().numpy()),
             )
-            l2_volume_all.append(np.sqrt(l2_error.cpu().numpy()) / np.sqrt(l2_gt.cpu().numpy()))
+            l2_volume_all.append(
+                np.sqrt(l2_error.cpu().numpy()) / np.sqrt(l2_gt.cpu().numpy())
+            )
 
         # import pdb; pdb.set_trace()
         if prediction_surf is not None:
-            surfParam_vtk = numpy_support.numpy_to_vtk(prediction_surf[0, :, 0:1].cpu().numpy())
+            surfParam_vtk = numpy_support.numpy_to_vtk(
+                prediction_surf[0, :, 0:1].cpu().numpy()
+            )
             surfParam_vtk.SetName(f"{surface_variable_names[0]}Pred")
             celldata_all.GetCellData().AddArray(surfParam_vtk)
 
-            surfParam_vtk = numpy_support.numpy_to_vtk(prediction_surf[0, :, 1:].cpu().numpy())
+            surfParam_vtk = numpy_support.numpy_to_vtk(
+                prediction_surf[0, :, 1:].cpu().numpy()
+            )
             surfParam_vtk.SetName(f"{surface_variable_names[1]}Pred")
             celldata_all.GetCellData().AddArray(surfParam_vtk)
 
             write_to_vtp(celldata_all, vtp_pred_save_path)
 
         if prediction_vol is not None:
-            volParam_vtk = numpy_support.numpy_to_vtk(prediction_vol[:, 0:3].cpu().numpy())
+            volParam_vtk = numpy_support.numpy_to_vtk(
+                prediction_vol[:, 0:3].cpu().numpy()
+            )
             volParam_vtk.SetName(f"{volume_variable_names[0]}Pred")
             polydata_vol.GetPointData().AddArray(volParam_vtk)
 
-            volParam_vtk = numpy_support.numpy_to_vtk(prediction_vol[:, 3:4].cpu().numpy())
+            volParam_vtk = numpy_support.numpy_to_vtk(
+                prediction_vol[:, 3:4].cpu().numpy()
+            )
             volParam_vtk.SetName(f"{volume_variable_names[1]}Pred")
             polydata_vol.GetPointData().AddArray(volParam_vtk)
 
-            volParam_vtk = numpy_support.numpy_to_vtk(prediction_vol[:, 4:5].cpu().numpy())
+            volParam_vtk = numpy_support.numpy_to_vtk(
+                prediction_vol[:, 4:5].cpu().numpy()
+            )
             volParam_vtk.SetName(f"{volume_variable_names[2]}Pred")
             polydata_vol.GetPointData().AddArray(volParam_vtk)
 
diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py
index 9758ed7e2f..55731696d2 100644
--- a/examples/cfd/external_aerodynamics/domino/src/train.py
+++ b/examples/cfd/external_aerodynamics/domino/src/train.py
@@ -209,7 +209,7 @@ def train_epoch(
             io_end_time = time.perf_counter()
             if add_physics_loss:
                 autocast_enabled = False
-        
+
             with autocast("cuda", enabled=autocast_enabled, cache_enabled=False):
                 with nvtx.range("Model Forward Pass"):
                     if add_physics_loss:
@@ -259,9 +259,7 @@ def train_epoch(
                     scaler.unscale_(optimizer)
 
                     # Since the gradients of optimizer's assigned params are unscaled, clips as usual.
-                    torch.nn.utils.clip_grad_norm_(
-                        model.parameters(), grad_max_norm
-                    )
+                    torch.nn.utils.clip_grad_norm_(model.parameters(), grad_max_norm)
                 scaler.step(optimizer)
                 scaler.update()
                 optimizer.zero_grad()
@@ -501,14 +499,22 @@ def main(cfg: DictConfig) -> None:
         optimizer_class = torch.optim.AdamW
     else:
         raise ValueError(f"Unsupported optimizer: {cfg.train.optimizer.name}")
-    optimizer = optimizer_class(model.parameters(), lr=cfg.train.optimizer.lr, weight_decay=cfg.train.optimizer.weight_decay)
+    optimizer = optimizer_class(
+        model.parameters(),
+        lr=cfg.train.optimizer.lr,
+        weight_decay=cfg.train.optimizer.weight_decay,
+    )
     if cfg.train.lr_scheduler.name == "MultiStepLR":
         scheduler = torch.optim.lr_scheduler.MultiStepLR(
-        optimizer, milestones=cfg.train.lr_scheduler.milestones, gamma=cfg.train.lr_scheduler.gamma
-    )
+            optimizer,
+            milestones=cfg.train.lr_scheduler.milestones,
+            gamma=cfg.train.lr_scheduler.gamma,
+        )
     elif cfg.train.lr_scheduler.name == "CosineAnnealingLR":
         scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
-            optimizer, T_max=cfg.train.lr_scheduler.T_max, eta_min=cfg.train.lr_scheduler.eta_min
+            optimizer,
+            T_max=cfg.train.lr_scheduler.T_max,
+            eta_min=cfg.train.lr_scheduler.eta_min,
         )
     else:
         raise ValueError(f"Unsupported scheduler: {cfg.train.lr_scheduler.name}")
diff --git a/physicsnemo/datapipes/cae/cae_dataset.py b/physicsnemo/datapipes/cae/cae_dataset.py
index b41e217635..8a2dfdfc5c 100644
--- a/physicsnemo/datapipes/cae/cae_dataset.py
+++ b/physicsnemo/datapipes/cae/cae_dataset.py
@@ -1213,7 +1213,9 @@ def compute_mean_std_min_max(
 
         end = time.perf_counter()
         iteration_time = end - start
-        print(f"on iteration {i} of {max_samples}, time: {iteration_time:.2f} seconds for file: {j}")
+        print(
+            f"on iteration {i} of {max_samples}, time: {iteration_time:.2f} seconds for file: {j}"
+        )
         start = time.perf_counter()
 
     var = {}
@@ -1241,9 +1243,11 @@ def compute_mean_std_min_max(
             std_sample = std[field_key]
             mask = torch.ones_like(field_data, dtype=torch.bool)
             for v in range(field_data.shape[-1]):
-                outliers = (field_data[:, v] < mean_sample[v] - 9.0 * std_sample[v]) | (field_data[:, v] > mean_sample[v] + 9.0 * std_sample[v])
+                outliers = (field_data[:, v] < mean_sample[v] - 9.0 * std_sample[v]) | (
+                    field_data[:, v] > mean_sample[v] + 9.0 * std_sample[v]
+                )
                 mask[:, v] = ~outliers
-            
+
             batch_min = []
             batch_max = []
             for v in range(field_data.shape[-1]):
@@ -1258,7 +1262,9 @@ def compute_mean_std_min_max(
 
         end = time.perf_counter()
         iteration_time = end - start
-        print(f"on iteration {i} of {max_samples}, time: {iteration_time:.2f} seconds for file: {j}")
+        print(
+            f"on iteration {i} of {max_samples}, time: {iteration_time:.2f} seconds for file: {j}"
+        )
         start = time.perf_counter()
 
     global_end = time.perf_counter()

From 6c04d882666fcaf29fd137e6f59887364b930fec Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Fri, 17 Oct 2025 21:09:54 +0000
Subject: [PATCH 94/98] test fix.

---
 test/utils/test_domino_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/utils/test_domino_utils.py b/test/utils/test_domino_utils.py
index a9e1166640..fc10b93688 100644
--- a/test/utils/test_domino_utils.py
+++ b/test/utils/test_domino_utils.py
@@ -122,7 +122,7 @@ def test_nd_interpolator():
     coords = torch.tensor([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
     field_vals = torch.tensor([[1.0], [2.0], [3.0], [4.0]])
     grid_points = torch.tensor([[0.5, 0.5]])
-    result = nd_interpolator([coords], field_vals, grid_points)
+    result = nd_interpolator(coords, field_vals, grid_points)
     assert result.shape[0] == 1  # One grid point
 
 
From bd74ce3a72310d3b958e785a67e4893d7ec9c5eb Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Fri, 17 Oct 2025 21:21:25 +0000
Subject: [PATCH 95/98]  Fix dict item with normalization off.

---
 physicsnemo/datapipes/cae/domino_datapipe.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py
index eb7b004d18..491ab5a199 100644
--- a/physicsnemo/datapipes/cae/domino_datapipe.py
+++ b/physicsnemo/datapipes/cae/domino_datapipe.py
@@ -796,7 +796,8 @@ def process_data(self, data_dict):
         ########################################################################
         # For volume data, we store this only if normalizing coordinates:
         if self.model_type == "volume" or self.model_type == "combined":
-            return_dict["volume_min_max"] = torch.stack([c_min, c_max])
+            if self.config.normalize_coordinates:
+                return_dict["volume_min_max"] = torch.stack([c_min, c_max])
 
         if self.model_type == "volume" or self.model_type == "combined":
             volume_fields_raw = (

From 90235753d3f7c2df5471cef27ed506226d5f8fc4 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Fri, 17 Oct 2025 16:26:00 -0500
Subject: [PATCH 96/98] Change codeowners order, exclusion goes last.

---
 .github/CODEOWNERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index d5881d69ae..1e6ad0ce59 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -19,8 +19,8 @@
 ./*.md @ram-cherukuri @megnvidia
 
 # All changes to documentation, except images:
-docs/img/
 docs/ @megnvidia @ktangsali
+docs/img/
 
 
 # Core release files

From b24b0d4772bf9eeec47e1c2c8bc1be83bdeb153b Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Fri, 17 Oct 2025 16:31:51 -0500
Subject: [PATCH 97/98] Undo file order so it can get fixed elsewhere.

---
 .github/CODEOWNERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 1e6ad0ce59..d5881d69ae 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -19,8 +19,8 @@
 ./*.md @ram-cherukuri @megnvidia
 
 # All changes to documentation, except images:
-docs/ @megnvidia @ktangsali
 docs/img/
+docs/ @megnvidia @ktangsali
 
 
 # Core release files

From b8db738b93fe42b4c90b7c081f22288db417a0f5 Mon Sep 17 00:00:00 2001
From: Corey Adams <6619961+coreyjadams@users.noreply.github.com>
Date: Mon, 20 Oct 2025 13:38:39 +0000
Subject: [PATCH 98/98] Update doctstring tests.

---
 physicsnemo/utils/domino/__init__.py | 15 ++++++
 physicsnemo/utils/domino/utils.py    | 78 ++++++++++++++++------------
 2 files changed, 59 insertions(+), 34 deletions(-)
 create mode 100644 physicsnemo/utils/domino/__init__.py

diff --git a/physicsnemo/utils/domino/__init__.py b/physicsnemo/utils/domino/__init__.py
new file mode 100644
index 0000000000..b2f171d4ac
--- /dev/null
+++ b/physicsnemo/utils/domino/__init__.py
@@ -0,0 +1,15 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/physicsnemo/utils/domino/utils.py b/physicsnemo/utils/domino/utils.py
index e3faae8123..7f67f36e6c 100644
--- a/physicsnemo/utils/domino/utils.py
+++ b/physicsnemo/utils/domino/utils.py
@@ -67,8 +67,8 @@ def calculate_center_of_mass(
 
 def normalize(
     field: torch.Tensor,
-    max_val: torch.Tensor | None = None,
-    min_val: torch.Tensor | None = None,
+    max_val: float | torch.Tensor | None = None,
+    min_val: float | torch.Tensor | None = None,
 ) -> torch.Tensor:
     """Normalize field values to the range [-1, 1].
 
@@ -93,11 +93,11 @@ def normalize(
         >>> import torch
         >>> field = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0])
         >>> normalized = normalize(field, 5.0, 1.0)
-        >>> torch.allclose(normalized, [-1.0, -0.5, 0.0, 0.5, 1.0])
+        >>> torch.allclose(normalized, torch.tensor([-1.0, -0.5, 0.0, 0.5, 1.0]))
         True
         >>> # Auto-compute min/max
         >>> normalized_auto = normalize(field)
-        >>> torch.allclose(normalized_auto, [-1.0, -0.5, 0.0, 0.5, 1.0])
+        >>> torch.allclose(normalized_auto, torch.tensor([-1.0, -0.5, 0.0, 0.5, 1.0]))
         True
     """
 
@@ -111,7 +111,9 @@ def normalize(
 
 
 def unnormalize(
-    normalized_field: torch.Tensor, max_val: torch.Tensor, min_val: torch.Tensor
+    normalized_field: torch.Tensor,
+    max_val: float | torch.Tensor,
+    min_val: float | torch.Tensor,
 ) -> torch.Tensor:
     """Reverse the normalization process to recover original field values.
 
@@ -129,8 +131,10 @@ def unnormalize(
     Examples:
         >>> import torch
         >>> normalized = torch.tensor([-1.0, -0.5, 0.0, 0.5, 1.0])
-        >>> original = unnormalize(normalized, 5.0, 1.0)
-        >>> torch.allclose(original, [1.0, 2.0, 3.0, 4.0, 5.0])
+        >>> max_val = torch.tensor(5.0)
+        >>> min_val = torch.tensor(1.0)
+        >>> original = unnormalize(normalized, max_val, min_val)
+        >>> torch.allclose(original, torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0]))
         True
     """
     field_range = max_val - min_val
@@ -139,8 +143,8 @@ def unnormalize(
 
 def standardize(
     field: torch.Tensor,
-    mean: torch.Tensor | None = None,
-    std: torch.Tensor | None = None,
+    mean: float | torch.Tensor | None = None,
+    std: float | torch.Tensor | None = None,
 ) -> torch.Tensor:
     """Standardize field values to have zero mean and unit variance.
 
@@ -162,14 +166,16 @@ def standardize(
     Examples:
         >>> import torch
         >>> field = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0])
-        >>> standardized = standardize(field, 3.0, torch.sqrt(2.5))
-        >>> torch.allclose(standardized, [-1.265, -0.632, 0.0, 0.632, 1.265], atol=1e-3)
+        >>> mean = torch.tensor(3.0)
+        >>> std = torch.sqrt(torch.tensor(2.5))
+        >>> standardized = standardize(field, mean, std)
+        >>> torch.allclose(standardized, torch.tensor([-1.265, -0.632, 0.0, 0.632, 1.265]), atol=1e-3)
         True
         >>> # Auto-compute mean/std
         >>> standardized_auto = standardize(field)
-        >>> torch.allclose(torch.mean(standardized_auto), 0.0)
+        >>> torch.allclose(torch.mean(standardized_auto), torch.tensor(0.0))
         True
-        >>> torch.allclose(torch.std(standardized_auto, ddof=0), 1.0)
+        >>> torch.allclose(torch.std(standardized_auto), torch.tensor(1.0))
         True
     """
 
@@ -182,7 +188,9 @@ def standardize(
 
 
 def unstandardize(
-    standardized_field: torch.Tensor, mean: torch.Tensor, std: torch.Tensor
+    standardized_field: torch.Tensor,
+    mean: float | torch.Tensor,
+    std: float | torch.Tensor,
 ) -> torch.Tensor:
     """Reverse the standardization process to recover original field values.
 
@@ -200,8 +208,10 @@ def unstandardize(
     Examples:
         >>> import torch
         >>> standardized = torch.tensor([-1.265, -0.632, 0.0, 0.632, 1.265])
-        >>> original = unstandardize(standardized, 3.0, torch.sqrt(2.5))
-        >>> torch.allclose(original, [1.0, 2.0, 3.0, 4.0, 5.0], atol=1e-3)
+        >>> mean = torch.tensor(3.0)
+        >>> std = torch.sqrt(torch.tensor(2.5))
+        >>> original = unstandardize(standardized, mean, std)
+        >>> torch.allclose(original, torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0]), atol=1e-3)
         True
     """
     return standardized_field * std + mean
@@ -236,12 +246,12 @@ def calculate_normal_positional_encoding(
         >>> cell_size = [0.1, 0.1, 0.1]
         >>> encoding = calculate_normal_positional_encoding(coords, cell_dimensions=cell_size)
         >>> encoding.shape
-        (2, 12)
+        torch.Size([2, 12])
         >>> # Relative positioning example
         >>> coords_b = torch.tensor([[0.5, 0.5, 0.5], [0.5, 0.5, 0.5]])
         >>> encoding_rel = calculate_normal_positional_encoding(coords, coords_b, cell_size)
         >>> encoding_rel.shape
-        (2, 12)
+        torch.Size([2, 12])
     """
     dx, dy, dz = cell_dimensions[0], cell_dimensions[1], cell_dimensions[2]
 
@@ -318,7 +328,7 @@ def pad(arr: torch.Tensor, n_points: int, pad_value: float = 0.0) -> torch.Tenso
         >>> arr = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
         >>> padded = pad(arr, 4, -1.0)
         >>> padded.shape
-        (4, 2)
+        torch.Size([4, 2])
         >>> torch.allclose(padded[:2], arr)
         True
         >>> bool(torch.all(padded[2:] == -1.0))
@@ -368,7 +378,7 @@ def pad_inp(arr: torch.Tensor, n_points: int, pad_value: float = 0.0) -> torch.T
         >>> arr = torch.tensor([[[1.0, 2.0]], [[3.0, 4.0]]])
         >>> padded = pad_inp(arr, 4, 0.0)
         >>> padded.shape
-        (4, 1, 2)
+        torch.Size([4, 1, 2])
         >>> torch.allclose(padded[:2], arr)
         True
         >>> bool(torch.all(padded[2:] == 0.0))
@@ -424,13 +434,13 @@ def shuffle_array(
 
     Examples:
         >>> import torch
-        >>> torch.manual_seed(42)  # For reproducible results
+        >>> _ = torch.manual_seed(42)  # For reproducible results
         >>> data = torch.tensor([[1, 2], [3, 4], [5, 6], [7, 8]])
         >>> subset, indices = shuffle_array(data, 2)
         >>> subset.shape
-        (2, 2)
+        torch.Size([2, 2])
         >>> indices.shape
-        (2,)
+        torch.Size([2])
         >>> len(torch.unique(indices)) == 2  # No duplicates
         True
     """
@@ -510,14 +520,14 @@ def shuffle_array_without_sampling(
 
     Examples:
         >>> import torch
-        >>> torch.manual_seed(42)  # For reproducible results
+        >>> _ = torch.manual_seed(42)  # For reproducible results
         >>> data = torch.tensor([[1], [2], [3], [4]])
         >>> shuffled, indices = shuffle_array_without_sampling(data)
         >>> shuffled.shape
-        (4, 1)
+        torch.Size([4, 1])
         >>> indices.shape
-        (4,)
-        >>> set(indices) == set(range(4))  # All original indices present
+        torch.Size([4])
+        >>> set(indices.tolist()) == set(range(4))  # All original indices present
         True
     """
     idx = torch.randperm(arr.shape[0])
@@ -660,7 +670,7 @@ def create_grid(
         >>> grid_res = torch.tensor([2, 2, 2])
         >>> grid = create_grid(max_bounds, min_bounds, grid_res)
         >>> grid.shape
-        (2, 2, 2, 3)
+        torch.Size([2, 2, 2, 3])
         >>> torch.allclose(grid[0, 0, 0], torch.tensor([0.0, 0.0, 0.0]))
         True
         >>> torch.allclose(grid[1, 1, 1], torch.tensor([1.0, 1.0, 1.0]))
@@ -794,14 +804,14 @@ def area_weighted_shuffle_array(
 
     Examples:
         >>> import torch
-        >>> torch.manual_seed(42)  # For reproducible results
+        >>> _ = torch.manual_seed(42)  # For reproducible results
         >>> mesh_data = torch.tensor([[1.0], [2.0], [3.0], [4.0]])
         >>> cell_areas = torch.tensor([0.1, 0.1, 0.1, 10.0])  # Last point has much larger area
         >>> subset, indices = area_weighted_shuffle_array(mesh_data, 2, cell_areas)
         >>> subset.shape
-        (2, 1)
+        torch.Size([2, 1])
         >>> indices.shape
-        (2,)
+        torch.Size([2])
         >>> # The point with large area (index 3) should likely be selected
         >>> len(set(indices)) <= 2  # At most 2 unique indices
         True
@@ -849,14 +859,14 @@ def solution_weighted_shuffle_array(
 
     Examples:
         >>> import torch
-        >>> torch.manual_seed(42)  # For reproducible results
+        >>> _ = torch.manual_seed(42)  # For reproducible results
         >>> mesh_data = torch.tensor([[1.0], [2.0], [3.0], [4.0]])
         >>> solution = torch.tensor([0.1, 0.1, 0.1, 10.0])  # Last point has much larger solution field
         >>> subset, indices = solution_weighted_shuffle_array(mesh_data, 2, solution)
         >>> subset.shape
-        (2, 1)
+        torch.Size([2, 1])
         >>> indices.shape
-        (2,)
+        torch.Size([2])
         >>> # The point with large area (index 3) should likely be selected
         >>> len(set(indices)) <= 2  # At most 2 unique indices
         True