From e559c465b63fb54b29348a2927140d9de543d67d Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Mon, 25 Aug 2025 14:53:34 +0000 Subject: [PATCH 01/98] Relax cuml constraints --- examples/cfd/external_aerodynamics/domino/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/cfd/external_aerodynamics/domino/requirements.txt b/examples/cfd/external_aerodynamics/domino/requirements.txt index cafc1c7a4c..bb81466cdd 100644 --- a/examples/cfd/external_aerodynamics/domino/requirements.txt +++ b/examples/cfd/external_aerodynamics/domino/requirements.txt @@ -1,4 +1,4 @@ torchinfo warp-lang tensorboard -cuml-cu12>=25.6.0 +cuml From e38ecdf0d464de928a033f4aec82b49756f87740 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Mon, 25 Aug 2025 14:54:58 +0000 Subject: [PATCH 02/98] Port sdf function to use only torch inputs. No changes to tests yet. --- physicsnemo/utils/sdf.py | 155 +++++++++++++++++---------------------- 1 file changed, 66 insertions(+), 89 deletions(-) diff --git a/physicsnemo/utils/sdf.py b/physicsnemo/utils/sdf.py index a095074a96..446b7b5d54 100644 --- a/physicsnemo/utils/sdf.py +++ b/physicsnemo/utils/sdf.py @@ -14,8 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import cupy as cp -import numpy as np +import torch import warp as wp wp.config.quiet = True @@ -28,7 +27,6 @@ def _bvh_query_distance( max_dist: wp.float32, sdf: wp.array(dtype=wp.float32), sdf_hit_point: wp.array(dtype=wp.vec3f), - sdf_hit_point_id: wp.array(dtype=wp.int32), use_sign_winding_number: bool = False, ): """ @@ -67,22 +65,15 @@ def _bvh_query_distance( sdf[tid] = res.sign * wp.abs(wp.length(points[tid] - p_closest)) sdf_hit_point[tid] = p_closest - sdf_hit_point_id[tid] = res.face - - -Array = np.ndarray | cp.ndarray def signed_distance_field( - mesh_vertices: Array, - mesh_indices: Array, - input_points: Array, + mesh_vertices: torch.Tensor, + mesh_indices: torch.Tensor, + input_points: torch.Tensor, max_dist: float = 1e8, - include_hit_points: bool = False, - include_hit_points_id: bool = False, use_sign_winding_number: bool = False, - return_cupy: bool | None = None, -) -> Array | tuple[Array, ...]: +): """ Computes the signed distance field (SDF) for a given mesh and input points. @@ -100,11 +91,7 @@ def signed_distance_field( max_dist (float, optional): Maximum distance within which to search for the closest point on the mesh. Default is 1e8. include_hit_points (bool, optional): Whether to include hit points in - the output. Here, "hit points" are the points on the mesh that are - closest to the input points, and hence, are defining the SDF. - Default is False. - include_hit_points_id (bool, optional): Whether to include hit point - IDs in the output. Default is False. + the output. Here, use_sign_winding_number (bool, optional): Whether to use sign winding number method for SDF. Default is False. If False, your mesh should be watertight to obtain correct results. @@ -115,88 +102,78 @@ def signed_distance_field( Returns: ------- Returns: - np.ndarray | cp.ndarray or tuple: - - If both `include_hit_points` and `include_hit_points_id` are False - (default), returns a 1D array of signed distances for each input - point. - - If `include_hit_points` is True, returns a tuple: (sdf, - hit_points), where `hit_points` contains the closest mesh point - for each input point. - - If `include_hit_points_id` is True, returns a tuple: (sdf, - hit_point_ids), where `hit_point_ids` contains the face index of - the closest mesh face for each input point. - - If both `include_hit_points` and `include_hit_points_id` are True, - returns a tuple: (sdf, hit_points, hit_point_ids). - - The returned array type (NumPy or CuPy) is determined by the - `return_cupy` argument, or inferred from the input arrays. + tuple[torch.Tensor, torch.Tensor] of: + - signed distance to the mesh, per input point + - hith point, per input point. "hit points" are the points on the + mesh that are closest to the input points, and hence, are + defining the SDF. Example: ------- >>> mesh_vertices = [(0, 0, 0), (1, 0, 0), (0, 1, 0)] - >>> mesh_indices = np.array((0, 1, 2)) - >>> input_points = [(0.5, 0.5, 0.5)] + >>> mesh_indices = torch.tensor((0, 1, 2)) + >>> input_points = torch.tensor((0.5, 0.5, 0.5)) >>> signed_distance_field(mesh_vertices, mesh_indices, input_points) - array([0.5], dtype=float32) + (tensor([0.5]), tensor([0.5, 0.5, 0.5])) """ - if return_cupy is None: - return_cupy = any( - isinstance(arr, cp.ndarray) - for arr in (mesh_vertices, mesh_indices, input_points) - ) - wp.init() + if input_points.shape[-1] != 3: + raise ValueError("Input points must be a tensor with last dimension of size 3") - if isinstance(mesh_vertices, cp.ndarray): - device = mesh_vertices.device - wp_device = f"cuda:{device.id}" - else: - wp_device = wp.get_device() + input_shape = input_points.shape - with wp.ScopedDevice(wp_device): - mesh = wp.Mesh( - points=wp.array(mesh_vertices, dtype=wp.vec3f, device=wp_device), - indices=wp.array(mesh_indices, dtype=wp.int32, device=wp_device), - ) + # Flatten the input points: + input_points = input_points.reshape(-1, 3) - warp_input_points = wp.array(input_points, dtype=wp.vec3f, device=wp_device) - - N = len(warp_input_points) - - sdf = wp.empty(shape=(N,), dtype=wp.float32, device=wp_device) - sdf_hit_point = wp.empty(shape=(N,), dtype=wp.vec3f, device=wp_device) - sdf_hit_point_id = wp.empty(shape=(N,), dtype=wp.int32, device=wp_device) - - wp.launch( - kernel=_bvh_query_distance, - dim=N, - inputs=[ - mesh.id, - warp_input_points, - max_dist, - sdf, - sdf_hit_point, - sdf_hit_point_id, - use_sign_winding_number, - ], - device=wp_device, - ) + N = len(input_points) + + # Allocate output tensors with torch: + sdf = torch.zeros(N, dtype=torch.float32, device=input_points.device) + sdf_hit_point = torch.zeros(N, 3, dtype=torch.float32, device=input_points.device) - def convert(array: wp.array) -> np.ndarray | cp.ndarray: - """Converts a Warp array to CuPy/NumPy based on the `return_cupy` flag.""" - if return_cupy: - return cp.asarray(array) - else: - return array.numpy() + wp.init() + + # zero copy the vertices, indices, and input points to warp: + wp_vertices = wp.from_torch(mesh_vertices, dtype=wp.vec3) + wp_indices = wp.from_torch(mesh_indices, dtype=wp.int32) + wp_input_points = wp.from_torch(input_points, dtype=wp.vec3) - arrays_to_return: list[np.ndarray | cp.ndarray] = [convert(sdf)] + # Convert output points: + wp_sdf = wp.from_torch(sdf, dtype=wp.float32) + wp_sdf_hit_point = wp.from_torch(sdf_hit_point, dtype=wp.vec3f) - if include_hit_points: - arrays_to_return.append(convert(sdf_hit_point)) - if include_hit_points_id: - arrays_to_return.append(convert(sdf_hit_point_id)) + mesh = wp.Mesh( + points=wp_vertices, + indices=wp_indices, + support_winding_number=use_sign_winding_number, + ) - return ( - arrays_to_return[0] - if len(arrays_to_return) == 1 - else tuple(arrays_to_return) + if input_points.device.type == "cuda": + wp_launch_stream = wp.stream_from_torch( + torch.cuda.current_stream(input_points.device) ) + wp_launch_device = None # We explicitly pass None if using the stream. + else: + wp_launch_stream = None + wp_launch_device = "cpu" # CPUs have no streams + + wp.launch( + kernel=_bvh_query_distance, + dim=N, + inputs=[ + mesh.id, + wp_input_points, + max_dist, + wp_sdf, + wp_sdf_hit_point, + use_sign_winding_number, + ], + device=wp_launch_device, + stream=wp_launch_stream, + ) + + # Unflatten the output to be like the input: + sdf = sdf.reshape(input_shape[:-1] + (1,)) + sdf_hit_point = sdf_hit_point.reshape(input_shape) + + return sdf, sdf_hit_point From dd7b3cfa14b53bac69d385b82eb9a70918a76e7a Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Mon, 25 Aug 2025 14:58:21 +0000 Subject: [PATCH 03/98] Porting some domino utils function to pure torch interface --- physicsnemo/utils/domino/utils.py | 578 ++++++--------------- physicsnemo/utils/domino/vtk_file_utils.py | 380 ++++++++++++++ 2 files changed, 542 insertions(+), 416 deletions(-) create mode 100644 physicsnemo/utils/domino/vtk_file_utils.py diff --git a/physicsnemo/utils/domino/utils.py b/physicsnemo/utils/domino/utils.py index dc4f3ac796..15437dca9e 100644 --- a/physicsnemo/utils/domino/utils.py +++ b/physicsnemo/utils/domino/utils.py @@ -26,15 +26,10 @@ from typing import Any, Sequence import numpy as np -import vtk +import torch from scipy.spatial import KDTree -from vtk import vtkDataSetTriangleFilter -from vtk.util import numpy_support - -from physicsnemo.utils.profiling import profile # Type alias for arrays that can be either NumPy or CuPy - try: import cupy as cp @@ -69,7 +64,9 @@ def array_type(array: ArrayType) -> "type[np] | type[cp]": return np -def calculate_center_of_mass(centers: ArrayType, sizes: ArrayType) -> ArrayType: +def calculate_center_of_mass( + centers: torch.Tensor, sizes: torch.Tensor +) -> torch.Tensor: """Calculate the center of mass for a collection of elements. Computes the volume-weighted centroid of mesh elements, commonly used @@ -88,24 +85,25 @@ def calculate_center_of_mass(centers: ArrayType, sizes: ArrayType) -> ArrayType: ValueError: If centers and sizes have incompatible shapes. Examples: - >>> import numpy as np - >>> centers = np.array([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0], [2.0, 2.0, 2.0]]) - >>> sizes = np.array([1.0, 2.0, 3.0]) + >>> import torch + >>> centers = torch.tensor([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0], [2.0, 2.0, 2.0]]) + >>> sizes = torch.tensor([1.0, 2.0, 3.0]) >>> com = calculate_center_of_mass(centers, sizes) - >>> np.allclose(com, [[4.0/3.0, 4.0/3.0, 4.0/3.0]]) + >>> torch.allclose(com, torch.tensor([[4.0/3.0, 4.0/3.0, 4.0/3.0]])) True """ - xp = array_type(centers) - total_weighted_position = xp.einsum("i,ij->j", sizes, centers) - total_size = xp.sum(sizes) + total_weighted_position = torch.einsum("i,ij->j", sizes, centers) + total_size = torch.sum(sizes) return total_weighted_position[None, ...] / total_size def normalize( - field: ArrayType, max_val: ArrayType | None = None, min_val: ArrayType | None = None -) -> ArrayType: + field: torch.Tensor, + max_val: torch.Tensor | None = None, + min_val: torch.Tensor | None = None, +) -> torch.Tensor: """Normalize field values to the range [-1, 1]. Applies min-max normalization to scale field values to a symmetric range @@ -126,30 +124,29 @@ def normalize( ZeroDivisionError: If max_val equals min_val (zero range). Examples: - >>> import numpy as np - >>> field = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) + >>> import torch + >>> field = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0]) >>> normalized = normalize(field, 5.0, 1.0) - >>> np.allclose(normalized, [-1.0, -0.5, 0.0, 0.5, 1.0]) + >>> torch.allclose(normalized, [-1.0, -0.5, 0.0, 0.5, 1.0]) True >>> # Auto-compute min/max >>> normalized_auto = normalize(field) - >>> np.allclose(normalized_auto, [-1.0, -0.5, 0.0, 0.5, 1.0]) + >>> torch.allclose(normalized_auto, [-1.0, -0.5, 0.0, 0.5, 1.0]) True """ - xp = array_type(field) if max_val is None: - max_val = xp.max(field, axis=0, keepdims=True) + max_val = field.max(axis=0, keepdim=True) if min_val is None: - min_val = xp.min(field, axis=0, keepdims=True) + min_val = field.min(axis=0, keepdim=True) field_range = max_val - min_val return 2.0 * (field - min_val) / field_range - 1.0 def unnormalize( - normalized_field: ArrayType, max_val: ArrayType, min_val: ArrayType -) -> ArrayType: + normalized_field: torch.Tensor, max_val: torch.Tensor, min_val: torch.Tensor +) -> torch.Tensor: """Reverse the normalization process to recover original field values. Transforms normalized values from the range [-1, 1] back to their original @@ -164,10 +161,10 @@ def unnormalize( Field values restored to their original physical range. Examples: - >>> import numpy as np - >>> normalized = np.array([-1.0, -0.5, 0.0, 0.5, 1.0]) + >>> import torch + >>> normalized = torch.tensor([-1.0, -0.5, 0.0, 0.5, 1.0]) >>> original = unnormalize(normalized, 5.0, 1.0) - >>> np.allclose(original, [1.0, 2.0, 3.0, 4.0, 5.0]) + >>> torch.allclose(original, [1.0, 2.0, 3.0, 4.0, 5.0]) True """ field_range = max_val - min_val @@ -175,8 +172,10 @@ def unnormalize( def standardize( - field: ArrayType, mean: ArrayType | None = None, std: ArrayType | None = None -) -> ArrayType: + field: torch.Tensor, + mean: torch.Tensor | None = None, + std: torch.Tensor | None = None, +) -> torch.Tensor: """Standardize field values to have zero mean and unit variance. Applies z-score normalization to center the data around zero with @@ -195,31 +194,30 @@ def standardize( ZeroDivisionError: If std contains zeros. Examples: - >>> import numpy as np - >>> field = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) - >>> standardized = standardize(field, 3.0, np.sqrt(2.5)) - >>> np.allclose(standardized, [-1.265, -0.632, 0.0, 0.632, 1.265], atol=1e-3) + >>> import torch + >>> field = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0]) + >>> standardized = standardize(field, 3.0, torch.sqrt(2.5)) + >>> torch.allclose(standardized, [-1.265, -0.632, 0.0, 0.632, 1.265], atol=1e-3) True >>> # Auto-compute mean/std >>> standardized_auto = standardize(field) - >>> np.allclose(np.mean(standardized_auto), 0.0) + >>> torch.allclose(torch.mean(standardized_auto), 0.0) True - >>> np.allclose(np.std(standardized_auto, ddof=0), 1.0) + >>> torch.allclose(torch.std(standardized_auto, ddof=0), 1.0) True """ - xp = array_type(field) if mean is None: - mean = xp.mean(field, axis=0, keepdims=True) + mean = field.mean(axis=0, keepdim=True) if std is None: - std = xp.std(field, axis=0, keepdims=True) + std = field.std(axis=0, keepdim=True) return (field - mean) / std def unstandardize( - standardized_field: ArrayType, mean: ArrayType, std: ArrayType -) -> ArrayType: + standardized_field: torch.Tensor, mean: torch.Tensor, std: torch.Tensor +) -> torch.Tensor: """Reverse the standardization process to recover original field values. Transforms standardized values (zero mean, unit variance) back to their @@ -234,365 +232,15 @@ def unstandardize( Field values restored to their original distribution. Examples: - >>> import numpy as np - >>> standardized = np.array([-1.265, -0.632, 0.0, 0.632, 1.265]) - >>> original = unstandardize(standardized, 3.0, np.sqrt(2.5)) - >>> np.allclose(original, [1.0, 2.0, 3.0, 4.0, 5.0], atol=1e-3) + >>> import torch + >>> standardized = torch.tensor([-1.265, -0.632, 0.0, 0.632, 1.265]) + >>> original = unstandardize(standardized, 3.0, torch.sqrt(2.5)) + >>> torch.allclose(original, [1.0, 2.0, 3.0, 4.0, 5.0], atol=1e-3) True """ return standardized_field * std + mean -def write_to_vtp(polydata: "vtk.vtkPolyData", filename: str) -> None: - """Write VTK polydata to a VTP (VTK PolyData) file format. - - VTP files are XML-based and store polygonal data including points, polygons, - and associated field data. This format is commonly used for surface meshes - in computational fluid dynamics visualization. - - Args: - polydata: VTK polydata object containing mesh geometry and fields. - filename: Output filename with .vtp extension. Directory will be created - if it doesn't exist. - - Raises: - RuntimeError: If writing fails due to file permissions or disk space. - - """ - # Ensure output directory exists - output_path = Path(filename) - output_path.parent.mkdir(parents=True, exist_ok=True) - - writer = vtk.vtkXMLPolyDataWriter() - writer.SetFileName(str(output_path)) - writer.SetInputData(polydata) - - if not writer.Write(): - raise RuntimeError(f"Failed to write polydata to {output_path}") - - -def write_to_vtu(unstructured_grid: "vtk.vtkUnstructuredGrid", filename: str) -> None: - """Write VTK unstructured grid to a VTU (VTK Unstructured Grid) file format. - - VTU files store 3D volumetric meshes with arbitrary cell types including - tetrahedra, hexahedra, and pyramids. This format is essential for storing - finite element analysis results. - - Args: - unstructured_grid: VTK unstructured grid object containing volumetric mesh - geometry and field data. - filename: Output filename with .vtu extension. Directory will be created - if it doesn't exist. - - Raises: - RuntimeError: If writing fails due to file permissions or disk space. - - """ - # Ensure output directory exists - output_path = Path(filename) - output_path.parent.mkdir(parents=True, exist_ok=True) - - writer = vtk.vtkXMLUnstructuredGridWriter() - writer.SetFileName(str(output_path)) - writer.SetInputData(unstructured_grid) - - if not writer.Write(): - raise RuntimeError(f"Failed to write unstructured grid to {output_path}") - - -def extract_surface_triangles(tetrahedral_mesh: "vtk.vtkUnstructuredGrid") -> list[int]: - """Extract surface triangle indices from a tetrahedral mesh. - - This function identifies the boundary faces of a 3D tetrahedral mesh and - returns the vertex indices that form triangular faces on the surface. - This is essential for visualization and boundary condition application. - - Args: - tetrahedral_mesh: VTK unstructured grid containing tetrahedral elements. - - Returns: - List of vertex indices forming surface triangles. Every three consecutive - indices define one triangle. - - Raises: - NotImplementedError: If the surface contains non-triangular faces. - - """ - # Extract the surface using VTK filter - surface_filter = vtk.vtkDataSetSurfaceFilter() - surface_filter.SetInputData(tetrahedral_mesh) - surface_filter.Update() - - # Wrap with PyVista for easier manipulation - import pyvista as pv - - surface_mesh = pv.wrap(surface_filter.GetOutput()) - triangle_indices = [] - - # Process faces - PyVista stores faces as [n_vertices, v1, v2, ..., vn] - faces = surface_mesh.faces.reshape((-1, 4)) - for face in faces: - if face[0] == 3: # Triangle (3 vertices) - triangle_indices.extend([face[1], face[2], face[3]]) - else: - raise NotImplementedError( - f"Non-triangular face found with {face[0]} vertices" - ) - - return triangle_indices - - -def convert_to_tet_mesh(polydata: "vtk.vtkPolyData") -> "vtk.vtkUnstructuredGrid": - """Convert surface polydata to a tetrahedral volumetric mesh. - - This function performs tetrahedralization of a surface mesh, creating - a 3D volumetric mesh suitable for finite element analysis. The process - fills the interior of the surface with tetrahedral elements. - - Args: - polydata: VTK polydata representing a closed surface mesh. - - Returns: - VTK unstructured grid containing tetrahedral elements filling the - volume enclosed by the input surface. - - Raises: - RuntimeError: If tetrahedralization fails (e.g., non-manifold surface). - - """ - tetrahedral_filter = vtkDataSetTriangleFilter() - tetrahedral_filter.SetInputData(polydata) - tetrahedral_filter.Update() - - tetrahedral_mesh = tetrahedral_filter.GetOutput() - return tetrahedral_mesh - - -def convert_point_data_to_cell_data(input_data: "vtk.vtkDataSet") -> "vtk.vtkDataSet": - """Convert point-based field data to cell-based field data. - - This function transforms field variables defined at mesh vertices (nodes) - to values defined at cell centers. This conversion is often needed when - switching between different numerical methods or visualization requirements. - - Args: - input_data: VTK dataset with point data to be converted. - - Returns: - VTK dataset with the same geometry but field data moved from points to cells. - Values are typically averaged from the surrounding points. - - """ - point_to_cell_filter = vtk.vtkPointDataToCellData() - point_to_cell_filter.SetInputData(input_data) - point_to_cell_filter.Update() - - return point_to_cell_filter.GetOutput() - - -def get_node_to_elem(polydata: "vtk.vtkDataSet") -> "vtk.vtkDataSet": - """Convert point data to cell data for VTK dataset. - - This function transforms field variables defined at mesh vertices to - values defined at cell centers using VTK's built-in conversion filter. - - Args: - polydata: VTK dataset with point data to be converted. - - Returns: - VTK dataset with field data moved from points to cells. - - """ - point_to_cell_filter = vtk.vtkPointDataToCellData() - point_to_cell_filter.SetInputData(polydata) - point_to_cell_filter.Update() - cell_data = point_to_cell_filter.GetOutput() - return cell_data - - -def get_fields_from_cell( - cell_data: "vtk.vtkCellData", variable_names: list[str] -) -> np.ndarray: - """Extract field variables from VTK cell data. - - This function extracts multiple field variables from VTK cell data and - organizes them into a structured NumPy array. Each variable becomes a - column in the output array. - - Args: - cell_data: VTK cell data object containing field variables. - variable_names: List of variable names to extract from the cell data. - - Returns: - NumPy array of shape (n_cells, n_variables) containing the extracted - field data. Variables are ordered according to the input list. - - Raises: - ValueError: If a requested variable name is not found in the cell data. - - """ - extracted_fields = [] - for variable_name in variable_names: - variable_array = cell_data.GetArray(variable_name) - if variable_array is None: - raise ValueError(f"Variable '{variable_name}' not found in cell data") - - num_tuples = variable_array.GetNumberOfTuples() - field_values = [] - for tuple_idx in range(num_tuples): - variable_value = np.array(variable_array.GetTuple(tuple_idx)) - field_values.append(variable_value) - field_values = np.asarray(field_values) - extracted_fields.append(field_values) - - # Transpose to get shape (n_cells, n_variables) - extracted_fields = np.transpose(np.asarray(extracted_fields), (1, 0)) - return extracted_fields - - -def get_fields( - data_attributes: "vtk.vtkDataSetAttributes", variable_names: list[str] -) -> list[np.ndarray]: - """Extract multiple field variables from VTK data attributes. - - This function extracts field variables from VTK data attributes (either - point data or cell data) and returns them as a list of NumPy arrays. - It handles both point and cell data seamlessly. - - Args: - data_attributes: VTK data attributes object (point data or cell data). - variable_names: List of variable names to extract. - - Returns: - List of NumPy arrays, one for each requested variable. Each array - has shape (n_points/n_cells, n_components) where n_components - depends on the variable (1 for scalars, 3 for vectors, etc.). - - Raises: - ValueError: If a requested variable is not found in the data attributes. - - """ - extracted_fields = [] - for variable_name in variable_names: - try: - vtk_array = data_attributes.GetArray(variable_name) - except ValueError as e: - raise ValueError( - f"Failed to get array '{variable_name}' from the data attributes: {e}" - ) - - # Convert VTK array to NumPy array with proper shape - numpy_array = numpy_support.vtk_to_numpy(vtk_array).reshape( - vtk_array.GetNumberOfTuples(), vtk_array.GetNumberOfComponents() - ) - extracted_fields.append(numpy_array) - - return extracted_fields - - -def get_vertices(polydata: "vtk.vtkPolyData") -> np.ndarray: - """Extract vertex coordinates from VTK polydata object. - - This function converts VTK polydata to a NumPy array containing the 3D - coordinates of all vertices in the mesh. - - Args: - polydata: VTK polydata object containing mesh geometry. - - Returns: - NumPy array of shape (n_points, 3) containing [x, y, z] coordinates - for each vertex. - - """ - vtk_points = polydata.GetPoints() - vertices = numpy_support.vtk_to_numpy(vtk_points.GetData()) - return vertices - - -def get_volume_data( - polydata: "vtk.vtkPolyData", variable_names: list[str] -) -> tuple[np.ndarray, list[np.ndarray]]: - """Extract vertices and field data from 3D volumetric mesh. - - This function extracts both geometric information (vertex coordinates) - and field data from a 3D volumetric mesh. It's commonly used for - processing finite element analysis results. - - Args: - polydata: VTK polydata representing a 3D volumetric mesh. - variable_names: List of field variable names to extract. - - Returns: - Tuple containing: - - Vertex coordinates as NumPy array of shape (n_vertices, 3) - - List of field arrays, one per variable - - """ - vertices = get_vertices(polydata) - point_data = polydata.GetPointData() - fields = get_fields(point_data, variable_names) - - return vertices, fields - - -def get_surface_data( - polydata: "vtk.vtkPolyData", variable_names: list[str] -) -> tuple[np.ndarray, list[np.ndarray], list[tuple[int, int]]]: - """Extract surface mesh data including vertices, fields, and edge connectivity. - - This function extracts comprehensive surface mesh information including - vertex coordinates, field data at vertices, and edge connectivity information. - It's commonly used for processing CFD surface results and boundary conditions. - - Args: - polydata: VTK polydata representing a surface mesh. - variable_names: List of field variable names to extract from the mesh. - - Returns: - Tuple containing: - - Vertex coordinates as NumPy array of shape (n_vertices, 3) - - List of field arrays, one per variable - - List of edge tuples representing mesh connectivity - - Raises: - ValueError: If a requested variable is not found or polygon data is missing. - - """ - points = polydata.GetPoints() - vertices = np.array([points.GetPoint(i) for i in range(points.GetNumberOfPoints())]) - - point_data = polydata.GetPointData() - fields = [] - for array_name in variable_names: - try: - array = point_data.GetArray(array_name) - except ValueError: - raise ValueError( - f"Failed to get array {array_name} from the unstructured grid." - ) - array_data = np.zeros( - (points.GetNumberOfPoints(), array.GetNumberOfComponents()) - ) - for j in range(points.GetNumberOfPoints()): - array.GetTuple(j, array_data[j]) - fields.append(array_data) - - polys = polydata.GetPolys() - if polys is None: - raise ValueError("Failed to get polygons from the polydata.") - polys.InitTraversal() - edges = [] - id_list = vtk.vtkIdList() - for _ in range(polys.GetNumberOfCells()): - polys.GetNextCell(id_list) - num_ids = id_list.GetNumberOfIds() - edges = [ - (id_list.GetId(j), id_list.GetId((j + 1) % num_ids)) for j in range(num_ids) - ] - - return vertices, fields, edges - - def calculate_normal_positional_encoding( coordinates_a: ArrayType, coordinates_b: ArrayType | None = None, @@ -769,21 +417,28 @@ def pad_inp(arr: ArrayType, n_points: int, pad_value: float = 0.0) -> ArrayType: return arr_padded -@profile def shuffle_array( - arr: ArrayType, + points: torch.Tensor, n_points: int, -) -> tuple[ArrayType, ArrayType]: - """Randomly sample points from array without replacement. + weights: torch.Tensor = None, +): + """ + Randomly sample points from array without replacement. This function performs random sampling from the input array, selecting n_points points without replacement. It's commonly used for creating training subsets and data augmentation in machine learning workflows. + Optionally, you can provide weights to use in the sampling. + + Note: the implementation with torch.multinomial is constrained to 2^24 points. + If the input is larger than that, it will be split and sampled from each chunk. + Args: arr: Input array to sample from, shape (n_points, ...). n_points: Number of points to sample. If greater than arr.shape[0], all points are returned. + weights: Optional weights for sampling. If None, uniform weights are used. Returns: Tuple containing: @@ -791,9 +446,9 @@ def shuffle_array( - Indices of the selected points Examples: - >>> import numpy as np - >>> np.random.seed(42) # For reproducible results - >>> data = np.array([[1, 2], [3, 4], [5, 6], [7, 8]]) + >>> import torch + >>> torch.manual_seed(42) # For reproducible results + >>> data = torch.tensor([[1, 2], [3, 4], [5, 6], [7, 8]]) >>> subset, indices = shuffle_array(data, 2) >>> subset.shape (2, 2) @@ -802,15 +457,109 @@ def shuffle_array( >>> len(np.unique(indices)) == 2 # No duplicates True """ - xp = array_type(arr) - if n_points > arr.shape[0]: - # If asking too many points, truncate the ask but still shuffle. - n_points = arr.shape[0] - idx = xp.random.choice(arr.shape[0], size=n_points, replace=False) - return arr[idx], idx + N_input_points = points.shape[0] + + if N_input_points < n_points: + return points, torch.arange(N_input_points) + + # If there are no weights, use uniform weights: + if weights is None: + weights = torch.ones(N_input_points, device=points.device) + + # Using torch multinomial for this. + # Multinomial can't work with more than 2^24 input points. + + # So apply chunking and stich back together in that case. + # Assume each chunk gets a number proportional to it's size, + # (but make sure they add up to n_points!) + + max_chunk_size = 2**24 + + N_chunks = (N_input_points // max_chunk_size) + 1 -def shuffle_array_without_sampling(arr: ArrayType) -> tuple[ArrayType, ArrayType]: + # Divide the weights into these chunks + chunk_weights = torch.chunk(weights, N_chunks) + + # Determine how mant points to compute per chunk: + points_per_chunk = [ + round(n_points * c.shape[0] / N_input_points) for c in chunk_weights + ] + print(f"points_per_chunk: {points_per_chunk}") + + gap = n_points - sum(points_per_chunk) + print(f"gap: {gap}") + + if gap > 0: + for g in range(gap): + points_per_chunk[g] += 1 + elif gap < 0: + for g in range(gap): + points_per_chunk[g] -= 1 + + # Create a list of indexes per chunk: + idx_chunks = [ + torch.multinomial( + w, + p, + replacement=False, + ) + for w, p in zip(chunk_weights, points_per_chunk) + ] + + # Stich the chunks back together: + idx = torch.cat(idx_chunks) + + # Apply the selection: + points_selected = points[idx] + + return points_selected, idx + + +# @profile +# def shuffle_array( +# arr: ArrayType, +# n_points: int, +# ) -> tuple[ArrayType, ArrayType]: +# """Randomly sample points from array without replacement. + +# This function performs random sampling from the input array, selecting +# n_points points without replacement. It's commonly used for creating training +# subsets and data augmentation in machine learning workflows. + +# Args: +# arr: Input array to sample from, shape (n_points, ...). +# n_points: Number of points to sample. If greater than arr.shape[0], +# all points are returned. + +# Returns: +# Tuple containing: +# - Sampled array subset +# - Indices of the selected points + +# Examples: +# >>> import numpy as np +# >>> np.random.seed(42) # For reproducible results +# >>> data = np.array([[1, 2], [3, 4], [5, 6], [7, 8]]) +# >>> subset, indices = shuffle_array(data, 2) +# >>> subset.shape +# (2, 2) +# >>> indices.shape +# (2,) +# >>> len(np.unique(indices)) == 2 # No duplicates +# True +# """ +# xp = array_type(arr) +# if n_points > arr.shape[0]: +# # If asking too many points, truncate the ask but still shuffle. +# n_points = arr.shape[0] +# idx = xp.random.choice(arr.shape[0], size=n_points, replace=False) +# return arr[idx], idx + + +def shuffle_array_without_sampling( + arr: torch.Tensor, +) -> tuple[torch.Tensor, torch.Tensor]: """Shuffle array order without changing the number of elements. This function reorders all elements in the array randomly while preserving @@ -826,9 +575,9 @@ def shuffle_array_without_sampling(arr: ArrayType) -> tuple[ArrayType, ArrayType - Permutation indices used for shuffling Examples: - >>> import numpy as np - >>> np.random.seed(42) # For reproducible results - >>> data = np.array([[1], [2], [3], [4]]) + >>> import torch + >>> torch.manual_seed(42) # For reproducible results + >>> data = torch.tensor([[1], [2], [3], [4]]) >>> shuffled, indices = shuffle_array_without_sampling(data) >>> shuffled.shape (4, 1) @@ -837,9 +586,7 @@ def shuffle_array_without_sampling(arr: ArrayType) -> tuple[ArrayType, ArrayType >>> set(indices) == set(range(4)) # All original indices present True """ - xp = array_type(arr) - idx = xp.arange(arr.shape[0]) - xp.random.shuffle(idx) + idx = torch.randperm(arr.shape[0]) return arr[idx], idx @@ -1004,7 +751,6 @@ def create_grid( zv = xp.expand_dims(zv, -1) grid = xp.concatenate((xv, yv, zv), axis=-1) grid = xp.transpose(grid, (1, 0, 2, 3)) - return grid diff --git a/physicsnemo/utils/domino/vtk_file_utils.py b/physicsnemo/utils/domino/vtk_file_utils.py new file mode 100644 index 0000000000..cdde402f8c --- /dev/null +++ b/physicsnemo/utils/domino/vtk_file_utils.py @@ -0,0 +1,380 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Utilities for data processing and training with the DoMINO model architecture. + +This module provides essential utilities for computational fluid dynamics data processing, +mesh manipulation, field normalization, and geometric computations. It supports both +CPU (NumPy) and GPU (CuPy) operations with automatic fallbacks. +""" + +from pathlib import Path + +import numpy as np +import vtk +from vtk import vtkDataSetTriangleFilter +from vtk.util import numpy_support + + +def write_to_vtp(polydata: "vtk.vtkPolyData", filename: str) -> None: + """Write VTK polydata to a VTP (VTK PolyData) file format. + + VTP files are XML-based and store polygonal data including points, polygons, + and associated field data. This format is commonly used for surface meshes + in computational fluid dynamics visualization. + + Args: + polydata: VTK polydata object containing mesh geometry and fields. + filename: Output filename with .vtp extension. Directory will be created + if it doesn't exist. + + Raises: + RuntimeError: If writing fails due to file permissions or disk space. + + """ + # Ensure output directory exists + output_path = Path(filename) + output_path.parent.mkdir(parents=True, exist_ok=True) + + writer = vtk.vtkXMLPolyDataWriter() + writer.SetFileName(str(output_path)) + writer.SetInputData(polydata) + + if not writer.Write(): + raise RuntimeError(f"Failed to write polydata to {output_path}") + + +def write_to_vtu(unstructured_grid: "vtk.vtkUnstructuredGrid", filename: str) -> None: + """Write VTK unstructured grid to a VTU (VTK Unstructured Grid) file format. + + VTU files store 3D volumetric meshes with arbitrary cell types including + tetrahedra, hexahedra, and pyramids. This format is essential for storing + finite element analysis results. + + Args: + unstructured_grid: VTK unstructured grid object containing volumetric mesh + geometry and field data. + filename: Output filename with .vtu extension. Directory will be created + if it doesn't exist. + + Raises: + RuntimeError: If writing fails due to file permissions or disk space. + + """ + # Ensure output directory exists + output_path = Path(filename) + output_path.parent.mkdir(parents=True, exist_ok=True) + + writer = vtk.vtkXMLUnstructuredGridWriter() + writer.SetFileName(str(output_path)) + writer.SetInputData(unstructured_grid) + + if not writer.Write(): + raise RuntimeError(f"Failed to write unstructured grid to {output_path}") + + +def extract_surface_triangles(tetrahedral_mesh: "vtk.vtkUnstructuredGrid") -> list[int]: + """Extract surface triangle indices from a tetrahedral mesh. + + This function identifies the boundary faces of a 3D tetrahedral mesh and + returns the vertex indices that form triangular faces on the surface. + This is essential for visualization and boundary condition application. + + Args: + tetrahedral_mesh: VTK unstructured grid containing tetrahedral elements. + + Returns: + List of vertex indices forming surface triangles. Every three consecutive + indices define one triangle. + + Raises: + NotImplementedError: If the surface contains non-triangular faces. + + """ + # Extract the surface using VTK filter + surface_filter = vtk.vtkDataSetSurfaceFilter() + surface_filter.SetInputData(tetrahedral_mesh) + surface_filter.Update() + + # Wrap with PyVista for easier manipulation + import pyvista as pv + + surface_mesh = pv.wrap(surface_filter.GetOutput()) + triangle_indices = [] + + # Process faces - PyVista stores faces as [n_vertices, v1, v2, ..., vn] + faces = surface_mesh.faces.reshape((-1, 4)) + for face in faces: + if face[0] == 3: # Triangle (3 vertices) + triangle_indices.extend([face[1], face[2], face[3]]) + else: + raise NotImplementedError( + f"Non-triangular face found with {face[0]} vertices" + ) + + return triangle_indices + + +def convert_to_tet_mesh(polydata: "vtk.vtkPolyData") -> "vtk.vtkUnstructuredGrid": + """Convert surface polydata to a tetrahedral volumetric mesh. + + This function performs tetrahedralization of a surface mesh, creating + a 3D volumetric mesh suitable for finite element analysis. The process + fills the interior of the surface with tetrahedral elements. + + Args: + polydata: VTK polydata representing a closed surface mesh. + + Returns: + VTK unstructured grid containing tetrahedral elements filling the + volume enclosed by the input surface. + + Raises: + RuntimeError: If tetrahedralization fails (e.g., non-manifold surface). + + """ + tetrahedral_filter = vtkDataSetTriangleFilter() + tetrahedral_filter.SetInputData(polydata) + tetrahedral_filter.Update() + + tetrahedral_mesh = tetrahedral_filter.GetOutput() + return tetrahedral_mesh + + +def convert_point_data_to_cell_data(input_data: "vtk.vtkDataSet") -> "vtk.vtkDataSet": + """Convert point-based field data to cell-based field data. + + This function transforms field variables defined at mesh vertices (nodes) + to values defined at cell centers. This conversion is often needed when + switching between different numerical methods or visualization requirements. + + Args: + input_data: VTK dataset with point data to be converted. + + Returns: + VTK dataset with the same geometry but field data moved from points to cells. + Values are typically averaged from the surrounding points. + + """ + point_to_cell_filter = vtk.vtkPointDataToCellData() + point_to_cell_filter.SetInputData(input_data) + point_to_cell_filter.Update() + + return point_to_cell_filter.GetOutput() + + +def get_node_to_elem(polydata: "vtk.vtkDataSet") -> "vtk.vtkDataSet": + """Convert point data to cell data for VTK dataset. + + This function transforms field variables defined at mesh vertices to + values defined at cell centers using VTK's built-in conversion filter. + + Args: + polydata: VTK dataset with point data to be converted. + + Returns: + VTK dataset with field data moved from points to cells. + + """ + point_to_cell_filter = vtk.vtkPointDataToCellData() + point_to_cell_filter.SetInputData(polydata) + point_to_cell_filter.Update() + cell_data = point_to_cell_filter.GetOutput() + return cell_data + + +def get_fields_from_cell( + cell_data: "vtk.vtkCellData", variable_names: list[str] +) -> np.ndarray: + """Extract field variables from VTK cell data. + + This function extracts multiple field variables from VTK cell data and + organizes them into a structured NumPy array. Each variable becomes a + column in the output array. + + Args: + cell_data: VTK cell data object containing field variables. + variable_names: List of variable names to extract from the cell data. + + Returns: + NumPy array of shape (n_cells, n_variables) containing the extracted + field data. Variables are ordered according to the input list. + + Raises: + ValueError: If a requested variable name is not found in the cell data. + + """ + extracted_fields = [] + for variable_name in variable_names: + variable_array = cell_data.GetArray(variable_name) + if variable_array is None: + raise ValueError(f"Variable '{variable_name}' not found in cell data") + + num_tuples = variable_array.GetNumberOfTuples() + field_values = [] + for tuple_idx in range(num_tuples): + variable_value = np.array(variable_array.GetTuple(tuple_idx)) + field_values.append(variable_value) + field_values = np.asarray(field_values) + extracted_fields.append(field_values) + + # Transpose to get shape (n_cells, n_variables) + extracted_fields = np.transpose(np.asarray(extracted_fields), (1, 0)) + return extracted_fields + + +def get_fields( + data_attributes: "vtk.vtkDataSetAttributes", variable_names: list[str] +) -> list[np.ndarray]: + """Extract multiple field variables from VTK data attributes. + + This function extracts field variables from VTK data attributes (either + point data or cell data) and returns them as a list of NumPy arrays. + It handles both point and cell data seamlessly. + + Args: + data_attributes: VTK data attributes object (point data or cell data). + variable_names: List of variable names to extract. + + Returns: + List of NumPy arrays, one for each requested variable. Each array + has shape (n_points/n_cells, n_components) where n_components + depends on the variable (1 for scalars, 3 for vectors, etc.). + + Raises: + ValueError: If a requested variable is not found in the data attributes. + + """ + extracted_fields = [] + for variable_name in variable_names: + try: + vtk_array = data_attributes.GetArray(variable_name) + except ValueError as e: + raise ValueError( + f"Failed to get array '{variable_name}' from the data attributes: {e}" + ) + + # Convert VTK array to NumPy array with proper shape + numpy_array = numpy_support.vtk_to_numpy(vtk_array).reshape( + vtk_array.GetNumberOfTuples(), vtk_array.GetNumberOfComponents() + ) + extracted_fields.append(numpy_array) + + return extracted_fields + + +def get_vertices(polydata: "vtk.vtkPolyData") -> np.ndarray: + """Extract vertex coordinates from VTK polydata object. + + This function converts VTK polydata to a NumPy array containing the 3D + coordinates of all vertices in the mesh. + + Args: + polydata: VTK polydata object containing mesh geometry. + + Returns: + NumPy array of shape (n_points, 3) containing [x, y, z] coordinates + for each vertex. + + """ + vtk_points = polydata.GetPoints() + vertices = numpy_support.vtk_to_numpy(vtk_points.GetData()) + return vertices + + +def get_volume_data( + polydata: "vtk.vtkPolyData", variable_names: list[str] +) -> tuple[np.ndarray, list[np.ndarray]]: + """Extract vertices and field data from 3D volumetric mesh. + + This function extracts both geometric information (vertex coordinates) + and field data from a 3D volumetric mesh. It's commonly used for + processing finite element analysis results. + + Args: + polydata: VTK polydata representing a 3D volumetric mesh. + variable_names: List of field variable names to extract. + + Returns: + Tuple containing: + - Vertex coordinates as NumPy array of shape (n_vertices, 3) + - List of field arrays, one per variable + + """ + vertices = get_vertices(polydata) + point_data = polydata.GetPointData() + fields = get_fields(point_data, variable_names) + + return vertices, fields + + +def get_surface_data( + polydata: "vtk.vtkPolyData", variable_names: list[str] +) -> tuple[np.ndarray, list[np.ndarray], list[tuple[int, int]]]: + """Extract surface mesh data including vertices, fields, and edge connectivity. + + This function extracts comprehensive surface mesh information including + vertex coordinates, field data at vertices, and edge connectivity information. + It's commonly used for processing CFD surface results and boundary conditions. + + Args: + polydata: VTK polydata representing a surface mesh. + variable_names: List of field variable names to extract from the mesh. + + Returns: + Tuple containing: + - Vertex coordinates as NumPy array of shape (n_vertices, 3) + - List of field arrays, one per variable + - List of edge tuples representing mesh connectivity + + Raises: + ValueError: If a requested variable is not found or polygon data is missing. + + """ + points = polydata.GetPoints() + vertices = np.array([points.GetPoint(i) for i in range(points.GetNumberOfPoints())]) + + point_data = polydata.GetPointData() + fields = [] + for array_name in variable_names: + try: + array = point_data.GetArray(array_name) + except ValueError: + raise ValueError( + f"Failed to get array {array_name} from the unstructured grid." + ) + array_data = np.zeros( + (points.GetNumberOfPoints(), array.GetNumberOfComponents()) + ) + for j in range(points.GetNumberOfPoints()): + array.GetTuple(j, array_data[j]) + fields.append(array_data) + + polys = polydata.GetPolys() + if polys is None: + raise ValueError("Failed to get polygons from the polydata.") + polys.InitTraversal() + edges = [] + id_list = vtk.vtkIdList() + for _ in range(polys.GetNumberOfCells()): + polys.GetNextCell(id_list) + num_ids = id_list.GetNumberOfIds() + edges = [ + (id_list.GetId(j), id_list.GetId((j + 1) % num_ids)) for j in range(num_ids) + ] + + return vertices, fields, edges From 8590afd3de9117268e56fcc1928a193fddf2b2ef Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Mon, 25 Aug 2025 16:18:27 +0000 Subject: [PATCH 04/98] Add new dataset to read DrivaerML like data in various formats. Separate the dataloading from the data processing in DoMINO datapipe. --- .../domino/src/benchmark_dataloader.py | 250 ++++ physicsnemo/datapipes/cae/domino_datapipe2.py | 1174 +++++++++++++++++ .../datapipes/cae/drivaer_ml_datapipe.py | 888 +++++++++++++ physicsnemo/utils/domino/utils.py | 51 +- 4 files changed, 2337 insertions(+), 26 deletions(-) create mode 100644 examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py create mode 100644 physicsnemo/datapipes/cae/domino_datapipe2.py create mode 100644 physicsnemo/datapipes/cae/drivaer_ml_datapipe.py diff --git a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py new file mode 100644 index 0000000000..95b39cedd3 --- /dev/null +++ b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py @@ -0,0 +1,250 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This code defines a distributed pipeline for training the DoMINO model on +CFD datasets. It includes the computation of scaling factors, instantiating +the DoMINO model and datapipe, automatically loading the most recent checkpoint, +training the model in parallel using DistributedDataParallel across multiple +GPUs, calculating the loss and updating model parameters using mixed precision. +This is a common recipe that enables training of combined models for surface and +volume as well either of them separately. Validation is also conducted every epoch, +where predictions are compared against ground truth values. The code logs training +and validation metrics to TensorBoard. The train tab in config.yaml can be used to +specify batch size, number of epochs and other training parameters. +""" + +import time +import os +import re +import torch +import torchinfo + +from typing import Literal, Any + +import apex +import numpy as np +import hydra +from hydra.utils import to_absolute_path +from omegaconf import DictConfig, OmegaConf +import torch.distributed as dist +from torch.cuda.amp import GradScaler, autocast +from torch.nn.parallel import DistributedDataParallel +from torch.utils.data import DataLoader +from torch.utils.data.distributed import DistributedSampler +from torch.utils.tensorboard import SummaryWriter +from nvtx import annotate as nvtx_annotate +import torch.cuda.nvtx as nvtx + + +from physicsnemo.distributed import DistributedManager +from physicsnemo.launch.utils import load_checkpoint, save_checkpoint +from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper + +from physicsnemo.datapipes.cae.domino_datapipe import ( + DoMINODataPipe, + compute_scaling_factors, + create_domino_dataset, +) +from physicsnemo.models.domino.model import DoMINO +from physicsnemo.utils.domino.utils import * + +# This is included for GPU memory tracking: +from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo +import time + +# Initialize NVML +nvmlInit() + + +from physicsnemo.utils.profiling import profile, Profiler + + +@profile +def train_epoch( + dataloader, + sampler, + logger, + gpu_handle, + epoch_index, + device, +): + dist = DistributedManager() + + indices = list(iter(sampler)) + print(f"indices: {indices}") + # If you tell the dataloader the indices in advance, it will preload + # and pre-preprocess data + dataloader.set_indices(indices) + + gpu_start_info = nvmlDeviceGetMemoryInfo(gpu_handle) + start_time = time.perf_counter() + for i_batch, sample_batched in enumerate(dataloader): + # sampled_batched = dict_to_device(sample_batched, device) + + # for key in sampled_batched.keys(): + # print(f"{key}: {sampled_batched[key].shape}") + + # Gather data and report + elapsed_time = time.perf_counter() - start_time + start_time = time.perf_counter() + gpu_end_info = nvmlDeviceGetMemoryInfo(gpu_handle) + gpu_memory_used = gpu_end_info.used / (1024**3) + gpu_memory_delta = (gpu_end_info.used - gpu_start_info.used) / (1024**3) + + logging_string = f"Device {device}, batch processed: {i_batch + 1}\n" + logging_string += f" GPU memory used: {gpu_memory_used:.3f} Gb\n" + logging_string += f" GPU memory delta: {gpu_memory_delta:.3f} Gb\n" + logging_string += f" Time taken: {elapsed_time:.2f} seconds\n" + logger.info(logging_string) + gpu_start_info = nvmlDeviceGetMemoryInfo(gpu_handle) + + return + + +def get_or_compute_scaling_factors( + cfg: DictConfig, +) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + """Get or compute scaling factors for volume and surface fields normalization. + + This function either loads pre-computed scaling factors from disk or computes them + if they don't exist. The scaling factors are used for normalizing volume and surface + fields data based on the specified normalization method in the config. + + Args: + cfg (DictConfig): Configuration object containing: + - project.name: Project name for saving/loading scaling factors + - model.normalization: Type of normalization ("min_max_scaling" or "mean_std_scaling") + - data.input_dir: Input directory path + - data_processor.use_cache: Whether to use cached data + + Returns: + tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: A tuple containing: + - vol_factors: Scaling factors for volume fields (max/min or mean/std) + - surf_factors: Scaling factors for surface fields (max/min or mean/std) + Each factor is a numpy array containing the respective scaling values. + + Raises: + ValueError: If an invalid normalization type is specified in the config. + """ + # Compute or load the scaling factors: + vol_save_path = os.path.join( + "outputs", cfg.project.name, "volume_scaling_factors.npy" + ) + surf_save_path = os.path.join( + "outputs", cfg.project.name, "surface_scaling_factors.npy" + ) + + if not os.path.exists(vol_save_path) or not os.path.exists(surf_save_path): + # Save the scaling factors if needed: + mean, std, min_val, max_val = compute_scaling_factors( + cfg=cfg, + input_path=cfg.data.input_dir, + use_cache=cfg.data_processor.use_cache, + ) + + v_mean = mean["volume_fields"].cpu().numpy() + v_std = std["volume_fields"].cpu().numpy() + v_min = min_val["volume_fields"].cpu().numpy() + v_max = max_val["volume_fields"].cpu().numpy() + + s_mean = mean["surface_fields"].cpu().numpy() + s_std = std["surface_fields"].cpu().numpy() + s_min = min_val["surface_fields"].cpu().numpy() + s_max = max_val["surface_fields"].cpu().numpy() + + np.save(vol_save_path, [v_mean, v_std, v_min, v_max]) + np.save(surf_save_path, [s_mean, s_std, s_min, s_max]) + else: + v_mean, v_std, v_min, v_max = np.load(vol_save_path) + s_mean, s_std, s_min, s_max = np.load(surf_save_path) + + if cfg.model.normalization == "min_max_scaling": + vol_factors = [v_max, v_min] + elif cfg.model.normalization == "mean_std_scaling": + vol_factors = [v_mean, v_std] + else: + raise ValueError(f"Invalid normalization type: {cfg.model.normalization}") + + if cfg.model.normalization == "min_max_scaling": + surf_factors = [s_max, s_min] + elif cfg.model.normalization == "mean_std_scaling": + surf_factors = [s_mean, s_std] + else: + raise ValueError(f"Invalid normalization type: {cfg.model.normalization}") + + return vol_factors, surf_factors + + +@hydra.main(version_base="1.3", config_path="conf", config_name="config") +def main(cfg: DictConfig) -> None: + # initialize distributed manager + DistributedManager.initialize() + dist = DistributedManager() + + # Initialize NVML + nvmlInit() + + gpu_handle = nvmlDeviceGetHandleByIndex(dist.device.index) + + model_type = cfg.model.model_type + + logger = PythonLogger("Train") + logger = RankZeroLoggingWrapper(logger, dist) + + logger.info(f"Config summary:\n{OmegaConf.to_yaml(cfg, sort_keys=True)}") + + vol_factors, surf_factors = get_or_compute_scaling_factors(cfg) + + train_dataset = create_domino_dataset( + cfg, + phase="train", + volume_variable_names="volume_fields", + surface_variable_names="surface_fields", + vol_factors=vol_factors, + surf_factors=surf_factors, + ) + train_sampler = DistributedSampler( + train_dataset, num_replicas=dist.world_size, rank=dist.rank + ) + + # train_dataloader = DataLoader( + # train_dataset, + # sampler=train_sampler, + # **cfg.train.dataloader, + # ) + + for epoch in range(0, cfg.train.epochs): + start_time = time.perf_counter() + logger.info(f"Device {dist.device}, epoch {epoch}:") + + epoch_start_time = time.perf_counter() + train_epoch( + dataloader=train_dataset, + sampler=train_sampler, + logger=logger, + gpu_handle=gpu_handle, + epoch_index=epoch, + device=dist.device, + ) + epoch_end_time = time.perf_counter() + logger.info( + f"Device {dist.device}, Epoch {epoch} took {epoch_end_time - epoch_start_time:.3f} seconds" + ) + + +if __name__ == "__main__": + main() diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py new file mode 100644 index 0000000000..310493e3cb --- /dev/null +++ b/physicsnemo/datapipes/cae/domino_datapipe2.py @@ -0,0 +1,1174 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This code provides the datapipe for reading the processed npy files, +generating multi-res grids, calculating signed distance fields, +positional encodings, sampling random points in the volume and on surface, +normalizing fields and returning the output tensors as a dictionary. + +This datapipe also non-dimensionalizes the fields, so the order in which the variables should +be fixed: velocity, pressure, turbulent viscosity for volume variables and +pressure, wall-shear-stress for surface variables. The different parameters such as +variable names, domain resolution, sampling size etc. are configurable in config.yaml. +""" + +from concurrent.futures import ThreadPoolExecutor +from dataclasses import dataclass +from pathlib import Path +from typing import Literal, Optional, Protocol, Sequence, Union + +import numpy as np +import torch +import torch.cuda.nvtx as nvtx +from omegaconf import DictConfig +from torch import Tensor +from torch.utils.data import Dataset + +from physicsnemo.datapipes.cae.drivaer_ml_datapipe import ( + DrivaerMLDataset, + compute_mean_std_min_max, +) +from physicsnemo.distributed import DistributedManager +from physicsnemo.utils.domino.utils import ( + ArrayType, + area_weighted_shuffle_array, + calculate_center_of_mass, + calculate_normal_positional_encoding, + create_grid, + get_filenames, + normalize, + pad, + shuffle_array, + standardize, +) +from physicsnemo.utils.neighbors import knn +from physicsnemo.utils.profiling import profile +from physicsnemo.utils.sdf import signed_distance_field + + +class BoundingBox(Protocol): + """ + Type definition for the required format of bounding box dimensions. + """ + + min: ArrayType + max: ArrayType + + +@dataclass +class DoMINODataConfig: + """Configuration for DoMINO dataset processing pipeline. + + Attributes: + data_path: Path to the dataset to load. + phase: Which phase of data to load ("train", "val", or "test"). + surface_variables: (Surface specific) Names of surface variables. + surface_points_sample: (Surface specific) Number of surface points to sample per batch. + num_surface_neighbors: (Surface specific) Number of surface neighbors to consider for nearest neighbors approach. + resample_surfaces: (Surface specific) Whether to resample the surface before kdtree/knn. Not available if caching. + resampling_points: (Surface specific) Number of points to resample the surface to. + surface_sampling_algorithm: (Surface specific) Algorithm to use for surface sampling ("area_weighted" or "random"). + surface_factors: (Surface specific) Non-dimensionalization factors for surface variables. + If set, and scaling_type is: + - min_max_scaling -> rescale surface_fields to the min/max set here + - mean_std_scaling -> rescale surface_fields to the mean and std set here. + bounding_box_dims_surf: (Surface specific) Dimensions of bounding box. Must be an object with min/max + attributes that are arraylike. + volume_variables: (Volume specific) Names of volume variables. + volume_points_sample: (Volume specific) Number of volume points to sample per batch. + volume_factors: (Volume specific) Non-dimensionalization factors for volume variables scaling. + If set, and scaling_type is: + - min_max_scaling -> rescale volume_fields to the min/max set here + - mean_std_scaling -> rescale volume_fields to the mean and std set here. + bounding_box_dims: (Volume specific) Dimensions of bounding box. Must be an object with min/max + attributes that are arraylike. + grid_resolution: Resolution of the latent grid. + normalize_coordinates: Whether to normalize coordinates based on min/max values. + For surfaces: uses s_min/s_max, defined from: + - Surface bounding box, if defined. + - Min/max of the stl_vertices + For volumes: uses c_min/c_max, defined from: + - Volume bounding_box if defined, + - 1.5x s_min/max otherwise, except c_min[2] = s_min[2] in this case + sample_in_bbox: Whether to sample points in a specified bounding box. + Uses the same min/max points as coordinate normalization. + Only performed if compute_scaling_factors is false. + sampling: Whether to downsample the full resolution mesh to fit in GPU memory. + Surface and volume sampling points are configured separately as: + - surface.points_sample + - volume.points_sample + geom_points_sample: Number of STL points sampled per batch. + Independent of volume.points_sample and surface.points_sample. + positional_encoding: Whether to use positional encoding. Affects the calculation of: + - pos_volume_closest + - pos_volume_center_of_mass + - pos_surface_centter_of_mass + scaling_type: Scaling type for volume variables. + If used, will rescale the volume_fields and surface fields outputs. + Requires volume.factor and surface.factor to be set. + compute_scaling_factors: Whether to compute scaling factors. + Not available if caching. + Many preprocessing pieces are disabled if computing scaling factors. + caching: Whether this is for caching or serving. + deterministic: Whether to use a deterministic seed for sampling and random numbers. + gpu_preprocessing: Whether to do preprocessing on the GPU (False for CPU). + gpu_output: Whether to return output on the GPU as cupy arrays. + If False, returns numpy arrays. + You might choose gpu_preprocessing=True and gpu_output=False if caching. + """ + + data_path: Path + phase: Literal["train", "val", "test"] + + # Surface-specific variables: + surface_variables: Optional[Sequence] = ("pMean", "wallShearStress") + surface_points_sample: int = 1024 + num_surface_neighbors: int = 11 + resample_surfaces: bool = False + resampling_points: int = 1_000_000 + surface_sampling_algorithm: str = Literal["area_weighted", "random"] + surface_factors: Optional[Sequence] = None + bounding_box_dims_surf: Optional[Union[BoundingBox, Sequence]] = None + + # Volume specific variables: + volume_variables: Optional[Sequence] = ("UMean", "pMean") + volume_points_sample: int = 1024 + volume_factors: Optional[Sequence] = None + bounding_box_dims: Optional[Union[BoundingBox, Sequence]] = None + + grid_resolution: Union[Sequence, ArrayType] = (256, 96, 64) + normalize_coordinates: bool = False + sample_in_bbox: bool = False + sampling: bool = False + geom_points_sample: int = 300000 + positional_encoding: bool = False + scaling_type: Optional[Literal["min_max_scaling", "mean_std_scaling"]] = None + compute_scaling_factors: bool = False + caching: bool = False + deterministic: bool = False + gpu_preprocessing: bool = True + gpu_output: bool = True + + def __post_init__(self): + # Ensure data_path is a Path object: + if isinstance(self.data_path, str): + self.data_path = Path(self.data_path) + self.data_path = self.data_path.expanduser() + + if not self.data_path.exists(): + raise ValueError(f"Path {self.data_path} does not exist") + + if not self.data_path.is_dir(): + raise ValueError(f"Path {self.data_path} is not a directory") + + # Object if caching settings are impossible: + if self.caching: + if self.sampling: + raise ValueError("Sampling should be False for caching") + if self.compute_scaling_factors: + raise ValueError("Compute scaling factors should be False for caching") + if self.resample_surfaces: + raise ValueError("Resample surface should be False for caching") + + if self.phase not in [ + "train", + "val", + "test", + ]: + raise ValueError( + f"phase should be one of ['train', 'val', 'test'], got {self.phase}" + ) + if self.scaling_type is not None: + if self.scaling_type not in [ + "min_max_scaling", + "mean_std_scaling", + ]: + raise ValueError( + f"scaling_type should be one of ['min_max_scaling', 'mean_std_scaling'], got {self.scaling_type}" + ) + + +##### TODO +# - check the bounding box protocol works + + +class DoMINODataPipe(Dataset): + """ + Datapipe for DoMINO + + Leverages a dataset for the actual reading of the data, and this + object is responsible for preprocessing the data. + + """ + + def __init__( + self, + input_path, + model_type: Literal["surface", "volume", "combined"], + **data_config_overrides, + ): + # Perform config packaging and validation + self.config = DoMINODataConfig(data_path=input_path, **data_config_overrides) + + # Set up the distributed manager: + if not DistributedManager.is_initialized(): + DistributedManager.initialize() + + dist = DistributedManager() + if self.config.gpu_preprocessing or self.config.gpu_output: + # Make sure we move data to the right device: + target_device = dist.device + else: + target_device = torch.device("cpu") + + self.device = target_device + + self.model_type = model_type + + # Update the arrays for bounding boxes: + if hasattr(self.config.bounding_box_dims, "max") and hasattr( + self.config.bounding_box_dims, "min" + ): + self.config.bounding_box_dims = [ + torch.tensor( + self.config.bounding_box_dims.max, + device=self.device, + dtype=torch.float32, + ), + torch.tensor( + self.config.bounding_box_dims.min, + device=self.device, + dtype=torch.float32, + ), + ] + self.volume_grid = create_grid( + self.config.bounding_box_dims[0], + self.config.bounding_box_dims[1], + self.config.grid_resolution, + ) + + if hasattr(self.config.bounding_box_dims_surf, "max") and hasattr( + self.config.bounding_box_dims_surf, "min" + ): + self.config.bounding_box_dims_surf = [ + torch.tensor( + self.config.bounding_box_dims_surf.max, + device=self.device, + dtype=torch.float32, + ), + torch.tensor( + self.config.bounding_box_dims_surf.min, + device=self.device, + dtype=torch.float32, + ), + ] + + self.surf_grid = create_grid( + self.config.bounding_box_dims_surf[0], + self.config.bounding_box_dims_surf[1], + self.config.grid_resolution, + ) + + # Ensure the volume and surface scaling factors are torch tensors + # and on the right device: + if self.config.volume_factors is not None: + self.config.volume_factors = torch.tensor( + self.config.volume_factors, device=self.device, dtype=torch.float32 + ) + if self.config.surface_factors is not None: + self.config.surface_factors = torch.tensor( + self.config.surface_factors, device=self.device, dtype=torch.float32 + ) + + # Always read these keys: + self.keys_to_read = ["stl_coordinates", "stl_centers", "stl_faces", "stl_areas"] + + self.keys_to_read_if_available = { + "global_params_values": torch.tensor([30.0, 1.226], device=self.device), + "global_params_reference": torch.tensor([30.0, 1.226], device=self.device), + } + + self.volume_keys = ["volume_mesh_centers", "volume_fields"] + self.surface_keys = [ + "surface_mesh_centers", + "surface_normals", + "surface_areas", + "surface_fields", + ] + + if self.model_type == "volume" or self.model_type == "combined": + self.keys_to_read.extend(self.volume_keys) + if self.model_type == "surface" or self.model_type == "combined": + self.keys_to_read.extend(self.surface_keys) + + self.dataset = DrivaerMLDataset( + data_dir=self.config.data_path, + keys_to_read=self.keys_to_read, + output_device=self.device, + ) + + # This is thread storage for data preprocessing: + self._preprocess_queue = {} + self._preprocess_events = {} + self.preprocess_depth = 2 + self.preprocess_executor = ThreadPoolExecutor(max_workers=2) + + def set_indices(self, indices: list[int]): + """ + Set the indices for the dataset for this epoch. + """ + self.indices = indices + + def __len__(self): + return len(self.dataset) + + def compute_stl_scaling( + self, stl_vertices: torch.Tensor, bounding_box_dims_surf: torch.Tensor | None + ): + """ + Compute the min and max for the defining mesh. + + """ + + s_min = torch.amin(stl_vertices, 0) + s_max = torch.amax(stl_vertices, 0) + + length_scale = torch.amax(s_max - s_min) + + # if dynamic_bbox_scaling: + # Check the bounding box is not unit length + + if bounding_box_dims_surf is not None: + s_max = bounding_box_dims_surf[0] + s_min = bounding_box_dims_surf[1] + surf_grid = self.surf_grid + else: + # Create the grid: + surf_grid = create_grid(s_max, s_min, self.grid_resolution) + + surf_grid_max_min = torch.stack([s_min, s_max]) + + return s_min, s_max, length_scale, surf_grid_max_min, surf_grid + + @profile + def process_combined( + self, + s_min, + s_max, + surf_grid, + stl_vertices, + mesh_indices_flattened, + ): + # SDF calculation on the grid using WARP + nx, ny, nz = self.config.grid_resolution + + sdf_surf_grid, _ = signed_distance_field( + stl_vertices, + mesh_indices_flattened, + surf_grid, + use_sign_winding_number=True, + ) + + if self.config.sampling: + geometry_points = self.config.geom_points_sample + geometry_coordinates_sampled, idx_geometry = shuffle_array( + stl_vertices, geometry_points + ) + if geometry_coordinates_sampled.shape[0] < geometry_points: + geometry_coordinates_sampled = pad( + geometry_coordinates_sampled, geometry_points, pad_value=-100.0 + ) + geom_centers = geometry_coordinates_sampled + else: + geom_centers = stl_vertices + + return (sdf_surf_grid, geom_centers) + + @profile + def process_surface( + self, + s_min: torch.Tensor, + s_max: torch.Tensor, + center_of_mass: torch.Tensor, + surf_grid: torch.Tensor, + surface_coordinates: torch.Tensor, + surface_normals: torch.Tensor, + surface_sizes: torch.Tensor, + surface_fields: torch.Tensor, + ) -> dict[str, torch.Tensor]: + nx, ny, nz = self.config.grid_resolution + + return_dict = {} + + # Remove any sizes <= 0: + idx = surface_sizes > 0 + surface_sizes = surface_sizes[idx] + surface_fields = surface_fields[idx] + surface_normals = surface_normals[idx] + surface_coordinates = surface_coordinates[idx] + + if self.config.resample_surfaces: + if self.config.resampling_points > surface_coordinates.shape[0]: + resampling_points = surface_coordinates.shape[0] + else: + resampling_points = self.config.resampling_points + + surface_coordinates, idx_s = shuffle_array( + surface_coordinates, resampling_points + ) + surface_normals = surface_normals[idx_s] + surface_sizes = surface_sizes[idx_s] + surface_fields = surface_fields[idx_s] + + c_max = self.config.bounding_box_dims[0] + c_min = self.config.bounding_box_dims[1] + + if self.config.sample_in_bbox: + ids_min = surface_coordinates[:] > c_min + ids_max = surface_coordinates[:] < c_max + + ids_in_bbox = ids_min & ids_max + ids_in_bbox = ids_in_bbox.all(dim=-1) + + surface_coordinates = surface_coordinates[ids_in_bbox] + surface_normals = surface_normals[ids_in_bbox] + surface_sizes = surface_sizes[ids_in_bbox] + surface_fields = surface_fields[ids_in_bbox] + + # Compute the positional encoding before sampling + if self.config.positional_encoding: + dx, dy, dz = ( + (s_max[0] - s_min[0]) / nx, + (s_max[1] - s_min[1]) / ny, + (s_max[2] - s_min[2]) / nz, + ) + pos_normals_com_surface = calculate_normal_positional_encoding( + surface_coordinates, center_of_mass, cell_length=[dx, dy, dz] + ) + else: + pos_normals_com_surface = surface_coordinates - center_of_mass + + if self.config.sampling: + # Perform the down sampling: + + if self.config.surface_sampling_algorithm == "area_weighted": + weights = surface_sizes + # ( + # surface_coordinates_sampled, + # idx_surface, + # ) = area_weighted_shuffle_array( + # surface_coordinates, + # self.config.surface_points_sample, + # surface_sizes, + # ) + else: + weights = None + # surface_coordinates_sampled, idx_surface = shuffle_array( + # surface_coordinates, self.config.surface_points_sample + # ) + + surface_coordinates_sampled, idx_surface = shuffle_array( + surface_coordinates, + self.config.surface_points_sample, + weights=weights, + ) + + if surface_coordinates_sampled.shape[0] < self.config.surface_points_sample: + surface_coordinates_sampled = pad( + surface_coordinates_sampled, + self.config.surface_points_sample, + pad_value=-10.0, + ) + + # Select out the sampled points for non-neighbor arrays: + surface_fields = surface_fields[idx_surface] + pos_normals_com_surface = pos_normals_com_surface[idx_surface] + + # Perform a kNN on the full set of points vs. sampled points + # to select the neighbors: + # if self.config.num_surface_neighbors > 1: + # if self.array_provider == cp: + # knn = cuml.neighbors.NearestNeighbors( + # n_neighbors=self.config.num_surface_neighbors, + # algorithm="rbc", + # ) + # knn.fit(surface_coordinates) + # else: + # # Under the hood this is instantiating a KDTree. + # # aka here knn is a type, not a class, technically. + # interp_func = KDTree(surface_coordinates) + + # Now, perform the kNN on the sampled points: + if self.config.num_surface_neighbors > 1: + neighbor_indices, neighbor_distances = knn( + points=surface_coordinates, + queries=surface_coordinates_sampled, + k=self.config.num_surface_neighbors, + ) + + # Pull out the neighbor elements. Note that ii is the index into the original + # points - but only exists for the sampled points + # In other words, a point from `surface_coordinates_sampled` has neighbors + # from the full `surface_coordinates` array. + surface_neighbors = surface_coordinates[neighbor_indices][:, 1:] + surface_neighbors_normals = surface_normals[neighbor_indices][:, 1:] + surface_neighbors_sizes = surface_sizes[neighbor_indices][:, 1:] + else: + surface_neighbors = surface_coordinates + surface_neighbors_normals = surface_normals + surface_neighbors_sizes = surface_sizes + + # Subsample the normals and sizes: + surface_normals = surface_normals[idx_surface] + surface_sizes = surface_sizes[idx_surface] + + # Update the coordinates to the sampled points: + surface_coordinates = surface_coordinates_sampled + + else: + neighbor_indices, _ = knn( + points=surface_coordinates, + queries=surface_coordinates, + k=self.config.num_surface_neighbors, + ) + + # Construct the neighbors arrays: + surface_neighbors = surface_coordinates[neighbor_indices][:, 1:] + surface_neighbors_normals = surface_normals[neighbor_indices][:, 1:] + surface_neighbors_sizes = surface_sizes[neighbor_indices][:, 1:] + + # Have to normalize neighbors after the kNN and sampling + if self.config.normalize_coordinates: + surf_grid = normalize(surf_grid, s_max, s_min) + surface_coordinates = normalize(surface_coordinates, s_max, s_min) + surface_neighbors = normalize(surface_neighbors, s_max, s_min) + + if self.config.scaling_type is not None: + if self.config.surface_factors is not None: + if self.config.scaling_type == "mean_std_scaling": + surf_mean = self.config.surface_factors[0] + surf_std = self.config.surface_factors[1] + # TODO - Are these array calls needed? + surface_fields = standardize(surface_fields, surf_mean, surf_std) + elif self.config.scaling_type == "min_max_scaling": + surf_min = self.config.surface_factors[1] + surf_max = self.config.surface_factors[0] + # TODO - Are these array calls needed? + surface_fields = normalize(surface_fields, surf_max, surf_min) + + return_dict.update( + { + "pos_surface_center_of_mass": pos_normals_com_surface, + "surface_mesh_centers": surface_coordinates, + "surface_mesh_neighbors": surface_neighbors, + "surface_normals": surface_normals, + "surface_neighbors_normals": surface_neighbors_normals, + "surface_areas": surface_sizes, + "surface_neighbors_areas": surface_neighbors_sizes, + "surface_fields": surface_fields, + } + ) + + return return_dict + + @profile + def process_volume( + self, + s_min: torch.Tensor, + s_max: torch.Tensor, + volume_coordinates: torch.Tensor, + volume_fields: torch.Tensor, + stl_vertices: torch.Tensor, + mesh_indices_flattened: torch.Tensor, + center_of_mass: torch.Tensor, + ) -> dict[str, torch.Tensor]: + return_dict = {} + + nx, ny, nz = self.config.grid_resolution + + # Determine the volume min / max locations + if self.config.bounding_box_dims is None: + c_max = s_max + (s_max - s_min) / 2 + c_min = s_min - (s_max - s_min) / 2 + c_min[2] = s_min[2] + else: + c_max = self.config.bounding_box_dims[0] + c_min = self.config.bounding_box_dims[1] + + if self.config.sample_in_bbox: + # Remove points in the volume that are outside + # of the bbox area. + min_check = volume_coordinates[:] > c_min + max_check = volume_coordinates[:] < c_max + + ids_in_bbox = min_check & max_check + ids_in_bbox = ids_in_bbox.all(dim=1) + + volume_coordinates = volume_coordinates[ids_in_bbox] + volume_fields = volume_fields[ids_in_bbox] + + dx, dy, dz = ( + (c_max[0] - c_min[0]) / nx, + (c_max[1] - c_min[1]) / ny, + (c_max[2] - c_min[2]) / nz, + ) + + # TODO - we need to make sure if the bbox is dynamic, + # the bounds on the grid are correct + + # # Generate a grid of specified resolution to map the bounding box + # # The grid is used for capturing structured geometry features and SDF representation of geometry + # grid = create_grid(c_max, c_min, [nx, ny, nz]) + # grid_reshaped = grid.reshape(nx * ny * nz, 3) + + # SDF calculation on the volume grid using WARP + sdf_grid, _ = signed_distance_field( + stl_vertices, + mesh_indices_flattened, + self.volume_grid, + use_sign_winding_number=True, + ) + + if self.config.sampling: + # Generate a series of idx to sample the volume + # without replacement + + volume_coordinates_sampled, idx_volume = shuffle_array( + volume_coordinates, self.config.volume_points_sample + ) + volume_coordinates_sampled = volume_coordinates[idx_volume] + + if volume_coordinates_sampled.shape[0] < self.config.volume_points_sample: + padding_size = ( + self.config.volume_points_sample + - volume_coordinates_sampled.shape[0] + ) + volume_coordinates_sampled = torch.nn.functional.pad( + volume_coordinates_sampled, + (0, 0, 0, 0, 0, padding_size), + mode="constant", + value=-10.0, + ) + # volume_coordinates_sampled = pad( + # volume_coordinates_sampled, + # self.config.volume_points_sample, + # pad_value=-10.0, + # ) + volume_fields = volume_fields[idx_volume] + volume_coordinates = volume_coordinates_sampled + + # Get the SDF of all the selected volume coordinates, + # And keep the closest point to each one. + sdf_nodes, sdf_node_closest_point = signed_distance_field( + stl_vertices, + mesh_indices_flattened, + volume_coordinates, + use_sign_winding_number=True, + ) + + if self.config.positional_encoding: + pos_normals_closest_vol = calculate_normal_positional_encoding( + volume_coordinates, + sdf_node_closest_point, + cell_length=[dx, dy, dz], + ) + pos_normals_com_vol = calculate_normal_positional_encoding( + volume_coordinates, center_of_mass, cell_length=[dx, dy, dz] + ) + else: + pos_normals_closest_vol = volume_coordinates - sdf_node_closest_point + pos_normals_com_vol = volume_coordinates - center_of_mass + + if self.config.normalize_coordinates: + volume_coordinates = normalize(volume_coordinates, c_max, c_min) + grid = normalize(self.volume_grid, c_max, c_min) + + if self.config.scaling_type is not None: + if self.config.volume_factors is not None: + if self.config.scaling_type == "mean_std_scaling": + vol_mean = self.config.volume_factors[0] + vol_std = self.config.volume_factors[1] + volume_fields = standardize(volume_fields, vol_mean, vol_std) + elif self.config.scaling_type == "min_max_scaling": + vol_min = self.config.volume_factors[1] + vol_max = self.config.volume_factors[0] + volume_fields = normalize(volume_fields, vol_max, vol_min) + + vol_grid_max_min = torch.stack([c_min, c_max]) + + return_dict.update( + { + "pos_volume_closest": pos_normals_closest_vol, + "pos_volume_center_of_mass": pos_normals_com_vol, + "grid": grid, + "sdf_grid": sdf_grid, + "sdf_nodes": sdf_nodes, + "volume_fields": volume_fields, + "volume_mesh_centers": volume_coordinates, + "volume_min_max": vol_grid_max_min, + } + ) + + return return_dict + + @profile + def process_data(self, data_dict): + # Start building the preprocessed return dict: + return_dict = { + "global_params_values": data_dict["global_params_values"], + "global_params_reference": data_dict["global_params_reference"], + } + + # This function gets information about the surface scale, + # and decides what the surface grid will be: + (s_min, s_max, length_scale, surf_grid_max_min, surf_grid) = ( + self.compute_stl_scaling( + data_dict["stl_coordinates"], self.config.bounding_box_dims_surf + ) + ) + + # This is a center of mass computation for the stl surface, + # using the size of each mesh point as weight. + + center_of_mass = calculate_center_of_mass( + data_dict["stl_centers"], data_dict["stl_areas"] + ) + + # For SDF calculations, make sure the mesh_indices_flattened is an integer array: + mesh_indices_flattened = data_dict["stl_faces"].to(torch.int32) + + return_dict.update( + { + "length_scale": length_scale, + "surf_grid_max_min": surf_grid_max_min, + } + ) + + # This will compute the sdf on the surface grid and apply downsampling if needed + sdf_surf_grid, geom_centers = self.preprocess_combined( + s_min, + s_max, + surf_grid, + stl_vertices=data_dict["stl_coordinates"], + mesh_indices_flattened=mesh_indices_flattened, + ) + return_dict["sdf_surf_grid"] = sdf_surf_grid + return_dict["geometry_coordinates"] = geom_centers + + # Up to here works all in torch! + + if self.model_type == "volume" or self.model_type == "combined": + volume_dict = self.preprocess_volume( + s_min, + s_max, + volume_coordinates=data_dict["volume_mesh_centers"], + volume_fields=data_dict["volume_fields"], + stl_vertices=data_dict["stl_coordinates"], + mesh_indices_flattened=mesh_indices_flattened, + center_of_mass=center_of_mass, + ) + + return_dict.update(volume_dict) + + if self.model_type == "surface" or self.model_type == "combined": + surface_dict = self.preprocess_surface( + s_min, + s_max, + center_of_mass, + surf_grid, + surface_coordinates=data_dict["surface_mesh_centers"], + surface_normals=data_dict["surface_normals"], + surface_sizes=data_dict["surface_areas"], + surface_fields=data_dict["surface_fields"], + ) + return_dict.update(surface_dict) + + return return_dict + + @profile + def __getitem__(self, idx): + """ + Function for fetching and processing a single file's data. + + Domino, in general, expects one example per file and the files + are relatively large due to the mesh size. + """ + + if self.config.deterministic: + torch.manual_seed(idx) + + if hasattr(self, "indices"): + index = self.indices[idx] + else: + index = idx + + data_dict = self.dataset[index] + + for key in self.keys_to_read_if_available.keys(): + if key not in data_dict: + data_dict[key] = self.keys_to_read_if_available[key] + + return_dict = self.process_data(data_dict) + + return return_dict + + # def __getitem__(self, idx: int) -> dict[str, torch.Tensor | ShardTensor]: + # """ + # Get a data sample. + + # Flow is: + # - Read data, or get preloaded data if this idx is preloaded. + # - Move data to GPU, if needed. + # - Preloading data will move to GPU if it can. + # - If domain parallelism is enabled, convert to ShardTensors. + # - Return + + # Args: + # idx: Index of the sample to retrieve + + # Returns: + # Dictionary containing tensors/ShardTensors for the requested data + # """ + + # if idx >= len(self._filenames): + # raise IndexError( + # f"Index {idx} out of range for dataset of size {len(self._filenames)}" + # ) + + # # Attempt to get preloaded data: + # data = self.get_preloaded(idx) + # if data is None: + # # Read data from zarr file + # data = self._read_file(self._filenames[idx]) + # data = self._move_to_gpu(data, idx) + + # # This blocks until the preprocessing has transferred to GPU + # if idx in self._transfer_events: + # torch.cuda.current_stream().wait_event(self._transfer_events[idx]) + # self._transfer_events.pop(idx) + + # # Convert to ShardTensors if using domain parallelism + # if self.device_mesh is not None: + # data = self._convert_to_shard_tensors(data) + + # return data + + # def __iter__(self): + # self.i = 0 + # return self + + # def __next__(self): + # """ + # When used in an iterator context, this datapipe will + # leverage preloading and preprocessing to speed up the data + # loading latency. + + # Each time "next" is called, the datapipe will ask the data + # set to preload the data 2 steps ahead. It will then ask + # for the data from one step ahead, and start it processing. + + # Finally, it will return the data from this requested index + # """ + # if self.i >= len(self._filenames): + # self.i = 0 + # raise StopIteration + + # if self.preload_depth > 0 and self.i + 1 < len(self._filenames): + # self.preload(this_index) + # if self.preload_depth > 1 and self.i + 2 < len(self._filenames): + # self.preload(this_index) + + # data = self.__getitem__(this_index) + + # self.i += 1 + + # return data + + # def preprocess(self, idx: int) -> None: + # """ + # Asynchronously preload the data for the given index (up to CPU, not GPU). + # Only one preload operation is supported at a time. + + # Args: + # idx: Index of the sample to preload. + # """ + # if idx in self._preload_queue: + # # Skip items that are already in the queue + # return + + # def _preload_worker(): + # try: + # data = self._read_file(self._filenames[idx]) + # # Convert to torch tensors + # return self._move_to_gpu(data, idx) + # except Exception as e: + # print(f"Exception in preload: {e}") + # raise e + + # self._preload_queue[idx] = self.preload_executor.submit(_preload_worker) + + # def get_preloaded(self, idx: int) -> dict[str, torch.Tensor] | None: + # """ + # Retrieve the preloaded data (blocking if not ready). + + # Returns: + # (idx, data) tuple where data is a dictionary of key to numpy array or torch tensor. + + # Raises: + # RuntimeError: If no preload is in progress. + # Exception: If preload failed. + # """ + + # if idx not in self._preload_queue: + # return None + + # result = self._preload_queue[idx].result() # This will block until the result is ready + # self._preload_queue.pop(idx) # Clear the future after getting the result + + # return result + + +@profile +def compute_scaling_factors(cfg: DictConfig, input_path: str, use_cache: bool) -> None: + # Create a dataset for just the field keys: + + dataset = DrivaerMLDataset( + data_dir=input_path, + keys_to_read=["volume_fields", "surface_fields"], + output_device=torch.device("cuda"), # TODO - configure this more carefully here + ) + + mean, std, min_val, max_val = compute_mean_std_min_max( + dataset, + field_keys=["volume_fields", "surface_fields"], + ) + + return mean, std, min_val, max_val + + +class CachedDoMINODataset(Dataset): + """ + Dataset for reading cached DoMINO data files, with optional resampling. + Acts as a drop-in replacement for DoMINODataPipe. + """ + + # @nvtx_annotate(message="CachedDoMINODataset __init__") + def __init__( + self, + data_path: Union[str, Path], + phase: Literal["train", "val", "test"] = "train", + sampling: bool = False, + volume_points_sample: Optional[int] = None, + surface_points_sample: Optional[int] = None, + geom_points_sample: Optional[int] = None, + model_type=None, # Model_type, surface, volume or combined + deterministic_seed=False, + surface_sampling_algorithm="area_weighted", + ): + super().__init__() + + self.model_type = model_type + if deterministic_seed: + np.random.seed(42) + + if isinstance(data_path, str): + data_path = Path(data_path) + self.data_path = data_path.expanduser() + + if not self.data_path.exists(): + raise AssertionError(f"Path {self.data_path} does not exist") + if not self.data_path.is_dir(): + raise AssertionError(f"Path {self.data_path} is not a directory") + + self.deterministic_seed = deterministic_seed + self.sampling = sampling + self.volume_points = volume_points_sample + self.surface_points = surface_points_sample + self.geom_points = geom_points_sample + self.surface_sampling_algorithm = surface_sampling_algorithm + + self.filenames = get_filenames(self.data_path, exclude_dirs=True) + + total_files = len(self.filenames) + + self.phase = phase + self.indices = np.array(range(total_files)) + + np.random.shuffle(self.indices) + + if not self.filenames: + raise AssertionError(f"No cached files found in {self.data_path}") + + def __len__(self): + return len(self.indices) + + # @nvtx_annotate(message="CachedDoMINODataset __getitem__") + def __getitem__(self, idx): + if self.deterministic_seed: + np.random.seed(idx) + nvtx.range_push("Load cached file") + + index = self.indices[idx] + cfd_filename = self.filenames[index] + + filepath = self.data_path / cfd_filename + result = np.load(filepath, allow_pickle=True).item() + result = { + k: v.numpy() if isinstance(v, Tensor) else v for k, v in result.items() + } + + nvtx.range_pop() + if not self.sampling: + return result + + nvtx.range_push("Sample points") + + # Sample volume points if present + if "volume_mesh_centers" in result and self.volume_points: + coords_sampled, idx_volume = shuffle_array( + result["volume_mesh_centers"], self.volume_points + ) + if coords_sampled.shape[0] < self.volume_points: + coords_sampled = pad( + coords_sampled, self.volume_points, pad_value=-10.0 + ) + + result["volume_mesh_centers"] = coords_sampled + for key in [ + "volume_fields", + "pos_volume_closest", + "pos_volume_center_of_mass", + "sdf_nodes", + ]: + if key in result: + result[key] = result[key][idx_volume] + + # Sample surface points if present + if "surface_mesh_centers" in result and self.surface_points: + if self.surface_sampling_algorithm == "area_weighted": + coords_sampled, idx_surface = area_weighted_shuffle_array( + result["surface_mesh_centers"], + self.surface_points, + result["surface_areas"], + ) + else: + coords_sampled, idx_surface = shuffle_array( + result["surface_mesh_centers"], self.surface_points + ) + + if coords_sampled.shape[0] < self.surface_points: + coords_sampled = pad( + coords_sampled, self.surface_points, pad_value=-10.0 + ) + + ii = result["neighbor_indices"] + result["surface_mesh_neighbors"] = result["surface_mesh_centers"][ii] + result["surface_neighbors_normals"] = result["surface_normals"][ii] + result["surface_neighbors_areas"] = result["surface_areas"][ii] + + result["surface_mesh_centers"] = coords_sampled + + for key in [ + "surface_fields", + "surface_areas", + "surface_normals", + "pos_surface_center_of_mass", + "surface_mesh_neighbors", + "surface_neighbors_normals", + "surface_neighbors_areas", + ]: + if key in result: + result[key] = result[key][idx_surface] + + del result["neighbor_indices"] + + # Sample geometry points if present + if "geometry_coordinates" in result and self.geom_points: + coords_sampled, _ = shuffle_array( + result["geometry_coordinates"], self.geom_points + ) + if coords_sampled.shape[0] < self.geom_points: + coords_sampled = pad(coords_sampled, self.geom_points, pad_value=-100.0) + result["geometry_coordinates"] = coords_sampled + + nvtx.range_pop() + return result + + +def create_domino_dataset( + cfg, phase, volume_variable_names, surface_variable_names, vol_factors, surf_factors +): + if phase == "train": + input_path = cfg.data.input_dir + elif phase == "val": + input_path = cfg.data.input_dir_val + else: + raise ValueError(f"Invalid phase {phase}") + + if cfg.data_processor.use_cache: + return CachedDoMINODataset( + input_path, + phase=phase, + sampling=True, + volume_points_sample=cfg.model.volume_points_sample, + surface_points_sample=cfg.model.surface_points_sample, + geom_points_sample=cfg.model.geom_points_sample, + model_type=cfg.model.model_type, + surface_sampling_algorithm=cfg.model.surface_sampling_algorithm, + ) + else: + overrides = {} + if hasattr(cfg.data, "gpu_preprocessing"): + overrides["gpu_preprocessing"] = cfg.data.gpu_preprocessing + + if hasattr(cfg.data, "gpu_output"): + overrides["gpu_output"] = cfg.data.gpu_output + + return DoMINODataPipe( + input_path, + phase=phase, + grid_resolution=cfg.model.interp_res, + volume_variables=volume_variable_names, + surface_variables=surface_variable_names, + normalize_coordinates=True, + sampling=True, + sample_in_bbox=True, + volume_points_sample=cfg.model.volume_points_sample, + surface_points_sample=cfg.model.surface_points_sample, + geom_points_sample=cfg.model.geom_points_sample, + positional_encoding=cfg.model.positional_encoding, + volume_factors=vol_factors, + surface_factors=surf_factors, + scaling_type=cfg.model.normalization, + model_type=cfg.model.model_type, + bounding_box_dims=cfg.data.bounding_box, + bounding_box_dims_surf=cfg.data.bounding_box_surface, + num_surface_neighbors=cfg.model.num_neighbors_surface, + resample_surfaces=cfg.model.resampling_surface_mesh.resample, + resampling_points=cfg.model.resampling_surface_mesh.points, + surface_sampling_algorithm=cfg.model.surface_sampling_algorithm, + **overrides, + ) + + +if __name__ == "__main__": + fm_data = DoMINODataPipe( + data_path="/code/processed_data/new_models_1/", + phase="train", + sampling=False, + sample_in_bbox=False, + ) diff --git a/physicsnemo/datapipes/cae/drivaer_ml_datapipe.py b/physicsnemo/datapipes/cae/drivaer_ml_datapipe.py new file mode 100644 index 0000000000..84eea51ea5 --- /dev/null +++ b/physicsnemo/datapipes/cae/drivaer_ml_datapipe.py @@ -0,0 +1,888 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import multiprocessing as mp +import os +import pathlib +import sys +import time +from abc import ABC, abstractmethod +from concurrent.futures import ThreadPoolExecutor +from multiprocessing import shared_memory + +import numpy as np +import psutil +import tensorstore as ts +import torch +import zarr + +from physicsnemo.distributed import ShardTensor, ShardTensorSpec + +# from physicsnemo.distributed.utils import compute_split_shapes + +# For use on systems where cpu_affinity is not available: +psutil_process = psutil.Process() + + +class FakeProcess: + """ + Enable a fake cpu affinity setting if it's not available + """ + + def cpu_affinity(self, cpus: list[int] | None) -> None: + pass + + +if not hasattr(psutil_process, "cpu_affinity"): + psutil_process = FakeProcess() + +# Abstractions: +# - want to read npy/npz/.zarr/.stl/.vtp files +# - Need to share next level abstractions +# - Domain parallel dataloading is supported: output will be ShardTensor instead. +# - need to be able to configure preprocessing +# - CPU -> GPU transfer happens here, needs to be isolated in it's own stream +# - Output of dataloader should be torch.Tensor objects. + + +""" +This datapipe handles reading files from Zarr and piping into torch.Tensor objects. + +It's expected that the files are organized as groups, with each .zarr +file representing one training example. To improve IO performance, the files +should be chunked for each array. The reader takes a list of keys in the +group to read, and will not read keys that are not specified. The exception +is if _no_ keys are passed, in which case _all_ keys will be read. +""" + + +class BackendReader(ABC): + """ + Abstract base class for backend readers. + """ + + def __init__(self, keys_to_read: list[str] | None) -> None: + """ + Initialize the backend reader. + """ + self.keys_to_read = keys_to_read + + @abstractmethod + def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]: + """ + Read a file and return a dictionary of tensors. + """ + pass + + @abstractmethod + def read_file_sharded( + self, filename: pathlib.Path, parallel_rank: int, parallel_size: int + ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]: + """ + Read a file and return a dictionary of tensors. + """ + pass + + +class NpyFileReader(BackendReader): + """ + Reader for numpy files. + """ + + def __init__(self, keys_to_read: list[str] | None) -> None: + super().__init__(keys_to_read) + + def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]: + """ + Read a file and return a dictionary of tensors. + """ + data = np.load(filename, allow_pickle=True).item() + + missing_keys = set(self.keys_to_read) - set(data.keys()) + + if len(missing_keys) > 0: + raise ValueError(f"Keys {missing_keys} not found in file {filename}") + + data = {key: torch.from_numpy(data[key]) for key in self.keys_to_read} + + return data + + def read_file_sharded( + self, filename: pathlib.Path, parallel_rank: int, parallel_size: int + ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]: + pass + + +class ZarrFileReader(BackendReader): + """ + Reader for zarr files. + """ + + def __init__(self, keys_to_read: list[str] | None) -> None: + super().__init__(keys_to_read) + + def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]: + """ + Read a file and return a dictionary of tensors. + """ + group = zarr.open_group(filename, mode="r") + + missing_keys = set(self.keys_to_read) - set(group.keys()) + + if len(missing_keys) > 0: + raise ValueError(f"Keys {missing_keys} not found in file {filename}") + + # This is a slower basic way to do this, to be improved: + data = {key: torch.from_numpy(group[key][:]) for key in self.keys_to_read} + + return data + + def read_file_sharded( + self, filename: pathlib.Path, parallel_rank: int, parallel_size: int + ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]: + """ + Read a file and return a dictionary of tensors. + """ + pass + + +class TensorStoreZarrReader(BackendReader): + """ + Reader for tensorstore zarr files. + """ + + def __init__(self, keys_to_read: list[str] | None) -> None: + super().__init__(keys_to_read) + + self.spec_template = { + "driver": "zarr2", + "kvstore": { + "driver": "file", + "path": None, + }, + } + + self.context = ts.Context( + { + "cache_pool": {"total_bytes_limit": 10000000}, + "data_copy_concurrency": {"limit": 32}, + } + ) + + def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]: + """ + Read a file and return a dictionary of tensors. + """ + read_futures = {} + for key in self.keys_to_read: + spec = self.spec_template.copy() + spec["kvstore"]["path"] = str(filename) + "/" + str(key) + + read_futures[key] = ts.open( + spec, create=False, open=True, context=self.context + ) + + results = { + key: np.array(read_futures[key].result()) for key in self.keys_to_read + } + + data = { + key: torch.as_tensor(results[key], dtype=torch.float32) + for key in self.keys_to_read + } + + return data + + def read_file_sharded( + self, filename: pathlib.Path, parallel_rank: int, parallel_size: int + ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]: + """ + Read a file and return a dictionary of tensors. + """ + pass + + +class ZarrReadWorker: + """ + This class is a worker for the ZarrReadController. + It reads tasks from the task queue and writes to the shared memory buffer. + It then sends an acknowledgement to the controller. + """ + + def __init__(self, task_q: mp.Queue, ack_q: mp.Queue): + """ + + task_q is the incoming Queue of chunks to read + ack_q is the outgoing acknowledgement of reads + """ + + self.task_q = task_q + self.ack_q = ack_q + + self.current_group = None + self.current_array = None + + self.zarr_cache = {} + + def run(self): + """ + This function is the main loop for the worker. + It reads tasks from the task queue and writes to the shared memory buffer. + It then sends an acknowledgement to the controller. + """ + + while True: + # Run until killed + + task = self.task_q.get() + + if task is None: + break + + # Task organization: + # ( + # zarr_path - file name we're reading, a group + # array_name, - array in that group + # read_idx - a unique integer representing the read we're about to do + # index slice_to_read - the np.slice object representing what in the original file to read + # shared_buffer_name - the unique name of the shared buffer this read will use + # shared_buffer_loc - the slice of the shared buffer to store into + # ) + + ( + zarr_path, + array_name, + read_idx, + slice_to_read, + shared_buffer_name, + shared_buffer_slice, + ) = task + + if zarr_path not in self.zarr_cache: + self.zarr_cache[zarr_path] = zarr.open_group(zarr_path) + + z = self.zarr_cache[zarr_path] + + arr = z[array_name] + + # Get the shared memory instance: + shm = shared_memory.SharedMemory(name=shared_buffer_name) + buf = np.ndarray(arr.shape, arr.dtype, buffer=shm.buf) + + # Perform the local read (and implicit decompress): + buf[shared_buffer_slice] = arr[slice_to_read] + + shm.close() + + # Send completion signal: + self.ack_q.put( + ( + "done", + read_idx, + ) + ) + + +def spawn_worker(task_q: mp.Queue, ack_q: mp.Queue): + worker = ZarrReadWorker(task_q, ack_q) + worker.run() + + +class ZarrReadController(BackendReader): + """ + This class maintains a persistent pool of processes to enable shared + memory reading of zarr groups. Users can control how many processes + to use, and which pool of CPUs they reside on. + + By default reading is done by passing chunks to each worker to read. + Reads are round-robin across children processes. Master process will + not return until all children reads have acknowledged. + """ + + def __init__( + self, + keys_to_read: list[str] | None, + num_read_processes: int | None = None, + ): + super().__init__(keys_to_read) + + self.available_cpus = psutil.Process().cpu_affinity() + if num_read_processes is None: + # Use all but one CPU, unless there is only one... + num_read_processes = max(1, len(self.available_cpus) - 1) + + self.num_read_processes = num_read_processes + print(f"num_read_processes: {num_read_processes}") + # If the target_cpus aren't set, we use some default settings: + + # Initialize Queues: + self.task_q = mp.Queue() + self.ack_q = mp.Queue() + + self.children = [] + + self.memory_buffers = {} + + self.spawn_children() + + def spawn_children( + self, + ): + if mp.get_start_method() != "fork" and not hasattr(sys, "frozen"): + # Prevent accidental spawn in child imports + if not hasattr(self, "_spawn_guard"): + self._spawn_guard = True + else: + return + + # Create processes, using psutil to set affinity at spawn time. + + stride = len(self.available_cpus) // self.num_read_processes + cpus_by_proc = [ + self.available_cpus[i * stride : (i + 1) * stride] + for i in range(self.num_read_processes) + ] + + # split the available cpus into num_read_processes chunks + + for i, cpus in enumerate(cpus_by_proc): + psutil_process = psutil.Process() + psutil_process.cpu_affinity(cpus) + proc = mp.Process(target=spawn_worker, args=(self.task_q, self.ack_q)) + psutil_process.cpu_affinity(self.available_cpus) + self.children.append(proc) + + for worker in self.children: + worker.start() + + def free_shared_memory(self, zarr_file): + # Free all the shared memory buffers that were opened for the specified file + if zarr_file in self.memory_buffers: + for buffer in self.memory_buffers[zarr_file]: + buffer.close() + buffer.unlink() + + self.memory_buffers.pop(zarr_file) + + def read_file(self, zarr_file: str): + print(f"zarr_file: {zarr_file}") + file_id = os.path.basename(zarr_file) + + # Open the file: + z = zarr.open_group(zarr_file) + + output_arrays = {} + + if zarr_file in self.memory_buffers: + self.free_shared_memory(zarr_file) + + self.memory_buffers[zarr_file] = [] + + required_idx = [] + + for key in self.keys_to_read: + # Get the metadata for this key: + arr = z[key] + + # Allocate the entire buffer: + buffer_size = np.prod(arr.shape) * np.dtype(arr.dtype).itemsize + + shm = shared_memory.SharedMemory( + create=True, + size=buffer_size, + ) + np_buffer = np.ndarray(arr.shape, dtype=arr.dtype, buffer=shm.buf) + + # Make sure we don't unlink it prematurely: + self.memory_buffers[zarr_file].append(shm) + + output_arrays[key] = np_buffer + + zarr_chunk_size = arr.chunks[0] + + # Define the read boundaries for slicing: + slice_starts = list(range(0, arr.shape[0], zarr_chunk_size)) + slice_stops = [start + zarr_chunk_size for start in slice_starts] + + # Correct the last stop point: + slice_stops[-1] = arr.shape[0] + + # Task organization: + # ( + # zarr_path - file name we're reading, a group + # array_name, - array in that group + # read_idx - a unique integer representing the read we're about to do + # index slice_to_read - the np.slice object representing what in the original file to read + # shared_buffer_name - the unique name of the shared buffer this read will use + # shared_buffer_loc - the slice of the shared buffer to store into + # ) + + for i, (slice_start, slice_stop) in enumerate( + zip(slice_starts, slice_stops) + ): + cpu_slice = np.s_[slice_start:slice_stop] + zarr_slice = np.s_[slice_start:slice_stop] + + length = slice_stop - slice_start + + read_idx = f"{file_id}_{key}_{i}_{length}" + required_idx.append(read_idx) + + task_args = ( + zarr_file, + key, + read_idx, + zarr_slice, + shm.name, + cpu_slice, + ) + self.task_q.put(task_args) + + # Now, let's check for completeness before returning: + completed = False + while not completed: + status, idx = self.ack_q.get() + if status == "done": + if idx not in required_idx: + # Put it back in the queue, it's for another file: + self.ack_q.put((status, idx)) + else: + required_idx.remove(idx) + completed = len(required_idx) == 0 + + return {key: torch.as_tensor(output_arrays[key]) for key in self.keys_to_read} + + def read_file_sharded( + self, filename: pathlib.Path, parallel_rank: int, parallel_size: int + ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]: + """ + Read a file and return a dictionary of tensors. + """ + pass + + def __del__(self): + """ + Make sure we're not leaving things open we shouldn't be + """ + + # Stop all the workers by sending None on the queue: + for child in self.children: + self.task_q.put(None) + + for child in self.children: + child.join() + + open_files = list(self.memory_buffers.keys()) + for zf in open_files: + self.free_shared_memory(zf) + + +class DrivaerMLDataset: + """ + Dataset reader for DrivaerML and similar datasets. In general, this + dataset supports reading dictionary-like data, and returning a + dictionary of torch.Tensor objects. + + When constructed, the user must pass a directory of data examples. + The dataset will inspect the folder, identify all children, and decide: + - If every file is a directory ending in .zarr, the zarr reader is used. + - If every file is .npy, the .npy reader is used. + - If every file is .npz, the .npz reader is used. + - If every file is a directory without an extension, it's assumed to be .stl/.vtp/.vtu + + The user can optionally force one path with a parameter. + + The flow of this dataset is: + - Load data from file, using a thread. + - Each individual file reading tool may or may not have it's own threading + or multi processing enabled. That's up to it. This just does async + loading. + - Data should come out of the readers in dict{str : torch.Tensor} format + - The data is transferred from CPU to GPU in a separate stream. + + Users can call __getitem__(i), which will trigger the pipeline, + or they can call `preload(i)`, which will start the pipeline for index `i`. + Subsequent calls to `__getitem__(i)` should be faster since the IO is in + progress or complete. + + Using the `__iter__` functionality will automatically enable preloading. + + """ + + def __init__( + self, + data_dir: str | pathlib.Path, + keys_to_read: list[str] | None, + output_device: torch.device, + preload_depth: int = 2, + device_mesh: torch.distributed.DeviceMesh | None = None, + placements: dict[str, torch.distributed.tensor.Placement] | None = None, + ) -> None: + if isinstance(data_dir, str): + data_dir = pathlib.Path(data_dir) + + # Verify the data directory exists: + if not data_dir.exists(): + raise FileNotFoundError(f"Data directory {data_dir} does not exist") + + # Verify the data directory is a directory: + if not data_dir.is_dir(): + raise NotADirectoryError(f"Data directory {data_dir} is not a directory") + + self._file_type, self._filenames = self._infer_file_type_and_filenames(data_dir) + + # Initialize the file reader object + # Note that for some of these, they could be functions + # But others benefit from having a state, so we use classes: + if self._file_type == "npy": + self.file_reader = NpyFileReader(keys_to_read) + elif self._file_type == "zarr": + # self.file_reader = ZarrFileReader(keys_to_read) + # self.file_reader = ZarrReadController(keys_to_read) + self.file_reader = TensorStoreZarrReader(keys_to_read) + else: + raise ValueError(f"Unsupported file type: {self._file_type}") + + self._keys_to_read = keys_to_read + + # Check the file names; some can be read well in parallel, while others + # are not parallelizable. + + self._length = len(self._filenames) + + self.output_device = output_device + if output_device.type == "cuda": + self._data_loader_stream = torch.cuda.Stream() + else: + self._data_loader_stream = None + + self.device_mesh = device_mesh + self.placements = placements + + # This is thread storage for data preloading: + self._preload_queue = {} + self._transfer_events = {} + self.preload_depth = preload_depth + self.preload_executor = ThreadPoolExecutor(max_workers=preload_depth) + + def _infer_file_type_and_filenames( + self, data_dir: pathlib.Path + ) -> tuple[str, list[str]]: + """ + Infer the file type and filenames from the data directory. + """ + + # We validated the directory exists and is a directory already. + + # List the files: + files = list(data_dir.iterdir()) + + if all(file.suffix == ".npy" for file in files): + return "npy", files + elif all(file.suffix == ".zarr" and file.is_dir() for file in files): + return "zarr", files + else: + # TODO - support folders of stl, vtp, vtu. + raise ValueError(f"Unsupported file type: {files}") + + def _move_to_gpu( + self, data: dict[str, torch.Tensor], idx: int + ) -> dict[str, torch.Tensor]: + """Convert numpy arrays to torch tensors and move to GPU if available. + + Args: + data: Dictionary of key to torch tensor. + + Returns: + Dictionary of key to torch tensor on GPU if available. + """ + + if self.output_device.type != "cuda": + return data + + result = {} + + with torch.cuda.stream(self._data_loader_stream): + for key in data.keys(): + # Move to GPU if available + result[key] = data[key].to(self.output_device, non_blocking=True) + + self._transfer_events[idx] = torch.cuda.Event() + self._transfer_events[idx].record(self._data_loader_stream) + + return result + + def _convert_to_shard_tensors( + self, tensors: dict[str, torch.Tensor] + ) -> dict[str, ShardTensor]: + """Convert tensors to ShardTensor objects for distributed training. + + Args: + tensors: Dictionary of key to torch tensor. + + Returns: + Dictionary of key to torch tensor or ShardTensor. + """ + + if self.device_mesh is None: + return tensors + + raise NotImplementedError("Converting to ShardTensor here not implemented yet.") + + # result = {} + + # for key, tensor in tensors.items(): + # # Create a ShardTensor with whatever layout the data is actually in: + # st = ShardTensor.__new__( + # ShardTensor, + # local_tensor=tensor, + # spec=self.tensor_specs[key], + # requires_grad=False, # By default, the data pipe output doesn't need a grad. + # ) + + # # Find out the desired placement: + # if tensor.numel() > 1: + # if isinstance(self.placements, dict): + # target_placement = self.placements[key] + # else: + # target_placement = self.placements + # else: + # target_placement = (Replicate(),) + + # # Redistribute if necessary: + # # (Recall that this is one dimensional mesh only) + # if st._spec.placements[0] != target_placement[0]: + # st = st.redistribute(placements=target_placement) + + # result[key] = st + + # return result + + def preload(self, idx: int) -> None: + """ + Asynchronously preload the data for the given index (up to CPU, not GPU). + Only one preload operation is supported at a time. + + Args: + idx: Index of the sample to preload. + """ + if idx in self._preload_queue: + # Skip items that are already in the queue + return + + def _preload_worker(): + try: + data = self._read_file(self._filenames[idx]) + # Convert to torch tensors + return self._move_to_gpu(data, idx) + except Exception as e: + print(f"Exception in preload: {e}") + raise e + + self._preload_queue[idx] = self.preload_executor.submit(_preload_worker) + + def get_preloaded(self, idx: int) -> dict[str, torch.Tensor] | None: + """ + Retrieve the preloaded data (blocking if not ready). + + Returns: + (idx, data) tuple where data is a dictionary of key to numpy array or torch tensor. + + Raises: + RuntimeError: If no preload is in progress. + Exception: If preload failed. + """ + + if idx not in self._preload_queue: + return None + + result = self._preload_queue[ + idx + ].result() # This will block until the result is ready + self._preload_queue.pop(idx) # Clear the future after getting the result + + return result + + def __iter__(self): + self.i = 0 + return self + + def __next__(self): + if self.i >= len(self._filenames): + self.i = 0 + raise StopIteration + + if self.preload_depth > 0 and self.i + 1 < len(self._filenames): + self.preload(self.i + 1) + if self.preload_depth > 1 and self.i + 2 < len(self._filenames): + self.preload(self.i + 2) + + data = self.__getitem__(self.i) + + self.i += 1 + + return data + + def __len__(self): + return len(self._filenames) + + def _read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]: + """ + Read a file and return a dictionary of tensors. + """ + return self.file_reader.read_file(filename) + + def __getitem__(self, idx: int) -> dict[str, torch.Tensor | ShardTensor]: + """ + Get a data sample. + + Flow is: + - Read data, or get preloaded data if this idx is preloaded. + - Move data to GPU, if needed. + - Preloading data will move to GPU if it can. + - If domain parallelism is enabled, convert to ShardTensors. + - Return + + Args: + idx: Index of the sample to retrieve + + Returns: + Dictionary containing tensors/ShardTensors for the requested data + """ + + if idx >= len(self._filenames): + raise IndexError( + f"Index {idx} out of range for dataset of size {len(self._filenames)}" + ) + + # Attempt to get preloaded data: + data = self.get_preloaded(idx) + if data is None: + # Read data from zarr file + data = self._read_file(self._filenames[idx]) + data = self._move_to_gpu(data, idx) + + # This blocks until the preprocessing has transferred to GPU + if idx in self._transfer_events: + torch.cuda.current_stream().wait_event(self._transfer_events[idx]) + self._transfer_events.pop(idx) + + # Convert to ShardTensors if using domain parallelism + if self.device_mesh is not None: + data = self._convert_to_shard_tensors(data) + + return data + + +def compute_mean_std_min_max( + dataset: DrivaerMLDataset, field_keys: list[str], max_samples: int = 20 +) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + """ + Compute the mean, standard deviation, minimum, and maximum for a specified field + across all samples in a dataset. + + Uses a numerically stable online algorithm for mean and variance. + + Args: + dataset (DrivaerMLDataset): The dataset to process. + field_key (str): The key for the field to normalize. + + Returns: + tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + mean, std, min, max tensors for the field. + """ + N = {} + mean = {} + M2 = {} # Sum of squares of differences from the current mean + min_val = {} + max_val = {} + + # Read the first data item to get the shapes: + example_data = dataset[0] + + # Create placeholders for the accumulators: + for key in field_keys: + N[key] = torch.zeros(1, dtype=torch.int64, device=example_data[key].device) + mean[key] = torch.zeros( + example_data[key].shape[-1], + device=example_data[key].device, + dtype=torch.float64, + ) + M2[key] = torch.zeros( + example_data[key].shape[-1], + device=example_data[key].device, + dtype=torch.float64, + ) + min_val[key] = torch.full( + (example_data[key].shape[-1],), + float("inf"), + device=example_data[key].device, + ) + max_val[key] = torch.full( + (example_data[key].shape[-1],), + float("-inf"), + device=example_data[key].device, + ) + + global_start = time.perf_counter() + start = time.perf_counter() + for i, data in enumerate(dataset): + if i >= max_samples: + break + + for field_key in field_keys: + field_data = data[field_key] + + # Compute batch statistics + batch_mean = field_data.mean(axis=(0)) + batch_M2 = ((field_data - batch_mean) ** 2).sum(axis=(0)) + batch_n = field_data.shape[0] + + # Update min/max + batch_min = field_data.amin(dim=(0)) + batch_max = field_data.amax(dim=(0)) + min_val[field_key] = torch.minimum(min_val[field_key], batch_min) + max_val[field_key] = torch.maximum(max_val[field_key], batch_max) + + # Update running mean and M2 (Welford's algorithm) + delta = batch_mean - mean[field_key] + N[field_key] += batch_n # batch_n should also be torch.int64 + mean[field_key] = mean[field_key] + delta * (batch_n / N[field_key]) + M2[field_key] = ( + M2[field_key] + + batch_M2 + + delta**2 * (batch_n * N[field_key]) / N[field_key] + ) + + end = time.perf_counter() + iteration_time = end - start + print(f"on iteration {i} of {max_samples}, time: {iteration_time:.2f} seconds") + start = time.perf_counter() + + global_end = time.perf_counter() + global_time = global_end - global_start + + print(f"Total time: {global_time:.2f} seconds for {max_samples} samples") + + var = {} + std = {} + for field_key in field_keys: + var[field_key] = M2[field_key] / ( + N[field_key].item() - 1 + ) # Convert N to Python int for division + std[field_key] = torch.sqrt(var[field_key]) + + return mean, std, min_val, max_val diff --git a/physicsnemo/utils/domino/utils.py b/physicsnemo/utils/domino/utils.py index 15437dca9e..01ee143253 100644 --- a/physicsnemo/utils/domino/utils.py +++ b/physicsnemo/utils/domino/utils.py @@ -485,10 +485,8 @@ def shuffle_array( points_per_chunk = [ round(n_points * c.shape[0] / N_input_points) for c in chunk_weights ] - print(f"points_per_chunk: {points_per_chunk}") gap = n_points - sum(points_per_chunk) - print(f"gap: {gap}") if gap > 0: for g in range(gap): @@ -703,8 +701,8 @@ def combine_dict(old_dict: dict[Any, Any], new_dict: dict[Any, Any]) -> dict[Any def create_grid( - max_coords: ArrayType, min_coords: ArrayType, resolution: ArrayType -) -> ArrayType: + max_coords: torch.Tensor, min_coords: torch.Tensor, resolution: torch.Tensor +) -> torch.Tensor: """Create a 3D regular grid from coordinate bounds and resolution. This function generates a regular 3D grid spanning from min_coords to @@ -721,36 +719,37 @@ def create_grid( grid point. The last dimension contains [x, y, z] coordinates. Examples: - >>> import numpy as np - >>> min_bounds = np.array([0.0, 0.0, 0.0]) - >>> max_bounds = np.array([1.0, 1.0, 1.0]) - >>> grid_res = np.array([2, 2, 2]) + >>> import torch + >>> min_bounds = torch.tensor([0.0, 0.0, 0.0]) + >>> max_bounds = torch.tensor([1.0, 1.0, 1.0]) + >>> grid_res = torch.tensor([2, 2, 2]) >>> grid = create_grid(max_bounds, min_bounds, grid_res) >>> grid.shape (2, 2, 2, 3) - >>> np.allclose(grid[0, 0, 0], [0.0, 0.0, 0.0]) + >>> torch.allclose(grid[0, 0, 0], torch.tensor([0.0, 0.0, 0.0])) True - >>> np.allclose(grid[1, 1, 1], [1.0, 1.0, 1.0]) + >>> torch.allclose(grid[1, 1, 1], torch.tensor([1.0, 1.0, 1.0])) True """ - xp = array_type(max_coords) + # Linspace to make evenly spaced steps along each axis: + dd = [ + torch.linspace( + min_coords[i], + max_coords[i], + resolution[i], + dtype=max_coords.dtype, + device=max_coords.device, + ) + for i in range(3) + ] - dx = xp.linspace( - min_coords[0], max_coords[0], resolution[0], dtype=max_coords.dtype - ) - dy = xp.linspace( - min_coords[1], max_coords[1], resolution[1], dtype=max_coords.dtype - ) - dz = xp.linspace( - min_coords[2], max_coords[2], resolution[2], dtype=max_coords.dtype - ) + # Combine them with meshgrid: + xv, yv, zv = torch.meshgrid(*dd) - xv, yv, zv = xp.meshgrid(dx, dy, dz) - xv = xp.expand_dims(xv, -1) - yv = xp.expand_dims(yv, -1) - zv = xp.expand_dims(zv, -1) - grid = xp.concatenate((xv, yv, zv), axis=-1) - grid = xp.transpose(grid, (1, 0, 2, 3)) + xv = xv.unsqueeze(-1) + yv = yv.unsqueeze(-1) + zv = zv.unsqueeze(-1) + grid = torch.concatenate((xv, yv, zv), axis=-1) return grid From caf02908ad65370a1f65e2a8ffcdf86d33a378c7 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 27 Aug 2025 14:17:51 +0000 Subject: [PATCH 05/98] Adding a torch-centric domino datapipe and a separated, data-agnostic data set for IO. This is reaching IO throughputs of about 5GB/s on ORD, so getting better. --- physicsnemo/datapipes/cae/domino_datapipe2.py | 397 ++++++++-------- .../datapipes/cae/drivaer_ml_datapipe.py | 430 ++++-------------- physicsnemo/utils/sdf.py | 95 ++-- 3 files changed, 332 insertions(+), 590 deletions(-) diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py index 310493e3cb..86e2f88539 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe2.py +++ b/physicsnemo/datapipes/cae/domino_datapipe2.py @@ -232,8 +232,10 @@ def __init__( if self.config.gpu_preprocessing or self.config.gpu_output: # Make sure we move data to the right device: target_device = dist.device + self.preprocess_stream = torch.cuda.Stream() else: target_device = torch.device("cpu") + self.preprocess_stream = None self.device = target_device @@ -319,23 +321,28 @@ def __init__( data_dir=self.config.data_path, keys_to_read=self.keys_to_read, output_device=self.device, + consumer_stream=self.preprocess_stream, ) # This is thread storage for data preprocessing: self._preprocess_queue = {} self._preprocess_events = {} self.preprocess_depth = 2 - self.preprocess_executor = ThreadPoolExecutor(max_workers=2) + self.preprocess_executor = ThreadPoolExecutor(max_workers=1) def set_indices(self, indices: list[int]): """ Set the indices for the dataset for this epoch. """ + + # TODO - this needs to block while anything is in the preprocess queue. + self.indices = indices def __len__(self): return len(self.dataset) + @torch.compile(dynamic=True) def compute_stl_scaling( self, stl_vertices: torch.Tensor, bounding_box_dims_surf: torch.Tensor | None ): @@ -382,6 +389,7 @@ def process_combined( surf_grid, use_sign_winding_number=True, ) + sdf_surf_grid = surf_grid if self.config.sampling: geometry_points = self.config.geom_points_sample @@ -398,7 +406,7 @@ def process_combined( return (sdf_surf_grid, geom_centers) - @profile + @torch.compile(dynamic=True) def process_surface( self, s_min: torch.Tensor, @@ -467,19 +475,9 @@ def process_surface( if self.config.surface_sampling_algorithm == "area_weighted": weights = surface_sizes - # ( - # surface_coordinates_sampled, - # idx_surface, - # ) = area_weighted_shuffle_array( - # surface_coordinates, - # self.config.surface_points_sample, - # surface_sizes, - # ) + else: weights = None - # surface_coordinates_sampled, idx_surface = shuffle_array( - # surface_coordinates, self.config.surface_points_sample - # ) surface_coordinates_sampled, idx_surface = shuffle_array( surface_coordinates, @@ -498,20 +496,6 @@ def process_surface( surface_fields = surface_fields[idx_surface] pos_normals_com_surface = pos_normals_com_surface[idx_surface] - # Perform a kNN on the full set of points vs. sampled points - # to select the neighbors: - # if self.config.num_surface_neighbors > 1: - # if self.array_provider == cp: - # knn = cuml.neighbors.NearestNeighbors( - # n_neighbors=self.config.num_surface_neighbors, - # algorithm="rbc", - # ) - # knn.fit(surface_coordinates) - # else: - # # Under the hood this is instantiating a KDTree. - # # aka here knn is a type, not a class, technically. - # interp_func = KDTree(surface_coordinates) - # Now, perform the kNN on the sampled points: if self.config.num_surface_neighbors > 1: neighbor_indices, neighbor_distances = knn( @@ -585,7 +569,7 @@ def process_surface( return return_dict - @profile + @torch.compile(dynamic=True) def process_volume( self, s_min: torch.Tensor, @@ -663,11 +647,7 @@ def process_volume( mode="constant", value=-10.0, ) - # volume_coordinates_sampled = pad( - # volume_coordinates_sampled, - # self.config.volume_points_sample, - # pad_value=-10.0, - # ) + volume_fields = volume_fields[idx_volume] volume_coordinates = volume_coordinates_sampled @@ -726,76 +706,94 @@ def process_volume( return return_dict @profile - def process_data(self, data_dict): - # Start building the preprocessed return dict: - return_dict = { - "global_params_values": data_dict["global_params_values"], - "global_params_reference": data_dict["global_params_reference"], - } + def process_data(self, data_dict, idx: int): + for key in self.keys_to_read_if_available.keys(): + if key not in data_dict: + data_dict[key] = self.keys_to_read_if_available[key] - # This function gets information about the surface scale, - # and decides what the surface grid will be: - (s_min, s_max, length_scale, surf_grid_max_min, surf_grid) = ( - self.compute_stl_scaling( - data_dict["stl_coordinates"], self.config.bounding_box_dims_surf - ) - ) + with torch.cuda.stream(self.preprocess_stream): + if self.config.deterministic: + torch.manual_seed(idx) - # This is a center of mass computation for the stl surface, - # using the size of each mesh point as weight. + # Start building the preprocessed return dict: + return_dict = { + "global_params_values": data_dict["global_params_values"], + "global_params_reference": data_dict["global_params_reference"], + } - center_of_mass = calculate_center_of_mass( - data_dict["stl_centers"], data_dict["stl_areas"] - ) + # This function gets information about the surface scale, + # and decides what the surface grid will be: + (s_min, s_max, length_scale, surf_grid_max_min, surf_grid) = ( + self.compute_stl_scaling( + data_dict["stl_coordinates"], self.config.bounding_box_dims_surf + ) + ) - # For SDF calculations, make sure the mesh_indices_flattened is an integer array: - mesh_indices_flattened = data_dict["stl_faces"].to(torch.int32) + # This is a center of mass computation for the stl surface, + # using the size of each mesh point as weight. - return_dict.update( - { - "length_scale": length_scale, - "surf_grid_max_min": surf_grid_max_min, - } - ) + center_of_mass = calculate_center_of_mass( + data_dict["stl_centers"], data_dict["stl_areas"] + ) - # This will compute the sdf on the surface grid and apply downsampling if needed - sdf_surf_grid, geom_centers = self.preprocess_combined( - s_min, - s_max, - surf_grid, - stl_vertices=data_dict["stl_coordinates"], - mesh_indices_flattened=mesh_indices_flattened, - ) - return_dict["sdf_surf_grid"] = sdf_surf_grid - return_dict["geometry_coordinates"] = geom_centers + # For SDF calculations, make sure the mesh_indices_flattened is an integer array: + mesh_indices_flattened = data_dict["stl_faces"].to(torch.int32) - # Up to here works all in torch! + return_dict.update( + { + "length_scale": length_scale, + "surf_grid_max_min": surf_grid_max_min, + } + ) - if self.model_type == "volume" or self.model_type == "combined": - volume_dict = self.preprocess_volume( + # This will compute the sdf on the surface grid and apply downsampling if needed + sdf_surf_grid, geom_centers = self.process_combined( s_min, s_max, - volume_coordinates=data_dict["volume_mesh_centers"], - volume_fields=data_dict["volume_fields"], + surf_grid, stl_vertices=data_dict["stl_coordinates"], mesh_indices_flattened=mesh_indices_flattened, - center_of_mass=center_of_mass, ) + return_dict["sdf_surf_grid"] = sdf_surf_grid + return_dict["geometry_coordinates"] = geom_centers + + # Up to here works all in torch! + + if self.model_type == "volume" or self.model_type == "combined": + volume_dict = self.process_volume( + s_min, + s_max, + volume_coordinates=data_dict["volume_mesh_centers"], + volume_fields=data_dict["volume_fields"], + stl_vertices=data_dict["stl_coordinates"], + mesh_indices_flattened=mesh_indices_flattened, + center_of_mass=center_of_mass, + ) - return_dict.update(volume_dict) + return_dict.update(volume_dict) + + if self.model_type == "surface" or self.model_type == "combined": + surface_dict = self.process_surface( + s_min, + s_max, + center_of_mass, + surf_grid, + surface_coordinates=data_dict["surface_mesh_centers"], + surface_normals=data_dict["surface_normals"], + surface_sizes=data_dict["surface_areas"], + surface_fields=data_dict["surface_fields"], + ) + return_dict.update(surface_dict) - if self.model_type == "surface" or self.model_type == "combined": - surface_dict = self.preprocess_surface( - s_min, - s_max, - center_of_mass, - surf_grid, - surface_coordinates=data_dict["surface_mesh_centers"], - surface_normals=data_dict["surface_normals"], - surface_sizes=data_dict["surface_areas"], - surface_fields=data_dict["surface_fields"], - ) - return_dict.update(surface_dict) + if self.device.type == "cuda": + self._preprocess_events[idx] = torch.cuda.Event() + self._preprocess_events[idx].record(self.preprocess_stream) + + # Mark all cuda tensors to be consumed on the main stream: + if self.device.type == "cuda": + for key in return_dict.keys(): + if isinstance(return_dict[key], torch.Tensor): + return_dict[key].record_stream(torch.cuda.default_stream()) return return_dict @@ -808,138 +806,111 @@ def __getitem__(self, idx): are relatively large due to the mesh size. """ - if self.config.deterministic: - torch.manual_seed(idx) + index = self.idx_to_index(idx) + + # Get the preprocessed data: + data_dict = self.get_preprocessed(idx) + if data_dict is None: + # If no preprocessing was done for this index, process it now + + # Get the data from the dataset. + # Under the hood, this may be fetching preloaded data. + data_dict = self.dataset[index] + data_dict = self.process_data(data_dict, idx) + + # This blocks the main stream until the preprocessing has transferred to GPU + if idx in self._preprocess_events: + torch.cuda.current_stream().wait_event(self._preprocess_events[idx]) + self._preprocess_events.pop(idx) + + return data_dict + def idx_to_index(self, idx): if hasattr(self, "indices"): - index = self.indices[idx] - else: - index = idx + return self.indices[idx] - data_dict = self.dataset[index] + return idx - for key in self.keys_to_read_if_available.keys(): - if key not in data_dict: - data_dict[key] = self.keys_to_read_if_available[key] + def preprocess(self, idx: int) -> None: + """ + Start preprocessing for the given index (1 step ahead). + This processes preloaded data or loads it if not available. + """ + if idx in self._preprocess_queue: + # Skip items that are already being preprocessed + return + + def _preprocess_worker(): + index = self.idx_to_index(idx) + # Try to get preloaded data first + data_dict = self.dataset[index] + # Process the data + return self.process_data(data_dict, idx) + + # Submit preprocessing task to thread pool + self._preprocess_queue[idx] = self.preprocess_executor.submit( + _preprocess_worker + ) - return_dict = self.process_data(data_dict) + def get_preprocessed(self, idx: int) -> dict | None: + """ + Retrieve preprocessed data (blocking if not ready). + Returns None if no preprocessing is in progress for this index. + """ + if idx not in self._preprocess_queue: + return None - return return_dict + result = self._preprocess_queue[idx].result() # Block until ready + self._preprocess_queue.pop(idx) # Clear after getting result + + return result + + def __next__(self): + # To iterate through the data efficiently, he have to implement the + # following, assuming a steady state + + # - start the dataset loading at idx + 2 + # - start the preprocessing pipe at idx + 1 + # - the preprocessing pipe has to implicitly wait for idx +1 in the dataset + # - wait for the preprocessing pipe at idx to finish + # return the data. + if self.i >= len(self.dataset): + self.i = 0 + raise StopIteration + + current_idx = self.i + + # Start loading two ahead: + if len(self.dataset) >= current_idx + 2: + self.dataset.preload(self.idx_to_index(current_idx + 2)) + + # Start preprocessing one ahead: + if len(self.dataset) >= current_idx + 1: + self.preprocess(current_idx + 1) + + # If no preprocessing was done for this index, process it now + data = self.__getitem__(current_idx) + + self.i += 1 + return data + + def __iter__(self): + # When starting the iterator method, start loading the data + # at idx = 0, idx = 1 + # Start preprocessing at idx = 0, when the load completes + + self.i = 0 + + # Trigger the dataset to start loading index 0: + if len(self.dataset) >= 1: + self.dataset.preload(self.idx_to_index(self.i)) + if len(self.dataset) >= 2: + self.dataset.preload(self.idx_to_index(self.i + 1)) + + # Start preprocessing index 0 + self.preprocess(self.i) - # def __getitem__(self, idx: int) -> dict[str, torch.Tensor | ShardTensor]: - # """ - # Get a data sample. - - # Flow is: - # - Read data, or get preloaded data if this idx is preloaded. - # - Move data to GPU, if needed. - # - Preloading data will move to GPU if it can. - # - If domain parallelism is enabled, convert to ShardTensors. - # - Return - - # Args: - # idx: Index of the sample to retrieve - - # Returns: - # Dictionary containing tensors/ShardTensors for the requested data - # """ - - # if idx >= len(self._filenames): - # raise IndexError( - # f"Index {idx} out of range for dataset of size {len(self._filenames)}" - # ) - - # # Attempt to get preloaded data: - # data = self.get_preloaded(idx) - # if data is None: - # # Read data from zarr file - # data = self._read_file(self._filenames[idx]) - # data = self._move_to_gpu(data, idx) - - # # This blocks until the preprocessing has transferred to GPU - # if idx in self._transfer_events: - # torch.cuda.current_stream().wait_event(self._transfer_events[idx]) - # self._transfer_events.pop(idx) - - # # Convert to ShardTensors if using domain parallelism - # if self.device_mesh is not None: - # data = self._convert_to_shard_tensors(data) - - # return data - - # def __iter__(self): - # self.i = 0 - # return self - - # def __next__(self): - # """ - # When used in an iterator context, this datapipe will - # leverage preloading and preprocessing to speed up the data - # loading latency. - - # Each time "next" is called, the datapipe will ask the data - # set to preload the data 2 steps ahead. It will then ask - # for the data from one step ahead, and start it processing. - - # Finally, it will return the data from this requested index - # """ - # if self.i >= len(self._filenames): - # self.i = 0 - # raise StopIteration - - # if self.preload_depth > 0 and self.i + 1 < len(self._filenames): - # self.preload(this_index) - # if self.preload_depth > 1 and self.i + 2 < len(self._filenames): - # self.preload(this_index) - - # data = self.__getitem__(this_index) - - # self.i += 1 - - # return data - - # def preprocess(self, idx: int) -> None: - # """ - # Asynchronously preload the data for the given index (up to CPU, not GPU). - # Only one preload operation is supported at a time. - - # Args: - # idx: Index of the sample to preload. - # """ - # if idx in self._preload_queue: - # # Skip items that are already in the queue - # return - - # def _preload_worker(): - # try: - # data = self._read_file(self._filenames[idx]) - # # Convert to torch tensors - # return self._move_to_gpu(data, idx) - # except Exception as e: - # print(f"Exception in preload: {e}") - # raise e - - # self._preload_queue[idx] = self.preload_executor.submit(_preload_worker) - - # def get_preloaded(self, idx: int) -> dict[str, torch.Tensor] | None: - # """ - # Retrieve the preloaded data (blocking if not ready). - - # Returns: - # (idx, data) tuple where data is a dictionary of key to numpy array or torch tensor. - - # Raises: - # RuntimeError: If no preload is in progress. - # Exception: If preload failed. - # """ - - # if idx not in self._preload_queue: - # return None - - # result = self._preload_queue[idx].result() # This will block until the result is ready - # self._preload_queue.pop(idx) # Clear the future after getting the result - - # return result + return self @profile diff --git a/physicsnemo/datapipes/cae/drivaer_ml_datapipe.py b/physicsnemo/datapipes/cae/drivaer_ml_datapipe.py index 84eea51ea5..67e137bf13 100644 --- a/physicsnemo/datapipes/cae/drivaer_ml_datapipe.py +++ b/physicsnemo/datapipes/cae/drivaer_ml_datapipe.py @@ -14,21 +14,23 @@ # See the License for the specific language governing permissions and # limitations under the License. -import multiprocessing as mp -import os import pathlib -import sys import time from abc import ABC, abstractmethod from concurrent.futures import ThreadPoolExecutor -from multiprocessing import shared_memory import numpy as np import psutil -import tensorstore as ts import torch import zarr +try: + import tensorstore as ts + + TENSORSTORE_AVAILABLE = True +except ImportError: + TENSORSTORE_AVAILABLE = False + from physicsnemo.distributed import ShardTensor, ShardTensorSpec # from physicsnemo.distributed.utils import compute_split_shapes @@ -159,335 +161,70 @@ def read_file_sharded( pass -class TensorStoreZarrReader(BackendReader): - """ - Reader for tensorstore zarr files. - """ - - def __init__(self, keys_to_read: list[str] | None) -> None: - super().__init__(keys_to_read) - - self.spec_template = { - "driver": "zarr2", - "kvstore": { - "driver": "file", - "path": None, - }, - } - - self.context = ts.Context( - { - "cache_pool": {"total_bytes_limit": 10000000}, - "data_copy_concurrency": {"limit": 32}, - } - ) - - def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]: - """ - Read a file and return a dictionary of tensors. - """ - read_futures = {} - for key in self.keys_to_read: - spec = self.spec_template.copy() - spec["kvstore"]["path"] = str(filename) + "/" + str(key) - - read_futures[key] = ts.open( - spec, create=False, open=True, context=self.context - ) - - results = { - key: np.array(read_futures[key].result()) for key in self.keys_to_read - } - - data = { - key: torch.as_tensor(results[key], dtype=torch.float32) - for key in self.keys_to_read - } - - return data - - def read_file_sharded( - self, filename: pathlib.Path, parallel_rank: int, parallel_size: int - ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]: - """ - Read a file and return a dictionary of tensors. - """ - pass - - -class ZarrReadWorker: - """ - This class is a worker for the ZarrReadController. - It reads tasks from the task queue and writes to the shared memory buffer. - It then sends an acknowledgement to the controller. - """ +if TENSORSTORE_AVAILABLE: - def __init__(self, task_q: mp.Queue, ack_q: mp.Queue): + class TensorStoreZarrReader(BackendReader): """ - - task_q is the incoming Queue of chunks to read - ack_q is the outgoing acknowledgement of reads + Reader for tensorstore zarr files. """ - self.task_q = task_q - self.ack_q = ack_q - - self.current_group = None - self.current_array = None - - self.zarr_cache = {} - - def run(self): - """ - This function is the main loop for the worker. - It reads tasks from the task queue and writes to the shared memory buffer. - It then sends an acknowledgement to the controller. - """ - - while True: - # Run until killed - - task = self.task_q.get() - - if task is None: - break - - # Task organization: - # ( - # zarr_path - file name we're reading, a group - # array_name, - array in that group - # read_idx - a unique integer representing the read we're about to do - # index slice_to_read - the np.slice object representing what in the original file to read - # shared_buffer_name - the unique name of the shared buffer this read will use - # shared_buffer_loc - the slice of the shared buffer to store into - # ) - - ( - zarr_path, - array_name, - read_idx, - slice_to_read, - shared_buffer_name, - shared_buffer_slice, - ) = task + def __init__(self, keys_to_read: list[str] | None) -> None: + super().__init__(keys_to_read) - if zarr_path not in self.zarr_cache: - self.zarr_cache[zarr_path] = zarr.open_group(zarr_path) - - z = self.zarr_cache[zarr_path] - - arr = z[array_name] - - # Get the shared memory instance: - shm = shared_memory.SharedMemory(name=shared_buffer_name) - buf = np.ndarray(arr.shape, arr.dtype, buffer=shm.buf) - - # Perform the local read (and implicit decompress): - buf[shared_buffer_slice] = arr[slice_to_read] - - shm.close() + self.spec_template = { + "driver": "zarr2", + "kvstore": { + "driver": "file", + "path": None, + }, + } - # Send completion signal: - self.ack_q.put( - ( - "done", - read_idx, - ) + self.context = ts.Context( + { + "cache_pool": {"total_bytes_limit": 30_000_000}, + "data_copy_concurrency": {"limit": 60}, + } ) + def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]: + """ + Read a file and return a dictionary of tensors. + """ + read_futures = {} + for key in self.keys_to_read: + spec = self.spec_template.copy() + spec["kvstore"]["path"] = str(filename) + "/" + str(key) + + read_futures[key] = ts.open( + spec, create=False, open=True, context=self.context + ) -def spawn_worker(task_q: mp.Queue, ack_q: mp.Queue): - worker = ZarrReadWorker(task_q, ack_q) - worker.run() - - -class ZarrReadController(BackendReader): - """ - This class maintains a persistent pool of processes to enable shared - memory reading of zarr groups. Users can control how many processes - to use, and which pool of CPUs they reside on. - - By default reading is done by passing chunks to each worker to read. - Reads are round-robin across children processes. Master process will - not return until all children reads have acknowledged. - """ - - def __init__( - self, - keys_to_read: list[str] | None, - num_read_processes: int | None = None, - ): - super().__init__(keys_to_read) - - self.available_cpus = psutil.Process().cpu_affinity() - if num_read_processes is None: - # Use all but one CPU, unless there is only one... - num_read_processes = max(1, len(self.available_cpus) - 1) - - self.num_read_processes = num_read_processes - print(f"num_read_processes: {num_read_processes}") - # If the target_cpus aren't set, we use some default settings: - - # Initialize Queues: - self.task_q = mp.Queue() - self.ack_q = mp.Queue() - - self.children = [] - - self.memory_buffers = {} - - self.spawn_children() - - def spawn_children( - self, - ): - if mp.get_start_method() != "fork" and not hasattr(sys, "frozen"): - # Prevent accidental spawn in child imports - if not hasattr(self, "_spawn_guard"): - self._spawn_guard = True - else: - return - - # Create processes, using psutil to set affinity at spawn time. - - stride = len(self.available_cpus) // self.num_read_processes - cpus_by_proc = [ - self.available_cpus[i * stride : (i + 1) * stride] - for i in range(self.num_read_processes) - ] - - # split the available cpus into num_read_processes chunks - - for i, cpus in enumerate(cpus_by_proc): - psutil_process = psutil.Process() - psutil_process.cpu_affinity(cpus) - proc = mp.Process(target=spawn_worker, args=(self.task_q, self.ack_q)) - psutil_process.cpu_affinity(self.available_cpus) - self.children.append(proc) - - for worker in self.children: - worker.start() - - def free_shared_memory(self, zarr_file): - # Free all the shared memory buffers that were opened for the specified file - if zarr_file in self.memory_buffers: - for buffer in self.memory_buffers[zarr_file]: - buffer.close() - buffer.unlink() - - self.memory_buffers.pop(zarr_file) - - def read_file(self, zarr_file: str): - print(f"zarr_file: {zarr_file}") - file_id = os.path.basename(zarr_file) - - # Open the file: - z = zarr.open_group(zarr_file) - - output_arrays = {} - - if zarr_file in self.memory_buffers: - self.free_shared_memory(zarr_file) - - self.memory_buffers[zarr_file] = [] - - required_idx = [] - - for key in self.keys_to_read: - # Get the metadata for this key: - arr = z[key] + results = { + key: np.array(read_futures[key].result()) for key in self.keys_to_read + } - # Allocate the entire buffer: - buffer_size = np.prod(arr.shape) * np.dtype(arr.dtype).itemsize + data = { + key: torch.as_tensor(results[key], dtype=torch.float32) + for key in self.keys_to_read + } - shm = shared_memory.SharedMemory( - create=True, - size=buffer_size, - ) - np_buffer = np.ndarray(arr.shape, dtype=arr.dtype, buffer=shm.buf) - - # Make sure we don't unlink it prematurely: - self.memory_buffers[zarr_file].append(shm) - - output_arrays[key] = np_buffer - - zarr_chunk_size = arr.chunks[0] - - # Define the read boundaries for slicing: - slice_starts = list(range(0, arr.shape[0], zarr_chunk_size)) - slice_stops = [start + zarr_chunk_size for start in slice_starts] - - # Correct the last stop point: - slice_stops[-1] = arr.shape[0] - - # Task organization: - # ( - # zarr_path - file name we're reading, a group - # array_name, - array in that group - # read_idx - a unique integer representing the read we're about to do - # index slice_to_read - the np.slice object representing what in the original file to read - # shared_buffer_name - the unique name of the shared buffer this read will use - # shared_buffer_loc - the slice of the shared buffer to store into - # ) - - for i, (slice_start, slice_stop) in enumerate( - zip(slice_starts, slice_stops) - ): - cpu_slice = np.s_[slice_start:slice_stop] - zarr_slice = np.s_[slice_start:slice_stop] - - length = slice_stop - slice_start - - read_idx = f"{file_id}_{key}_{i}_{length}" - required_idx.append(read_idx) - - task_args = ( - zarr_file, - key, - read_idx, - zarr_slice, - shm.name, - cpu_slice, - ) - self.task_q.put(task_args) - - # Now, let's check for completeness before returning: - completed = False - while not completed: - status, idx = self.ack_q.get() - if status == "done": - if idx not in required_idx: - # Put it back in the queue, it's for another file: - self.ack_q.put((status, idx)) - else: - required_idx.remove(idx) - completed = len(required_idx) == 0 - - return {key: torch.as_tensor(output_arrays[key]) for key in self.keys_to_read} + return data - def read_file_sharded( - self, filename: pathlib.Path, parallel_rank: int, parallel_size: int - ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]: - """ - Read a file and return a dictionary of tensors. - """ - pass + def read_file_sharded( + self, filename: pathlib.Path, parallel_rank: int, parallel_size: int + ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]: + """ + Read a file and return a dictionary of tensors. + """ + pass +else: - def __del__(self): + class TensorStoreZarrReader(BackendReader): """ - Make sure we're not leaving things open we shouldn't be + Null reader for tensorstore zarr files. """ - # Stop all the workers by sending None on the queue: - for child in self.children: - self.task_q.put(None) - - for child in self.children: - child.join() - - open_files = list(self.memory_buffers.keys()) - for zf in open_files: - self.free_shared_memory(zf) + pass class DrivaerMLDataset: @@ -530,6 +267,7 @@ def __init__( preload_depth: int = 2, device_mesh: torch.distributed.DeviceMesh | None = None, placements: dict[str, torch.distributed.tensor.Placement] | None = None, + consumer_stream: torch.cuda.Stream | None = None, ) -> None: if isinstance(data_dir, str): data_dir = pathlib.Path(data_dir) @@ -542,21 +280,10 @@ def __init__( if not data_dir.is_dir(): raise NotADirectoryError(f"Data directory {data_dir} is not a directory") - self._file_type, self._filenames = self._infer_file_type_and_filenames(data_dir) - - # Initialize the file reader object - # Note that for some of these, they could be functions - # But others benefit from having a state, so we use classes: - if self._file_type == "npy": - self.file_reader = NpyFileReader(keys_to_read) - elif self._file_type == "zarr": - # self.file_reader = ZarrFileReader(keys_to_read) - # self.file_reader = ZarrReadController(keys_to_read) - self.file_reader = TensorStoreZarrReader(keys_to_read) - else: - raise ValueError(f"Unsupported file type: {self._file_type}") - self._keys_to_read = keys_to_read + self.file_reader, self._filenames = self._infer_file_type_and_filenames( + data_dir + ) # Check the file names; some can be read well in parallel, while others # are not parallelizable. @@ -578,6 +305,11 @@ def __init__( self.preload_depth = preload_depth self.preload_executor = ThreadPoolExecutor(max_workers=preload_depth) + if consumer_stream is None and self.output_device.type == "cuda": + consumer_stream = torch.cuda.current_stream() + + self.consumer_stream = consumer_stream + def _infer_file_type_and_filenames( self, data_dir: pathlib.Path ) -> tuple[str, list[str]]: @@ -590,10 +322,19 @@ def _infer_file_type_and_filenames( # List the files: files = list(data_dir.iterdir()) + # Initialize the file reader object + # Note that for some of these, they could be functions + # But others benefit from having a state, so we use classes: + if all(file.suffix == ".npy" for file in files): - return "npy", files + file_reader = NpyFileReader(self._keys_to_read) + return file_reader, files elif all(file.suffix == ".zarr" and file.is_dir() for file in files): - return "zarr", files + if TENSORSTORE_AVAILABLE: + file_reader = TensorStoreZarrReader(self._keys_to_read) + else: + file_reader = ZarrFileReader(self._keys_to_read) + return file_reader, files else: # TODO - support folders of stl, vtp, vtu. raise ValueError(f"Unsupported file type: {files}") @@ -613,15 +354,18 @@ def _move_to_gpu( if self.output_device.type != "cuda": return data + # result = StreamDict() result = {} with torch.cuda.stream(self._data_loader_stream): for key in data.keys(): # Move to GPU if available result[key] = data[key].to(self.output_device, non_blocking=True) - - self._transfer_events[idx] = torch.cuda.Event() - self._transfer_events[idx].record(self._data_loader_stream) + result[key].record_stream(self.consumer_stream) + # Mark the consumer stream: + transfer_event = torch.cuda.Event() + transfer_event.record(self._data_loader_stream) + # result.set_event("transfer", transfer_event) return result @@ -684,13 +428,9 @@ def preload(self, idx: int) -> None: return def _preload_worker(): - try: - data = self._read_file(self._filenames[idx]) - # Convert to torch tensors - return self._move_to_gpu(data, idx) - except Exception as e: - print(f"Exception in preload: {e}") - raise e + data = self._read_file(self._filenames[idx]) + # Convert to torch tensors + return self._move_to_gpu(data, idx) self._preload_queue[idx] = self.preload_executor.submit(_preload_worker) @@ -777,7 +517,7 @@ def __getitem__(self, idx: int) -> dict[str, torch.Tensor | ShardTensor]: # This blocks until the preprocessing has transferred to GPU if idx in self._transfer_events: - torch.cuda.current_stream().wait_event(self._transfer_events[idx]) + self.consumer_stream.wait_event(self._transfer_events[idx]) self._transfer_events.pop(idx) # Convert to ShardTensors if using domain parallelism diff --git a/physicsnemo/utils/sdf.py b/physicsnemo/utils/sdf.py index 446b7b5d54..495b0839be 100644 --- a/physicsnemo/utils/sdf.py +++ b/physicsnemo/utils/sdf.py @@ -67,13 +67,14 @@ def _bvh_query_distance( sdf_hit_point[tid] = p_closest +@torch.library.custom_op("physicsnemo::signed_distance_field", mutates_args=()) def signed_distance_field( mesh_vertices: torch.Tensor, mesh_indices: torch.Tensor, input_points: torch.Tensor, max_dist: float = 1e8, use_sign_winding_number: bool = False, -): +) -> tuple[torch.Tensor, torch.Tensor]: """ Computes the signed distance field (SDF) for a given mesh and input points. @@ -131,23 +132,6 @@ def signed_distance_field( sdf = torch.zeros(N, dtype=torch.float32, device=input_points.device) sdf_hit_point = torch.zeros(N, 3, dtype=torch.float32, device=input_points.device) - wp.init() - - # zero copy the vertices, indices, and input points to warp: - wp_vertices = wp.from_torch(mesh_vertices, dtype=wp.vec3) - wp_indices = wp.from_torch(mesh_indices, dtype=wp.int32) - wp_input_points = wp.from_torch(input_points, dtype=wp.vec3) - - # Convert output points: - wp_sdf = wp.from_torch(sdf, dtype=wp.float32) - wp_sdf_hit_point = wp.from_torch(sdf_hit_point, dtype=wp.vec3f) - - mesh = wp.Mesh( - points=wp_vertices, - indices=wp_indices, - support_winding_number=use_sign_winding_number, - ) - if input_points.device.type == "cuda": wp_launch_stream = wp.stream_from_torch( torch.cuda.current_stream(input_points.device) @@ -157,23 +141,70 @@ def signed_distance_field( wp_launch_stream = None wp_launch_device = "cpu" # CPUs have no streams - wp.launch( - kernel=_bvh_query_distance, - dim=N, - inputs=[ - mesh.id, - wp_input_points, - max_dist, - wp_sdf, - wp_sdf_hit_point, - use_sign_winding_number, - ], - device=wp_launch_device, - stream=wp_launch_stream, - ) + with wp.ScopedStream(wp_launch_stream): + wp.init() + + # zero copy the vertices, indices, and input points to warp: + wp_vertices = wp.from_torch(mesh_vertices, dtype=wp.vec3) + wp_indices = wp.from_torch(mesh_indices, dtype=wp.int32) + wp_input_points = wp.from_torch(input_points, dtype=wp.vec3) + + # Convert output points: + wp_sdf = wp.from_torch(sdf, dtype=wp.float32) + wp_sdf_hit_point = wp.from_torch(sdf_hit_point, dtype=wp.vec3f) + + mesh = wp.Mesh( + points=wp_vertices, + indices=wp_indices, + support_winding_number=use_sign_winding_number, + ) + + wp.launch( + kernel=_bvh_query_distance, + dim=N, + inputs=[ + mesh.id, + wp_input_points, + max_dist, + wp_sdf, + wp_sdf_hit_point, + use_sign_winding_number, + ], + device=wp_launch_device, + stream=wp_launch_stream, + ) # Unflatten the output to be like the input: sdf = sdf.reshape(input_shape[:-1] + (1,)) sdf_hit_point = sdf_hit_point.reshape(input_shape) return sdf, sdf_hit_point + + +@signed_distance_field.register_fake +def _( + mesh_vertices: torch.Tensor, + mesh_indices: torch.Tensor, + input_points: torch.Tensor, + max_dist: float = 1e8, + use_sign_winding_number: bool = False, +) -> tuple[torch.Tensor, torch.Tensor]: + if mesh_vertices.device != input_points.device: + raise RuntimeError("mesh_vertices and input_points must be on the same device") + + if mesh_vertices.device != mesh_indices.device: + raise RuntimeError("mesh_vertices and mesh_indices must be on the same device") + + if mesh_vertices.shape[0] != mesh_indices.shape[0]: + raise RuntimeError( + "mesh_vertices and mesh_indices must have the same number of points" + ) + + N = input_points.shape[0] + + sdf_output = torch.empty(N, 1, device=input_points.device, dtype=input_points.dtype) + sdf_hit_point_output = torch.empty( + N, 3, device=input_points.device, dtype=input_points.dtype + ) + + return sdf_output, sdf_hit_point_output From 7fb5f8eb938e8be8d41363547cc0b5d388b72d9c Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 27 Aug 2025 14:19:16 +0000 Subject: [PATCH 06/98] Rename datapipe file to dataset. --- .../cae/{drivaer_ml_datapipe.py => drivaer_ml_dataset.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename physicsnemo/datapipes/cae/{drivaer_ml_datapipe.py => drivaer_ml_dataset.py} (100%) diff --git a/physicsnemo/datapipes/cae/drivaer_ml_datapipe.py b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py similarity index 100% rename from physicsnemo/datapipes/cae/drivaer_ml_datapipe.py rename to physicsnemo/datapipes/cae/drivaer_ml_dataset.py From 0fb0ed25e08b3dd1a4fbf6ceacf416a0786f70dc Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 27 Aug 2025 15:22:04 +0000 Subject: [PATCH 07/98] Update SDF function and test. Auto convert higher precisions to match the kernel precision. The test had some expected numbers that, I believe, were incorrect. --- physicsnemo/utils/sdf.py | 8 +++---- test/utils/test_sdf.py | 51 +++++++++++++++++++++++++--------------- 2 files changed, 36 insertions(+), 23 deletions(-) diff --git a/physicsnemo/utils/sdf.py b/physicsnemo/utils/sdf.py index 495b0839be..f9216bdd16 100644 --- a/physicsnemo/utils/sdf.py +++ b/physicsnemo/utils/sdf.py @@ -145,9 +145,9 @@ def signed_distance_field( wp.init() # zero copy the vertices, indices, and input points to warp: - wp_vertices = wp.from_torch(mesh_vertices, dtype=wp.vec3) - wp_indices = wp.from_torch(mesh_indices, dtype=wp.int32) - wp_input_points = wp.from_torch(input_points, dtype=wp.vec3) + wp_vertices = wp.from_torch(mesh_vertices.to(torch.float32), dtype=wp.vec3) + wp_indices = wp.from_torch(mesh_indices.to(torch.int32), dtype=wp.int32) + wp_input_points = wp.from_torch(input_points.to(torch.float32), dtype=wp.vec3) # Convert output points: wp_sdf = wp.from_torch(sdf, dtype=wp.float32) @@ -178,7 +178,7 @@ def signed_distance_field( sdf = sdf.reshape(input_shape[:-1] + (1,)) sdf_hit_point = sdf_hit_point.reshape(input_shape) - return sdf, sdf_hit_point + return sdf.to(input_points.dtype), sdf_hit_point.to(input_points.dtype) @signed_distance_field.register_fake diff --git a/test/utils/test_sdf.py b/test/utils/test_sdf.py index 107e5e0316..f449469b5a 100644 --- a/test/utils/test_sdf.py +++ b/test/utils/test_sdf.py @@ -16,12 +16,13 @@ # ruff: noqa: E402 -import numpy as np +import pytest +import torch from pytest_utils import import_or_fail def tet_verts(flip_x=1): - tet = np.array( + tet = torch.tensor( [ flip_x * 0, 0, @@ -60,35 +61,47 @@ def tet_verts(flip_x=1): 0, 1, ], - dtype=np.float64, + dtype=torch.float64, ) return tet @import_or_fail("warp") -def test_sdf(pytestconfig): +@pytest.mark.parametrize("dtype", [torch.float32, torch.float64]) +@pytest.mark.parametrize("device", ["cpu", "cuda"]) +def test_sdf(pytestconfig, dtype, device): from physicsnemo.utils.sdf import signed_distance_field - tet = tet_verts() + mesh_vertices = tet_verts().reshape(-1, 3) - sdf_tet = signed_distance_field( - tet, - np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]), - np.array([1, 1, 1, 0.1, 0.1, 0.1], dtype=np.float64), + if device == "cuda": + device = torch.device("cuda") + else: + device = torch.device("cpu") + + mesh_indices = torch.tensor( + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], dtype=torch.int32 ) - np.testing.assert_allclose(sdf_tet, [1.15470052, -0.1], atol=1e-7) + input_points = torch.tensor([[1, 1, 1], [0.05, 0.1, 0.1]], dtype=torch.float64) + + mesh_vertices = mesh_vertices.to(dtype) + input_points = input_points.to(dtype) - sdf_tet, sdf_hit_point, sdf_hit_point_id = signed_distance_field( - tet, - np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], dtype=np.int32), - np.array([1, 1, 1, 0.12, 0.11, 0.1], dtype=np.float64), - include_hit_points=True, - include_hit_points_id=True, + sdf_tet, sdf_hit_point = signed_distance_field( + mesh_vertices, + mesh_indices, + input_points, + use_sign_winding_number=False, ) - np.testing.assert_allclose( + + expected_sdf = torch.tensor([[1.1547], [-0.05]], dtype=dtype) + assert torch.allclose(sdf_tet, expected_sdf, atol=1e-7) + + assert torch.allclose( sdf_hit_point, - [[0.33333322, 0.33333334, 0.3333334], [0.12000002, 0.11, 0.0]], + torch.tensor( + [[0.33333322, 0.33333334, 0.3333334], [0.0, 0.10, 0.10]], dtype=dtype + ), atol=1e-7, ) - np.testing.assert_allclose(sdf_hit_point_id, [3, 0], atol=1e-7) From 60c3535ab2f209c7773846cd574005351187d195 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 27 Aug 2025 15:32:20 +0000 Subject: [PATCH 08/98] Add IO benchmark --- .../domino/src/benchmark_dataloader.py | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py index 95b39cedd3..b1f5184fc6 100644 --- a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py +++ b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py @@ -54,7 +54,7 @@ from physicsnemo.launch.utils import load_checkpoint, save_checkpoint from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper -from physicsnemo.datapipes.cae.domino_datapipe import ( +from physicsnemo.datapipes.cae.domino_datapipe2 import ( DoMINODataPipe, compute_scaling_factors, create_domino_dataset, @@ -88,13 +88,14 @@ def train_epoch( print(f"indices: {indices}") # If you tell the dataloader the indices in advance, it will preload # and pre-preprocess data - dataloader.set_indices(indices) + # dataloader.set_indices(indices) gpu_start_info = nvmlDeviceGetMemoryInfo(gpu_handle) start_time = time.perf_counter() for i_batch, sample_batched in enumerate(dataloader): # sampled_batched = dict_to_device(sample_batched, device) - + # if i_batch == 7: + # break # for key in sampled_batched.keys(): # print(f"{key}: {sampled_batched[key].shape}") @@ -232,14 +233,15 @@ def main(cfg: DictConfig) -> None: logger.info(f"Device {dist.device}, epoch {epoch}:") epoch_start_time = time.perf_counter() - train_epoch( - dataloader=train_dataset, - sampler=train_sampler, - logger=logger, - gpu_handle=gpu_handle, - epoch_index=epoch, - device=dist.device, - ) + with Profiler(): + train_epoch( + dataloader=train_dataset, + sampler=train_sampler, + logger=logger, + gpu_handle=gpu_handle, + epoch_index=epoch, + device=dist.device, + ) epoch_end_time = time.perf_counter() logger.info( f"Device {dist.device}, Epoch {epoch} took {epoch_end_time - epoch_start_time:.3f} seconds" @@ -247,4 +249,7 @@ def main(cfg: DictConfig) -> None: if __name__ == "__main__": + # Profiler().enable("torch") + # Profiler().initialize() main() + # Profiler().finalize() From 0c668d94a67f0772f2010e7c66d98b92aa69d8c8 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 27 Aug 2025 09:07:46 -0700 Subject: [PATCH 09/98] Minor bug fixes --- physicsnemo/datapipes/cae/domino_datapipe2.py | 5 +++-- physicsnemo/datapipes/cae/drivaer_ml_dataset.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py index 86e2f88539..9aa5ae40cf 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe2.py +++ b/physicsnemo/datapipes/cae/domino_datapipe2.py @@ -38,7 +38,7 @@ from torch import Tensor from torch.utils.data import Dataset -from physicsnemo.datapipes.cae.drivaer_ml_datapipe import ( +from physicsnemo.datapipes.cae.drivaer_ml_dataset import ( DrivaerMLDataset, compute_mean_std_min_max, ) @@ -742,7 +742,7 @@ def process_data(self, data_dict, idx: int): return_dict.update( { "length_scale": length_scale, - "surf_grid_max_min": surf_grid_max_min, + "surface_min_max": surf_grid_max_min, } ) @@ -754,6 +754,7 @@ def process_data(self, data_dict, idx: int): stl_vertices=data_dict["stl_coordinates"], mesh_indices_flattened=mesh_indices_flattened, ) + return_dict["surf_grid"] = surf_grid return_dict["sdf_surf_grid"] = sdf_surf_grid return_dict["geometry_coordinates"] = geom_centers diff --git a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py index 67e137bf13..aac34197ea 100644 --- a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py +++ b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py @@ -181,8 +181,8 @@ def __init__(self, keys_to_read: list[str] | None) -> None: self.context = ts.Context( { - "cache_pool": {"total_bytes_limit": 30_000_000}, - "data_copy_concurrency": {"limit": 60}, + "cache_pool": {"total_bytes_limit": 10_000_000}, + "data_copy_concurrency": {"limit": 72}, } ) From 70e6130b62c272cff5d1654899f44f0a154745f5 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 27 Aug 2025 10:57:12 -0700 Subject: [PATCH 10/98] Few bug fixes --- physicsnemo/datapipes/cae/domino_datapipe2.py | 10 ++++++---- physicsnemo/utils/sdf.py | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py index 9aa5ae40cf..6a0ce133fa 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe2.py +++ b/physicsnemo/datapipes/cae/domino_datapipe2.py @@ -300,8 +300,8 @@ def __init__( self.keys_to_read = ["stl_coordinates", "stl_centers", "stl_faces", "stl_areas"] self.keys_to_read_if_available = { - "global_params_values": torch.tensor([30.0, 1.226], device=self.device), - "global_params_reference": torch.tensor([30.0, 1.226], device=self.device), + "global_params_values": torch.tensor([[30.0], [1.226]], device=self.device), + "global_params_reference": torch.tensor([[30.0], [1.226]], device=self.device), } self.volume_keys = ["volume_mesh_centers", "volume_fields"] @@ -389,7 +389,6 @@ def process_combined( surf_grid, use_sign_winding_number=True, ) - sdf_surf_grid = surf_grid if self.config.sampling: geometry_points = self.config.geom_points_sample @@ -497,12 +496,14 @@ def process_surface( pos_normals_com_surface = pos_normals_com_surface[idx_surface] # Now, perform the kNN on the sampled points: + print(self.config.num_surface_neighbors) if self.config.num_surface_neighbors > 1: neighbor_indices, neighbor_distances = knn( points=surface_coordinates, queries=surface_coordinates_sampled, k=self.config.num_surface_neighbors, ) + print(f"datapipe neighbor_indices: {neighbor_indices.shape}") # Pull out the neighbor elements. Note that ii is the index into the original # points - but only exists for the sampled points @@ -529,7 +530,7 @@ def process_surface( queries=surface_coordinates, k=self.config.num_surface_neighbors, ) - + print(f"datapipe neighbor_indices: {neighbor_indices.shape}") # Construct the neighbors arrays: surface_neighbors = surface_coordinates[neighbor_indices][:, 1:] surface_neighbors_normals = surface_normals[neighbor_indices][:, 1:] @@ -755,6 +756,7 @@ def process_data(self, data_dict, idx: int): mesh_indices_flattened=mesh_indices_flattened, ) return_dict["surf_grid"] = surf_grid + print(f"datapipe sdf_surf_grid: {sdf_surf_grid.shape}") return_dict["sdf_surf_grid"] = sdf_surf_grid return_dict["geometry_coordinates"] = geom_centers diff --git a/physicsnemo/utils/sdf.py b/physicsnemo/utils/sdf.py index f9216bdd16..08f9c8c4c2 100644 --- a/physicsnemo/utils/sdf.py +++ b/physicsnemo/utils/sdf.py @@ -175,7 +175,7 @@ def signed_distance_field( ) # Unflatten the output to be like the input: - sdf = sdf.reshape(input_shape[:-1] + (1,)) + sdf = sdf.reshape(input_shape[:-1]) sdf_hit_point = sdf_hit_point.reshape(input_shape) return sdf.to(input_points.dtype), sdf_hit_point.to(input_points.dtype) From 4c26ae128a509c06f3a1d7111cd811fc9be425c6 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 27 Aug 2025 12:27:10 -0700 Subject: [PATCH 11/98] A few more fixes for domino. --- physicsnemo/datapipes/cae/domino_datapipe2.py | 20 +++++++++++-------- physicsnemo/models/domino/model.py | 2 ++ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py index 6a0ce133fa..e88c988a0c 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe2.py +++ b/physicsnemo/datapipes/cae/domino_datapipe2.py @@ -496,14 +496,12 @@ def process_surface( pos_normals_com_surface = pos_normals_com_surface[idx_surface] # Now, perform the kNN on the sampled points: - print(self.config.num_surface_neighbors) if self.config.num_surface_neighbors > 1: neighbor_indices, neighbor_distances = knn( points=surface_coordinates, queries=surface_coordinates_sampled, k=self.config.num_surface_neighbors, ) - print(f"datapipe neighbor_indices: {neighbor_indices.shape}") # Pull out the neighbor elements. Note that ii is the index into the original # points - but only exists for the sampled points @@ -660,7 +658,8 @@ def process_volume( volume_coordinates, use_sign_winding_number=True, ) - + sdf_nodes = sdf_nodes.reshape((-1, 1)) + if self.config.positional_encoding: pos_normals_closest_vol = calculate_normal_positional_encoding( volume_coordinates, @@ -756,7 +755,7 @@ def process_data(self, data_dict, idx: int): mesh_indices_flattened=mesh_indices_flattened, ) return_dict["surf_grid"] = surf_grid - print(f"datapipe sdf_surf_grid: {sdf_surf_grid.shape}") + return_dict["sdf_surf_grid"] = sdf_surf_grid return_dict["geometry_coordinates"] = geom_centers @@ -788,16 +787,18 @@ def process_data(self, data_dict, idx: int): ) return_dict.update(surface_dict) - if self.device.type == "cuda": - self._preprocess_events[idx] = torch.cuda.Event() - self._preprocess_events[idx].record(self.preprocess_stream) - # Mark all cuda tensors to be consumed on the main stream: if self.device.type == "cuda": for key in return_dict.keys(): if isinstance(return_dict[key], torch.Tensor): return_dict[key].record_stream(torch.cuda.default_stream()) + + if self.device.type == "cuda": + self._preprocess_events[idx] = torch.cuda.Event() + self._preprocess_events[idx].record(self.preprocess_stream) + + return return_dict @profile @@ -826,6 +827,9 @@ def __getitem__(self, idx): torch.cuda.current_stream().wait_event(self._preprocess_events[idx]) self._preprocess_events.pop(idx) + # Add a batch dimension to the data_dict + data_dict = {k: v.unsqueeze(0) for k, v in data_dict.items()} + return data_dict def idx_to_index(self, idx): diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py index c95f971e97..4aad8c4f35 100644 --- a/physicsnemo/models/domino/model.py +++ b/physicsnemo/models/domino/model.py @@ -157,6 +157,8 @@ def forward( batch_size = x.shape[0] nx, ny, nz = self.grid_resolution + print(f"p_grid shape: {p_grid.shape}") + print(f"x shape: {x.shape}") p_grid = torch.reshape(p_grid, (batch_size, nx * ny * nz, 3)) if reverse_mapping: From 45100efe60badd27c28ed25d3fe4ed53521f5616 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 27 Aug 2025 19:38:24 +0000 Subject: [PATCH 12/98] Fix pre-commit issues --- physicsnemo/datapipes/cae/domino_datapipe2.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py index e88c988a0c..a24084ae7b 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe2.py +++ b/physicsnemo/datapipes/cae/domino_datapipe2.py @@ -301,7 +301,9 @@ def __init__( self.keys_to_read_if_available = { "global_params_values": torch.tensor([[30.0], [1.226]], device=self.device), - "global_params_reference": torch.tensor([[30.0], [1.226]], device=self.device), + "global_params_reference": torch.tensor( + [[30.0], [1.226]], device=self.device + ), } self.volume_keys = ["volume_mesh_centers", "volume_fields"] @@ -659,7 +661,7 @@ def process_volume( use_sign_winding_number=True, ) sdf_nodes = sdf_nodes.reshape((-1, 1)) - + if self.config.positional_encoding: pos_normals_closest_vol = calculate_normal_positional_encoding( volume_coordinates, @@ -793,12 +795,10 @@ def process_data(self, data_dict, idx: int): if isinstance(return_dict[key], torch.Tensor): return_dict[key].record_stream(torch.cuda.default_stream()) - if self.device.type == "cuda": self._preprocess_events[idx] = torch.cuda.Event() self._preprocess_events[idx].record(self.preprocess_stream) - return return_dict @profile From 675c5469321dad4bfd7a36a892c1dac97ed2d562 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 3 Sep 2025 06:27:13 -0700 Subject: [PATCH 13/98] Port domino utils from cupy/numpy to pure torch. Update domino_datapipe2 (temporary name). --- physicsnemo/datapipes/cae/__init__.py | 2 +- physicsnemo/datapipes/cae/domino_datapipe2.py | 164 ++++---- .../datapipes/cae/drivaer_ml_dataset.py | 10 +- physicsnemo/models/domino/model.py | 30 +- physicsnemo/utils/domino/utils.py | 377 +++++++----------- physicsnemo/utils/neighbors/knn/_cuml_impl.py | 11 +- .../neighbors/radius_search/_warp_impl.py | 167 ++++---- test/utils/test_domino_utils.py | 120 +++--- 8 files changed, 394 insertions(+), 487 deletions(-) diff --git a/physicsnemo/datapipes/cae/__init__.py b/physicsnemo/datapipes/cae/__init__.py index c0d17ff723..9af8d88db2 100644 --- a/physicsnemo/datapipes/cae/__init__.py +++ b/physicsnemo/datapipes/cae/__init__.py @@ -14,5 +14,5 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .domino_datapipe import DoMINODataPipe +from .domino_datapipe2 import DoMINODataPipe from .mesh_datapipe import MeshDatapipe diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py index a24084ae7b..dcc82d49cd 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe2.py +++ b/physicsnemo/datapipes/cae/domino_datapipe2.py @@ -45,7 +45,6 @@ from physicsnemo.distributed import DistributedManager from physicsnemo.utils.domino.utils import ( ArrayType, - area_weighted_shuffle_array, calculate_center_of_mass, calculate_normal_positional_encoding, create_grid, @@ -232,10 +231,8 @@ def __init__( if self.config.gpu_preprocessing or self.config.gpu_output: # Make sure we move data to the right device: target_device = dist.device - self.preprocess_stream = torch.cuda.Stream() else: target_device = torch.device("cpu") - self.preprocess_stream = None self.device = target_device @@ -323,7 +320,7 @@ def __init__( data_dir=self.config.data_path, keys_to_read=self.keys_to_read, output_device=self.device, - consumer_stream=self.preprocess_stream, + consumer_stream=torch.cuda.default_stream(), ) # This is thread storage for data preprocessing: @@ -344,7 +341,6 @@ def set_indices(self, indices: list[int]): def __len__(self): return len(self.dataset) - @torch.compile(dynamic=True) def compute_stl_scaling( self, stl_vertices: torch.Tensor, bounding_box_dims_surf: torch.Tensor | None ): @@ -407,7 +403,6 @@ def process_combined( return (sdf_surf_grid, geom_centers) - @torch.compile(dynamic=True) def process_surface( self, s_min: torch.Tensor, @@ -530,7 +525,7 @@ def process_surface( queries=surface_coordinates, k=self.config.num_surface_neighbors, ) - print(f"datapipe neighbor_indices: {neighbor_indices.shape}") + # Construct the neighbors arrays: surface_neighbors = surface_coordinates[neighbor_indices][:, 1:] surface_neighbors_normals = surface_normals[neighbor_indices][:, 1:] @@ -570,7 +565,6 @@ def process_surface( return return_dict - @torch.compile(dynamic=True) def process_volume( self, s_min: torch.Tensor, @@ -707,101 +701,89 @@ def process_volume( return return_dict - @profile + @torch.no_grad() def process_data(self, data_dict, idx: int): for key in self.keys_to_read_if_available.keys(): if key not in data_dict: data_dict[key] = self.keys_to_read_if_available[key] - with torch.cuda.stream(self.preprocess_stream): - if self.config.deterministic: - torch.manual_seed(idx) + if self.config.deterministic: + torch.manual_seed(idx) - # Start building the preprocessed return dict: - return_dict = { - "global_params_values": data_dict["global_params_values"], - "global_params_reference": data_dict["global_params_reference"], - } + # Start building the preprocessed return dict: + return_dict = { + "global_params_values": data_dict["global_params_values"], + "global_params_reference": data_dict["global_params_reference"], + } - # This function gets information about the surface scale, - # and decides what the surface grid will be: - (s_min, s_max, length_scale, surf_grid_max_min, surf_grid) = ( - self.compute_stl_scaling( - data_dict["stl_coordinates"], self.config.bounding_box_dims_surf - ) + # This function gets information about the surface scale, + # and decides what the surface grid will be: + (s_min, s_max, length_scale, surf_grid_max_min, surf_grid) = ( + self.compute_stl_scaling( + data_dict["stl_coordinates"], self.config.bounding_box_dims_surf ) + ) - # This is a center of mass computation for the stl surface, - # using the size of each mesh point as weight. + # This is a center of mass computation for the stl surface, + # using the size of each mesh point as weight. - center_of_mass = calculate_center_of_mass( - data_dict["stl_centers"], data_dict["stl_areas"] - ) + center_of_mass = calculate_center_of_mass( + data_dict["stl_centers"], data_dict["stl_areas"] + ) - # For SDF calculations, make sure the mesh_indices_flattened is an integer array: - mesh_indices_flattened = data_dict["stl_faces"].to(torch.int32) + # For SDF calculations, make sure the mesh_indices_flattened is an integer array: + mesh_indices_flattened = data_dict["stl_faces"].to(torch.int32) - return_dict.update( - { - "length_scale": length_scale, - "surface_min_max": surf_grid_max_min, - } - ) + return_dict.update( + { + "length_scale": length_scale, + "surface_min_max": surf_grid_max_min, + } + ) + + # This will compute the sdf on the surface grid and apply downsampling if needed + sdf_surf_grid, geom_centers = self.process_combined( + s_min, + s_max, + surf_grid, + stl_vertices=data_dict["stl_coordinates"], + mesh_indices_flattened=mesh_indices_flattened, + ) + return_dict["surf_grid"] = surf_grid + + return_dict["sdf_surf_grid"] = sdf_surf_grid + return_dict["geometry_coordinates"] = geom_centers - # This will compute the sdf on the surface grid and apply downsampling if needed - sdf_surf_grid, geom_centers = self.process_combined( + # Up to here works all in torch! + + if self.model_type == "volume" or self.model_type == "combined": + volume_dict = self.process_volume( s_min, s_max, - surf_grid, + volume_coordinates=data_dict["volume_mesh_centers"], + volume_fields=data_dict["volume_fields"], stl_vertices=data_dict["stl_coordinates"], mesh_indices_flattened=mesh_indices_flattened, + center_of_mass=center_of_mass, ) - return_dict["surf_grid"] = surf_grid - - return_dict["sdf_surf_grid"] = sdf_surf_grid - return_dict["geometry_coordinates"] = geom_centers - - # Up to here works all in torch! - - if self.model_type == "volume" or self.model_type == "combined": - volume_dict = self.process_volume( - s_min, - s_max, - volume_coordinates=data_dict["volume_mesh_centers"], - volume_fields=data_dict["volume_fields"], - stl_vertices=data_dict["stl_coordinates"], - mesh_indices_flattened=mesh_indices_flattened, - center_of_mass=center_of_mass, - ) - return_dict.update(volume_dict) - - if self.model_type == "surface" or self.model_type == "combined": - surface_dict = self.process_surface( - s_min, - s_max, - center_of_mass, - surf_grid, - surface_coordinates=data_dict["surface_mesh_centers"], - surface_normals=data_dict["surface_normals"], - surface_sizes=data_dict["surface_areas"], - surface_fields=data_dict["surface_fields"], - ) - return_dict.update(surface_dict) - - # Mark all cuda tensors to be consumed on the main stream: - if self.device.type == "cuda": - for key in return_dict.keys(): - if isinstance(return_dict[key], torch.Tensor): - return_dict[key].record_stream(torch.cuda.default_stream()) + return_dict.update(volume_dict) - if self.device.type == "cuda": - self._preprocess_events[idx] = torch.cuda.Event() - self._preprocess_events[idx].record(self.preprocess_stream) + if self.model_type == "surface" or self.model_type == "combined": + surface_dict = self.process_surface( + s_min, + s_max, + center_of_mass, + surf_grid, + surface_coordinates=data_dict["surface_mesh_centers"], + surface_normals=data_dict["surface_normals"], + surface_sizes=data_dict["surface_areas"], + surface_fields=data_dict["surface_fields"], + ) + return_dict.update(surface_dict) return return_dict - @profile def __getitem__(self, idx): """ Function for fetching and processing a single file's data. @@ -822,11 +804,6 @@ def __getitem__(self, idx): data_dict = self.dataset[index] data_dict = self.process_data(data_dict, idx) - # This blocks the main stream until the preprocessing has transferred to GPU - if idx in self._preprocess_events: - torch.cuda.current_stream().wait_event(self._preprocess_events[idx]) - self._preprocess_events.pop(idx) - # Add a batch dimension to the data_dict data_dict = {k: v.unsqueeze(0) for k, v in data_dict.items()} @@ -889,12 +866,9 @@ def __next__(self): # Start loading two ahead: if len(self.dataset) >= current_idx + 2: + self.dataset.preload(self.idx_to_index(current_idx + 1)) self.dataset.preload(self.idx_to_index(current_idx + 2)) - # Start preprocessing one ahead: - if len(self.dataset) >= current_idx + 1: - self.preprocess(current_idx + 1) - # If no preprocessing was done for this index, process it now data = self.__getitem__(current_idx) @@ -914,13 +888,9 @@ def __iter__(self): if len(self.dataset) >= 2: self.dataset.preload(self.idx_to_index(self.i + 1)) - # Start preprocessing index 0 - self.preprocess(self.i) - return self -@profile def compute_scaling_factors(cfg: DictConfig, input_path: str, use_cache: bool) -> None: # Create a dataset for just the field keys: @@ -1038,10 +1008,10 @@ def __getitem__(self, idx): # Sample surface points if present if "surface_mesh_centers" in result and self.surface_points: if self.surface_sampling_algorithm == "area_weighted": - coords_sampled, idx_surface = area_weighted_shuffle_array( - result["surface_mesh_centers"], - self.surface_points, - result["surface_areas"], + coords_sampled, idx_surface = shuffle_array( + points=result["surface_mesh_centers"], + n_points=self.surface_points, + weights=result["surface_areas"], ) else: coords_sampled, idx_surface = shuffle_array( diff --git a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py index aac34197ea..78f9407ebd 100644 --- a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py +++ b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py @@ -292,6 +292,7 @@ def __init__( self.output_device = output_device if output_device.type == "cuda": + # self._data_loader_stream = torch.cuda.default_stream() self._data_loader_stream = torch.cuda.Stream() else: self._data_loader_stream = None @@ -362,10 +363,11 @@ def _move_to_gpu( # Move to GPU if available result[key] = data[key].to(self.output_device, non_blocking=True) result[key].record_stream(self.consumer_stream) - # Mark the consumer stream: - transfer_event = torch.cuda.Event() - transfer_event.record(self._data_loader_stream) - # result.set_event("transfer", transfer_event) + + # Mark the consumer stream: + transfer_event = torch.cuda.Event() + transfer_event.record(self._data_loader_stream) + self._transfer_events[idx] = transfer_event return result diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py index 4aad8c4f35..ff0a5482c8 100644 --- a/physicsnemo/models/domino/model.py +++ b/physicsnemo/models/domino/model.py @@ -155,11 +155,8 @@ def forward( - outputs: Tensor containing coordinates of the neighboring points """ batch_size = x.shape[0] - nx, ny, nz = self.grid_resolution - print(f"p_grid shape: {p_grid.shape}") - print(f"x shape: {x.shape}") - p_grid = torch.reshape(p_grid, (batch_size, nx * ny * nz, 3)) + p_grid = p_grid.reshape(batch_size, -1, 3) if reverse_mapping: mapping, outputs = radius_search( @@ -594,15 +591,23 @@ def forward( if self.geo_encoding_type == "both" or self.geo_encoding_type == "stl": # Calculate multi-scale geoemtry dependency x_encoding = [] + for j in range(len(self.radii)): - mapping, k_short = self.bq_warp[j](x, p_grid) - x_encoding_inter = self.geo_conv_out[j](k_short, p_grid) - # Propagate information in the geometry enclosed BBox - for _ in range(self.hops): - dx = self.geo_processors[j](x_encoding_inter) / self.hops - x_encoding_inter = x_encoding_inter + dx - x_encoding_inter = self.geo_processor_out[j](x_encoding_inter) - x_encoding.append(x_encoding_inter) + with torch.autograd.profiler.record_function(f"bq_warp_{j}"): + mapping, k_short = self.bq_warp[j](x, p_grid) + x_encoding_inter = self.geo_conv_out[j](k_short, p_grid) + # Propagate information in the geometry enclosed BBox + for _i in range(self.hops): + with torch.autograd.profiler.record_function( + f"geo_processor_{j}_{_i}" + ): + dx = self.geo_processors[j](x_encoding_inter) / self.hops + x_encoding_inter = x_encoding_inter + dx + x_encoding_inter = self.geo_processor_out[j](x_encoding_inter) + + x_encoding.append(x_encoding_inter) + + # current_stream. x_encoding = torch.cat(x_encoding, dim=1) if self.geo_encoding_type == "both" or self.geo_encoding_type == "sdf": @@ -1661,6 +1666,7 @@ def calculate_solution( return_volume_neighbors=False, ): """Function to approximate solution sampling the neighborhood information""" + if eval_mode == "volume": num_variables = self.num_variables_vol nn_basis = self.nn_basis_vol diff --git a/physicsnemo/utils/domino/utils.py b/physicsnemo/utils/domino/utils.py index 5d63def82e..3abb968c5a 100644 --- a/physicsnemo/utils/domino/utils.py +++ b/physicsnemo/utils/domino/utils.py @@ -19,49 +19,15 @@ This module provides essential utilities for computational fluid dynamics data processing, mesh manipulation, field normalization, and geometric computations. It supports both -CPU (NumPy) and GPU (CuPy) operations with automatic fallbacks. +torch.Tensor operations on either CPU or GPU. """ from pathlib import Path from typing import Any, Sequence -import numpy as np import torch -from scipy.spatial import KDTree -# Type alias for arrays that can be either NumPy or CuPy -try: - import cupy as cp - - ArrayType = np.ndarray | cp.ndarray -except ImportError: - ArrayType = np.ndarray - - -def array_type(array: ArrayType) -> "type[np] | type[cp]": - """Determine the array module (NumPy or CuPy) for the given array. - - This function enables array-agnostic code by returning the appropriate - array module that can be used for operations on the input array. - - Args: - array: Input array that can be either NumPy or CuPy array. - - Returns: - The array module (numpy or cupy) corresponding to the input array type. - - Examples: - >>> import numpy as np - >>> arr = np.array([1, 2, 3]) - >>> xp = array_type(arr) - >>> result = xp.sum(arr) # Uses numpy.sum - """ - try: - import cupy as cp - - return cp.get_array_module(array) - except ImportError: - return np +from physicsnemo.utils.neighbors import knn def calculate_center_of_mass( @@ -73,13 +39,13 @@ def calculate_center_of_mass( in computational fluid dynamics for mesh analysis and load balancing. Args: - centers: Array of shape (n_elements, 3) containing the centroid + centers: torch.Tensor of shape (n_elements, 3) containing the centroid coordinates of each element. - sizes: Array of shape (n_elements,) containing the volume + sizes: torch.Tensor of shape (n_elements,) containing the volume or area of each element used as weights. Returns: - Array of shape (1, 3) containing the x, y, z coordinates of the center of mass. + torch.Tensor of shape (1, 3) containing the x, y, z coordinates of the center of mass. Raises: ValueError: If centers and sizes have incompatible shapes. @@ -111,7 +77,7 @@ def normalize( ensure numerical stability and faster convergence. Args: - field: Input field array to be normalized. + field: Input field tensor to be normalized. max_val: Maximum values for normalization, can be scalar or array. If None, computed from the field data. min_val: Minimum values for normalization, can be scalar or array. @@ -136,9 +102,9 @@ def normalize( """ if max_val is None: - max_val = field.max(axis=0, keepdim=True) + max_val, _ = field.max(axis=0, keepdim=True) if min_val is None: - min_val = field.min(axis=0, keepdim=True) + min_val, _ = field.min(axis=0, keepdim=True) field_range = max_val - min_val return 2.0 * (field - min_val) / field_range - 1.0 @@ -183,7 +149,7 @@ def standardize( when the data follows a normal distribution. Args: - field: Input field array to be standardized. + field: Input field tensor to be standardized. mean: Mean values for standardization. If None, computed from field data. std: Standard deviation values for standardization. If None, computed from field data. @@ -242,10 +208,10 @@ def unstandardize( def calculate_normal_positional_encoding( - coordinates_a: ArrayType, - coordinates_b: ArrayType | None = None, + coordinates_a: torch.Tensor, + coordinates_b: torch.Tensor | None = None, cell_dimensions: Sequence[float] = (1.0, 1.0, 1.0), -) -> ArrayType: +) -> torch.Tensor: """Calculate sinusoidal positional encoding for 3D coordinates. This function computes transformer-style positional encodings for 3D spatial @@ -254,51 +220,51 @@ def calculate_normal_positional_encoding( unique representations for each spatial position. Args: - coordinates_a: Primary coordinates array of shape (n_points, 3). + coordinates_a: Primary coordinates tensor of shape (n_points, 3). coordinates_b: Optional secondary coordinates for computing relative positions. If provided, the encoding is computed for (coordinates_a - coordinates_b). cell_dimensions: Characteristic length scales for x, y, z dimensions used for normalization. Defaults to unit dimensions. Returns: - Array of shape (n_points, 12) containing positional encodings with + torch.Tensor of shape (n_points, 12) containing positional encodings with 4 encoding dimensions per spatial axis (x, y, z). Examples: - >>> import numpy as np - >>> coords = np.array([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]]) + >>> import torch + >>> coords = torch.tensor([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]]) >>> cell_size = [0.1, 0.1, 0.1] >>> encoding = calculate_normal_positional_encoding(coords, cell_dimensions=cell_size) >>> encoding.shape (2, 12) >>> # Relative positioning example - >>> coords_b = np.array([[0.5, 0.5, 0.5], [0.5, 0.5, 0.5]]) + >>> coords_b = torch.tensor([[0.5, 0.5, 0.5], [0.5, 0.5, 0.5]]) >>> encoding_rel = calculate_normal_positional_encoding(coords, coords_b, cell_size) >>> encoding_rel.shape (2, 12) """ dx, dy, dz = cell_dimensions[0], cell_dimensions[1], cell_dimensions[2] - xp = array_type(coordinates_a) if coordinates_b is not None: normals = coordinates_a - coordinates_b - pos_x = xp.asarray(calculate_pos_encoding(normals[:, 0] / dx, d=4)) - pos_y = xp.asarray(calculate_pos_encoding(normals[:, 1] / dy, d=4)) - pos_z = xp.asarray(calculate_pos_encoding(normals[:, 2] / dz, d=4)) - pos_normals = xp.concatenate((pos_x, pos_y, pos_z), axis=0).reshape(-1, 12) + pos_x = torch.cat(calculate_pos_encoding(normals[:, 0] / dx, d=4), dim=-1) + pos_y = torch.cat(calculate_pos_encoding(normals[:, 1] / dy, d=4), dim=-1) + pos_z = torch.cat(calculate_pos_encoding(normals[:, 2] / dz, d=4), dim=-1) + pos_normals = torch.cat((pos_x, pos_y, pos_z), dim=0).reshape(-1, 12) else: normals = coordinates_a - pos_x = xp.asarray(calculate_pos_encoding(normals[:, 0] / dx, d=4)) - pos_y = xp.asarray(calculate_pos_encoding(normals[:, 1] / dy, d=4)) - pos_z = xp.asarray(calculate_pos_encoding(normals[:, 2] / dz, d=4)) - pos_normals = xp.concatenate((pos_x, pos_y, pos_z), axis=0).reshape(-1, 12) + pos_x = torch.cat(calculate_pos_encoding(normals[:, 0] / dx, d=4), dim=-1) + pos_y = torch.cat(calculate_pos_encoding(normals[:, 1] / dy, d=4), dim=-1) + pos_z = torch.cat(calculate_pos_encoding(normals[:, 2] / dz, d=4), dim=-1) + print(pos_x.shape, pos_y.shape, pos_z.shape) + pos_normals = torch.cat((pos_x, pos_y, pos_z), dim=0).reshape(-1, 12) return pos_normals def nd_interpolator( - coordinates: ArrayType, field: ArrayType, grid: ArrayType, k: int = 2 -) -> ArrayType: + coordinates: torch.Tensor, field: torch.Tensor, grid: torch.Tensor, k: int = 2 +) -> torch.Tensor: """Perform n-dimensional interpolation using k-nearest neighbors. This function interpolates field values from scattered points to a regular @@ -306,114 +272,126 @@ def nd_interpolator( fields on regular grids from irregular measurement points. Args: - coordinates: Array of shape (n_points, n_dims) containing source point coordinates. - field: Array of shape (n_points, n_fields) containing field values at source points. - grid: Array of shape (n_field_points, n_dims) containing target grid points for interpolation. + coordinates: torch.Tensor of shape (n_points, n_dims) containing source point coordinates. + field: torch.Tensor of shape (n_points, n_fields) containing field values at source points. + grid: torch.Tensor of shape (n_field_points, n_dims) containing target grid points for interpolation. k: Number of nearest neighbors to use for interpolation. Returns: Interpolated field values at grid points using k-nearest neighbor averaging. - Note: - This function currently uses SciPy's KDTree which only supports CPU arrays. - A future enhancement could add CuML support for GPU acceleration. Examples: - >>> import numpy as np + >>> import torch >>> # Simple 2D interpolation example - >>> coords = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]]) - >>> field_vals = np.array([[1.0], [2.0], [3.0], [4.0]]) - >>> grid_points = np.array([[0.5, 0.5]]) - >>> result = nd_interpolator([coords], field_vals, grid_points) + >>> coords = torch.tensor([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]]) + >>> field_vals = torch.tensor([[1.0], [2.0], [3.0], [4.0]]) + >>> grid_points = torch.tensor([[0.5, 0.5]]) + >>> result = nd_interpolator(coords, field_vals, grid_points) >>> result.shape[0] == 1 # One grid point True """ - # TODO - this function should get updated for cuml if using cupy. - kdtree = KDTree(coordinates[0]) - distances, neighbor_indices = kdtree.query(grid, k=k) + neighbor_indices, distances = knn(coordinates, grid, k=k) field_grid = field[neighbor_indices] - field_grid = np.mean(field_grid, axis=1) + field_grid = torch.mean(field_grid, dim=1) return field_grid -def pad(arr: ArrayType, n_points: int, pad_value: float = 0.0) -> ArrayType: - """Pad 2D array with constant values to reach target size. +def pad(arr: torch.Tensor, n_points: int, pad_value: float = 0.0) -> torch.Tensor: + """Pad 2D tensor with constant values to reach target size. - This function extends a 2D array by adding rows filled with a constant - value. It's commonly used to standardize array sizes in batch processing + This function extends a 2D tensor by adding rows filled with a constant + value. It's commonly used to standardize tensor sizes in batch processing for machine learning applications. Args: - arr: Input array of shape (n_points, n_features) to be padded. + arr: Input tensor of shape (n_points, n_features) to be padded. n_points: Target number of points (rows) after padding. pad_value: Constant value used for padding. Defaults to 0.0. Returns: - Padded array of shape (n_points, n_features). If n_points <= arr.shape[0], - returns the original array unchanged. + Padded tensor of shape (n_points, n_features). If n_points <= arr.shape[0], + returns the original tensor unchanged. Examples: - >>> import numpy as np - >>> arr = np.array([[1.0, 2.0], [3.0, 4.0]]) + >>> import torch + >>> arr = torch.tensor([[1.0, 2.0], [3.0, 4.0]]) >>> padded = pad(arr, 4, -1.0) >>> padded.shape (4, 2) - >>> np.array_equal(padded[:2], arr) + >>> torch.allclose(padded[:2], arr) True - >>> bool(np.all(padded[2:] == -1.0)) + >>> bool(torch.all(padded[2:] == -1.0)) True >>> # No padding needed >>> same = pad(arr, 2) - >>> np.array_equal(same, arr) + >>> torch.allclose(same, arr) True """ - xp = array_type(arr) + if n_points <= arr.shape[0]: return arr - arr_pad = pad_value * xp.ones( - (n_points - arr.shape[0], arr.shape[1]), dtype=xp.float32 + n_pad = n_points - arr.shape[0] + arr_padded = torch.nn.functional.pad( + arr, + ( + 0, + 0, + 0, + n_pad, + ), + mode="constant", + value=pad_value, ) - arr_padded = xp.concatenate((arr, arr_pad), axis=0) return arr_padded -def pad_inp(arr: ArrayType, n_points: int, pad_value: float = 0.0) -> ArrayType: - """Pad 3D array with constant values to reach target size. +def pad_inp(arr: torch.Tensor, n_points: int, pad_value: float = 0.0) -> torch.Tensor: + """Pad 3D tensor with constant values to reach target size. - This function extends a 3D array by adding entries along the first dimension + This function extends a 3D tensor by adding entries along the first dimension filled with a constant value. Used for standardizing 3D tensor sizes in batch processing workflows. Args: - arr: Input array of shape (n_points, height, width) to be padded. + arr: Input tensor of shape (n_points, height, width) to be padded. n_points: Target number of points along first dimension after padding. pad_value: Constant value used for padding. Defaults to 0.0. Returns: - Padded array of shape (n_points, height, width). If n_points <= arr.shape[0], - returns the original array unchanged. + Padded tensor of shape (n_points, height, width). If n_points <= arr.shape[0], + returns the original tensor unchanged. Examples: - >>> import numpy as np - >>> arr = np.array([[[1.0, 2.0]], [[3.0, 4.0]]]) + >>> import torch + >>> arr = torch.tensor([[[1.0, 2.0]], [[3.0, 4.0]]]) >>> padded = pad_inp(arr, 4, 0.0) >>> padded.shape (4, 1, 2) - >>> np.array_equal(padded[:2], arr) + >>> torch.allclose(padded[:2], arr) True - >>> bool(np.all(padded[2:] == 0.0)) + >>> bool(torch.all(padded[2:] == 0.0)) True """ - xp = array_type(arr) if n_points <= arr.shape[0]: return arr - arr_pad = pad_value * xp.ones( - (n_points - arr.shape[0], arr.shape[1], arr.shape[2]), dtype=xp.float32 + n_pad = n_points - arr.shape[0] + arr_padded = torch.nn.functional.pad( + arr, + ( + 0, + 0, + 0, + 0, + 0, + n_pad, + ), + mode="constant", + value=pad_value, ) - arr_padded = xp.concatenate((arr, arr_pad), axis=0) return arr_padded @@ -423,9 +401,9 @@ def shuffle_array( weights: torch.Tensor = None, ): """ - Randomly sample points from array without replacement. + Randomly sample points from tensor without replacement. - This function performs random sampling from the input array, selecting + This function performs random sampling from the input tensor, selecting n_points points without replacement. It's commonly used for creating training subsets and data augmentation in machine learning workflows. @@ -435,14 +413,14 @@ def shuffle_array( If the input is larger than that, it will be split and sampled from each chunk. Args: - arr: Input array to sample from, shape (n_points, ...). + points: Input tensor to sample from, shape (n_points, ...). n_points: Number of points to sample. If greater than arr.shape[0], all points are returned. weights: Optional weights for sampling. If None, uniform weights are used. Returns: Tuple containing: - - Sampled array subset + - Sampled tensor subset - Indices of the selected points Examples: @@ -454,7 +432,7 @@ def shuffle_array( (2, 2) >>> indices.shape (2,) - >>> len(np.unique(indices)) == 2 # No duplicates + >>> len(torch.unique(indices)) == 2 # No duplicates True """ @@ -514,62 +492,21 @@ def shuffle_array( return points_selected, idx -# @profile -# def shuffle_array( -# arr: ArrayType, -# n_points: int, -# ) -> tuple[ArrayType, ArrayType]: -# """Randomly sample points from array without replacement. - -# This function performs random sampling from the input array, selecting -# n_points points without replacement. It's commonly used for creating training -# subsets and data augmentation in machine learning workflows. - -# Args: -# arr: Input array to sample from, shape (n_points, ...). -# n_points: Number of points to sample. If greater than arr.shape[0], -# all points are returned. - -# Returns: -# Tuple containing: -# - Sampled array subset -# - Indices of the selected points - -# Examples: -# >>> import numpy as np -# >>> np.random.seed(42) # For reproducible results -# >>> data = np.array([[1, 2], [3, 4], [5, 6], [7, 8]]) -# >>> subset, indices = shuffle_array(data, 2) -# >>> subset.shape -# (2, 2) -# >>> indices.shape -# (2,) -# >>> len(np.unique(indices)) == 2 # No duplicates -# True -# """ -# xp = array_type(arr) -# if n_points > arr.shape[0]: -# # If asking too many points, truncate the ask but still shuffle. -# n_points = arr.shape[0] -# idx = xp.random.choice(arr.shape[0], size=n_points, replace=False) -# return arr[idx], idx - - def shuffle_array_without_sampling( arr: torch.Tensor, ) -> tuple[torch.Tensor, torch.Tensor]: - """Shuffle array order without changing the number of elements. + """Shuffle tensor order without changing the number of elements. - This function reorders all elements in the array randomly while preserving + This function reorders all elements in the tensor randomly while preserving all data points. It's useful for randomizing data order before training while maintaining the complete dataset. Args: - arr: Input array to shuffle, shape (n_points, ...). + arr: Input tensor to shuffle, shape (n_points, ...). Returns: Tuple containing: - - Shuffled array with same shape as input + - Shuffled tensor with same shape as input - Permutation indices used for shuffling Examples: @@ -636,7 +573,7 @@ def get_filenames(filepath: str | Path, exclude_dirs: bool = False) -> list[str] return filenames -def calculate_pos_encoding(nx: ArrayType, d: int = 8) -> list[ArrayType]: +def calculate_pos_encoding(nx: torch.Tensor, d: int = 8) -> list[torch.Tensor]: """Calculate sinusoidal positional encoding for transformer architectures. This function computes positional encodings using alternating sine and cosine @@ -648,12 +585,12 @@ def calculate_pos_encoding(nx: ArrayType, d: int = 8) -> list[ArrayType]: d: Encoding dimensionality. Must be even number. Defaults to 8. Returns: - List of d arrays containing alternating sine and cosine encodings. + List of d tensors containing alternating sine and cosine encodings. Each pair (sin, cos) uses progressively lower frequencies. Examples: - >>> import numpy as np - >>> positions = np.array([0.0, 1.0, 2.0]) + >>> import torch + >>> positions = torch.tensor([0.0, 1.0, 2.0]) >>> encodings = calculate_pos_encoding(positions, d=4) >>> len(encodings) 4 @@ -661,10 +598,9 @@ def calculate_pos_encoding(nx: ArrayType, d: int = 8) -> list[ArrayType]: True """ vec = [] - xp = array_type(nx) for k in range(int(d / 2)): - vec.append(xp.sin(nx / 10000 ** (2 * k / d))) - vec.append(xp.cos(nx / 10000 ** (2 * k / d))) + vec.append(torch.sin(nx / 10000 ** (2 * k / d))) + vec.append(torch.cos(nx / 10000 ** (2 * k / d))) return vec @@ -715,7 +651,7 @@ def create_grid( resolution: Number of grid points [nx, ny, nz] in each dimension. Returns: - Grid array of shape (nx, ny, nz, 3) containing 3D coordinates for each + Grid tensor of shape (nx, ny, nz, 3) containing 3D coordinates for each grid point. The last dimension contains [x, y, z] coordinates. Examples: @@ -754,7 +690,7 @@ def create_grid( def mean_std_sampling( - field: ArrayType, mean: ArrayType, std: ArrayType, tolerance: float = 3.0 + field: torch.Tensor, mean: torch.Tensor, std: torch.Tensor, tolerance: float = 3.0 ) -> list[int]: """Identify outlier points based on statistical distance from mean. @@ -763,7 +699,7 @@ def mean_std_sampling( It's useful for data cleaning and identifying regions of interest in CFD data. Args: - field: Input field array of shape (n_points, n_components). + field: Input field tensor of shape (n_points, n_components). mean: Mean values for each field component, shape (n_components,). std: Standard deviation for each component, shape (n_components,). tolerance: Number of standard deviations to use as outlier threshold. @@ -773,20 +709,20 @@ def mean_std_sampling( List of indices identifying outlier points that exceed the statistical threshold. Examples: - >>> import numpy as np + >>> import torch >>> # Create test data with outliers - >>> field = np.array([[1.0], [2.0], [3.0], [10.0]]) # 10.0 is outlier - >>> field_mean = np.array([2.0]) - >>> field_std = np.array([1.0]) + >>> field = torch.tensor([[1.0], [2.0], [3.0], [10.0]]) # 10.0 is outlier + >>> field_mean = torch.tensor([2.0]) + >>> field_std = torch.tensor([1.0]) >>> outliers = mean_std_sampling(field, field_mean, field_std, 2.0) >>> 3 in outliers # Index 3 (value 10.0) should be detected as outlier True """ - xp = array_type(field) + idx_all = [] for v in range(field.shape[-1]): fv = field[:, v] - idx = xp.where( + idx = torch.where( (fv > mean[v] + tolerance * std[v]) | (fv < mean[v] - tolerance * std[v]) ) if len(idx[0]) != 0: @@ -830,16 +766,16 @@ def dict_to_device( def area_weighted_shuffle_array( - arr: ArrayType, n_points: int, area: ArrayType, area_factor: float = 1.0 -) -> tuple[ArrayType, ArrayType]: - """Perform area-weighted random sampling from array. + arr: torch.Tensor, n_points: int, area: torch.Tensor, area_factor: float = 1.0 +) -> tuple[torch.Tensor, torch.Tensor]: + """Perform area-weighted random sampling from tensor. - This function samples points from an array with probability proportional to + This function samples points from a tensor with probability proportional to their associated area weights. This is particularly useful in CFD applications where larger cells or surface elements should have higher sampling probability. Args: - arr: Input array to sample from, shape (n_points, ...). + arr: Input tensor to sample from, shape (n_points, ...). n_points: Number of points to sample. If greater than arr.shape[0], samples all available points. area: Area weights for each point, shape (n_points,). Larger values @@ -850,19 +786,18 @@ def area_weighted_shuffle_array( Returns: Tuple containing: - - Sampled array subset weighted by area + - Sampled tensor subset weighted by area - Indices of the selected points Note: - For GPU arrays (CuPy), the sampling is performed on CPU due to memory - efficiency considerations. The Alias method could be implemented for - future GPU acceleration. + For GPU tensors, the sampling is performed on the current device. + The sampling uses torch.multinomial for efficient weighted sampling. Examples: - >>> import numpy as np - >>> np.random.seed(42) # For reproducible results - >>> mesh_data = np.array([[1.0], [2.0], [3.0], [4.0]]) - >>> cell_areas = np.array([0.1, 0.1, 0.1, 10.0]) # Last point has much larger area + >>> import torch + >>> torch.manual_seed(42) # For reproducible results + >>> mesh_data = torch.tensor([[1.0], [2.0], [3.0], [4.0]]) + >>> cell_areas = torch.tensor([0.1, 0.1, 0.1, 10.0]) # Last point has much larger area >>> subset, indices = area_weighted_shuffle_array(mesh_data, 2, cell_areas) >>> subset.shape (2, 1) @@ -874,40 +809,28 @@ def area_weighted_shuffle_array( >>> # Use higher area_factor for stronger bias toward large areas >>> subset_biased, _ = area_weighted_shuffle_array(mesh_data, 2, cell_areas, area_factor=2.0) """ - xp = array_type(arr) + # Calculate area-weighted probabilities sampling_probabilities = area**area_factor - sampling_probabilities /= xp.sum(sampling_probabilities) # Normalize to sum to 1 - - # Ensure we don't request more points than available - n_points = min(n_points, arr.shape[0]) - - # Create index array for all available points - point_indices = xp.arange(arr.shape[0]) - - if xp != np: - point_indices = point_indices.get() - sampling_probabilities = sampling_probabilities.get() - - selected_indices = np.random.choice( - point_indices, n_points, p=sampling_probabilities - ) - selected_indices = xp.asarray(selected_indices) + sampling_probabilities /= sampling_probabilities.sum() # Normalize to sum to 1 - return arr[selected_indices], selected_indices + return shuffle_array(arr, n_points, sampling_probabilities) def solution_weighted_shuffle_array( - arr: ArrayType, n_points: int, solution: ArrayType, scaling_factor: float = 1.0 -) -> tuple[ArrayType, ArrayType]: - """Perform solution-weighted random sampling from array. + arr: torch.Tensor, + n_points: int, + solution: torch.Tensor, + scaling_factor: float = 1.0, +) -> tuple[torch.Tensor, torch.Tensor]: + """Perform solution-weighted random sampling from tensor. - This function samples points from an array with probability proportional to + This function samples points from a tensor with probability proportional to their associated solution weights. This is particularly useful in CFD applications where larger cells or surface elements should have higher sampling probability. Args: - arr: Input array to sample from, shape (n_points, ...). + arr: Input tensor to sample from, shape (n_points, ...). n_points: Number of points to sample. If greater than arr.shape[0], samples all available points. solution: Solution weights for each point, shape (n_points,). Larger values @@ -918,19 +841,18 @@ def solution_weighted_shuffle_array( Returns: Tuple containing: - - Sampled array subset weighted by solution fields + - Sampled tensor subset weighted by solution fields - Indices of the selected points Note: - For GPU arrays (CuPy), the sampling is performed on CPU due to memory - efficiency considerations. The Alias method could be implemented for - future GPU acceleration. + For GPU tensors, the sampling is performed on the current device. + The sampling uses torch.multinomial for efficient weighted sampling. Examples: - >>> import numpy as np - >>> np.random.seed(42) # For reproducible results - >>> mesh_data = np.array([[1.0], [2.0], [3.0], [4.0]]) - >>> solution = np.array([0.1, 0.1, 0.1, 10.0]) # Last point has much larger solution field + >>> import torch + >>> torch.manual_seed(42) # For reproducible results + >>> mesh_data = torch.tensor([[1.0], [2.0], [3.0], [4.0]]) + >>> solution = torch.tensor([0.1, 0.1, 0.1, 10.0]) # Last point has much larger solution field >>> subset, indices = solution_weighted_shuffle_array(mesh_data, 2, solution) >>> subset.shape (2, 1) @@ -942,24 +864,9 @@ def solution_weighted_shuffle_array( >>> # Use higher scaling_factor for stronger bias toward large solution fields >>> subset_biased, _ = solution_weighted_shuffle_array(mesh_data, 2, solution, scaling_factor=2.0) """ - xp = array_type(arr) + # Calculate solution-weighted probabilities sampling_probabilities = solution**scaling_factor - sampling_probabilities /= xp.sum(sampling_probabilities) # Normalize to sum to 1 - - # Ensure we don't request more points than available - n_points = min(n_points, arr.shape[0]) - - # Create index array for all available points - point_indices = xp.arange(arr.shape[0]) - - if xp != np: - point_indices = point_indices.get() - sampling_probabilities = sampling_probabilities.get() - - selected_indices = np.random.choice( - point_indices, n_points, p=sampling_probabilities - ) - selected_indices = xp.asarray(selected_indices) + sampling_probabilities /= sampling_probabilities.sum() # Normalize to sum to 1 - return arr[selected_indices], selected_indices + return shuffle_array(arr, n_points, sampling_probabilities) diff --git a/physicsnemo/utils/neighbors/knn/_cuml_impl.py b/physicsnemo/utils/neighbors/knn/_cuml_impl.py index 10d20ce1f5..72546cf6a7 100644 --- a/physicsnemo/utils/neighbors/knn/_cuml_impl.py +++ b/physicsnemo/utils/neighbors/knn/_cuml_impl.py @@ -28,12 +28,21 @@ def knn_impl( points: torch.Tensor, queries: torch.Tensor, k: int = 3 ) -> tuple[torch.Tensor, torch.Tensor]: + # Create a cuml handle to ensure we use the right stream: + torch_stream = torch.cuda.current_stream() + + # Get the raw CUDA stream pointer (as an integer) + ptr = torch_stream.cuda_stream + + # Build a cuML handle with that stream + handle = cuml.Handle(stream=ptr) + # Use dlpack to move the data without copying between pytorch and cuml: points = cp.from_dlpack(points) queries = cp.from_dlpack(queries) # Construct the knn: - knn = cuml.neighbors.NearestNeighbors(n_neighbors=k) + knn = cuml.neighbors.NearestNeighbors(n_neighbors=k, handle=handle) # First pass partitions everything in points to make lookups fast knn.fit(points) diff --git a/physicsnemo/utils/neighbors/radius_search/_warp_impl.py b/physicsnemo/utils/neighbors/radius_search/_warp_impl.py index 9b15b6816f..997f95d3fd 100644 --- a/physicsnemo/utils/neighbors/radius_search/_warp_impl.py +++ b/physicsnemo/utils/neighbors/radius_search/_warp_impl.py @@ -278,11 +278,6 @@ def radius_search_impl( if points.device != queries.device: raise ValueError("points and queries must be on the same device") - # We're in the warp-backended regime. So, the first thing to do is to convert these torch tensors to warp - # These are readonly in warp, allocated with pytorch. - wp_points = wp.from_torch(points, dtype=wp.vec3) - wp_queries = wp.from_torch(queries, dtype=wp.vec3, return_ctype=True) - N_queries = len(queries) # Compute follows data. @@ -297,92 +292,104 @@ def radius_search_impl( wp_launch_stream = None wp_launch_device = "cpu" # CPUs have no streams - # We need to create a hash grid: - grid = wp.HashGrid(dim_x=128, dim_y=128, dim_z=128, device=wp_points.device) - grid.reserve(N_queries) - grid.build(points=wp_points, radius=0.5 * radius) - - # Now, the situations diverge based on max_points. - - if max_points is None: - total_count, wp_offset = count_neighbors( - grid, - wp_points, - wp_queries, - wp_launch_device, - wp_launch_stream, - radius, - N_queries, - ) + with wp.ScopedStream(wp_launch_stream): + # We're in the warp-backended regime. So, the first thing to do is to convert these torch tensors to warp + # These are readonly in warp, allocated with pytorch. + wp_points = wp.from_torch(points, dtype=wp.vec3) + wp_queries = wp.from_torch(queries, dtype=wp.vec3, return_ctype=True) - if not total_count < 2**31 - 1: - raise RuntimeError( - f"Total found neighbors is too large: {total_count} > 2**31 - 1" - ) + # We need to create a hash grid: + grid = wp.HashGrid(dim_x=128, dim_y=128, dim_z=128, device=wp_points.device) + grid.reserve(N_queries) + grid.build(points=wp_points, radius=0.5 * radius) - return gather_neighbors( - grid, - points.device, - wp_points, - wp_queries, - wp_offset, - wp_launch_device, - wp_launch_stream, - radius, - N_queries, - return_dists, - return_points, - total_count, - ) + # Now, the situations diverge based on max_points. - else: - # With a fixed number of output points, we have no need for a second kernel. - indices = torch.full( - (N_queries, max_points), 0, dtype=torch.int32, device=points.device - ) - if return_dists: - distances = torch.zeros( - (N_queries, max_points), dtype=torch.float32, device=points.device + if max_points is None: + total_count, wp_offset = count_neighbors( + grid, + wp_points, + wp_queries, + wp_launch_device, + wp_launch_stream, + radius, + N_queries, ) - else: - distances = torch.empty(0, dtype=torch.float32, device=points.device) - num_neighbors = torch.zeros( - (N_queries,), dtype=torch.int32, device=points.device - ) - if return_points: - points = torch.zeros( - (len(queries), max_points, 3), - dtype=torch.float32, - device=points.device, - ) - else: - points = torch.empty( - (0, max_points, 3), dtype=torch.float32, device=points.device - ) - # This kernel selects up to max_points hits per query. - # It is not necessarily deterministic. - # If the number of matches > max_points, you may get different results. + if not total_count < 2**31 - 1: + raise RuntimeError( + f"Total found neighbors is too large: {total_count} > 2**31 - 1" + ) - wp.launch( - kernel=radius_search_limited_select, - dim=N_queries, - inputs=[ - grid.id, + return gather_neighbors( + grid, + points.device, wp_points, wp_queries, - max_points, + wp_offset, + wp_launch_device, + wp_launch_stream, radius, - wp.from_torch(indices, return_ctype=True), - wp.from_torch(num_neighbors, return_ctype=True), + N_queries, return_dists, - wp.from_torch(distances, return_ctype=True), return_points, - wp.from_torch(points, return_ctype=True) if return_points else None, - ], - stream=wp_launch_stream, - device=wp_launch_device, - ) + total_count, + ) + + else: + # With a fixed number of output points, we have no need for a second kernel. + indices = torch.full( + (N_queries, max_points), 0, dtype=torch.int32, device=points.device + ) + if return_dists: + distances = torch.zeros( + (N_queries, max_points), + dtype=torch.float32, + device=points.device, + ) + else: + distances = torch.empty( + 0, dtype=torch.float32, device=points.device + ) + num_neighbors = torch.zeros( + (N_queries,), dtype=torch.int32, device=points.device + ) + + if return_points: + points = torch.zeros( + (len(queries), max_points, 3), + dtype=torch.float32, + device=points.device, + ) + else: + points = torch.empty( + (0, max_points, 3), dtype=torch.float32, device=points.device + ) + # This kernel selects up to max_points hits per query. + # It is not necessarily deterministic. + # If the number of matches > max_points, you may get different results. + + wp.launch( + kernel=radius_search_limited_select, + dim=N_queries, + inputs=[ + grid.id, + wp_points, + wp_queries, + max_points, + radius, + wp.from_torch(indices, return_ctype=True), + wp.from_torch(num_neighbors, return_ctype=True), + return_dists, + wp.from_torch(distances, return_ctype=True), + return_points, + wp.from_torch(points, return_ctype=True) + if return_points + else None, + ], + stream=wp_launch_stream, + device=wp_launch_device, + ) # Handle the matrix of return values: return indices, points, distances, num_neighbors diff --git a/test/utils/test_domino_utils.py b/test/utils/test_domino_utils.py index 8a0e03637b..a9e1166640 100644 --- a/test/utils/test_domino_utils.py +++ b/test/utils/test_domino_utils.py @@ -21,7 +21,10 @@ module to ensure that the documented examples work correctly. """ -import numpy as np +import math + +import pytest +import torch from physicsnemo.utils.domino.utils import ( area_weighted_shuffle_array, @@ -45,67 +48,70 @@ def test_calculate_center_of_mass(): """Test calculate_center_of_mass function with docstring example.""" - centers = np.array([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0], [2.0, 2.0, 2.0]]) - sizes = np.array([1.0, 2.0, 3.0]) + centers = torch.tensor([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0], [2.0, 2.0, 2.0]]) + sizes = torch.tensor([1.0, 2.0, 3.0]) com = calculate_center_of_mass(centers, sizes) - expected = np.array([[4.0 / 3.0, 4.0 / 3.0, 4.0 / 3.0]]) - assert np.allclose(com, expected) + expected = torch.tensor([[4.0 / 3.0, 4.0 / 3.0, 4.0 / 3.0]]) + assert torch.allclose(com, expected) def test_normalize(): """Test normalize function with docstring examples.""" # Example 1: With explicit min/max - field = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) - normalized = normalize(field, 5.0, 1.0) - expected = np.array([-1.0, -0.5, 0.0, 0.5, 1.0]) - assert np.allclose(normalized, expected) + field = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0]) + normalized = normalize(field, max_val=5.0, min_val=1.0) + expected = torch.tensor([-1.0, -0.5, 0.0, 0.5, 1.0]) + assert torch.allclose(normalized, expected) # Example 2: Auto-compute min/max normalized_auto = normalize(field) - expected_auto = np.array([-1.0, -0.5, 0.0, 0.5, 1.0]) - assert np.allclose(normalized_auto, expected_auto) + expected_auto = torch.tensor([-1.0, -0.5, 0.0, 0.5, 1.0]) + assert torch.allclose(normalized_auto, expected_auto) def test_unnormalize(): """Test unnormalize function with docstring example.""" - normalized = np.array([-1.0, -0.5, 0.0, 0.5, 1.0]) + normalized = torch.tensor([-1.0, -0.5, 0.0, 0.5, 1.0]) original = unnormalize(normalized, 5.0, 1.0) - expected = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) - assert np.allclose(original, expected) + expected = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0]) + assert torch.allclose(original, expected) def test_standardize(): """Test standardize function with docstring examples.""" # Example 1: With explicit mean/std - field = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) - standardized = standardize(field, 3.0, np.sqrt(2.5)) - expected = np.array([-1.265, -0.632, 0.0, 0.632, 1.265]) - assert np.allclose(standardized, expected, atol=1e-3) + field = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0]) + standardized = standardize(field, 3.0, math.sqrt(2.5)) + expected = torch.tensor([-1.265, -0.632, 0.0, 0.632, 1.265]) + assert torch.allclose(standardized, expected, atol=1e-3) # Example 2: Auto-compute mean/std standardized_auto = standardize(field) - assert np.allclose(np.mean(standardized_auto), 0.0) - assert np.allclose(np.std(standardized_auto, ddof=0), 1.0) + assert torch.allclose(torch.mean(standardized_auto), torch.tensor(0.0)) + assert torch.allclose(torch.std(standardized_auto, correction=1), torch.tensor(1.0)) def test_unstandardize(): """Test unstandardize function with docstring example.""" - standardized = np.array([-1.265, -0.632, 0.0, 0.632, 1.265]) - original = unstandardize(standardized, 3.0, np.sqrt(2.5)) - expected = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) - assert np.allclose(original, expected, atol=1e-3) + standardized = torch.tensor([-1.265, -0.632, 0.0, 0.632, 1.265]) + original = unstandardize(standardized, 3.0, math.sqrt(2.5)) + expected = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0]) + assert torch.allclose(original, expected, atol=1e-3) -def test_calculate_normal_positional_encoding(): +@pytest.mark.parametrize("relative", [True, False]) +def test_calculate_normal_positional_encoding(relative): """Test calculate_normal_positional_encoding function with docstring examples.""" # Example 1: Basic coordinates - coords = np.array([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]]) + coords = torch.tensor([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]]) cell_size = [0.1, 0.1, 0.1] - encoding = calculate_normal_positional_encoding(coords, cell_dimensions=cell_size) - assert encoding.shape == (2, 12) # Example 2: Relative positioning - coords_b = np.array([[0.5, 0.5, 0.5], [0.5, 0.5, 0.5]]) + if relative: + coords_b = torch.tensor([[0.5, 0.5, 0.5], [0.5, 0.5, 0.5]]) + else: + coords_b = None + encoding_rel = calculate_normal_positional_encoding(coords, coords_b, cell_size) assert encoding_rel.shape == (2, 12) @@ -113,9 +119,9 @@ def test_calculate_normal_positional_encoding(): def test_nd_interpolator(): """Test nd_interpolator function with docstring example.""" # Simple 2D interpolation example - coords = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]]) - field_vals = np.array([[1.0], [2.0], [3.0], [4.0]]) - grid_points = np.array([[0.5, 0.5]]) + coords = torch.tensor([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]]) + field_vals = torch.tensor([[1.0], [2.0], [3.0], [4.0]]) + grid_points = torch.tensor([[0.5, 0.5]]) result = nd_interpolator([coords], field_vals, grid_points) assert result.shape[0] == 1 # One grid point @@ -123,49 +129,49 @@ def test_nd_interpolator(): def test_pad(): """Test pad function with docstring examples.""" # Example 1: Padding needed - arr = np.array([[1.0, 2.0], [3.0, 4.0]]) + arr = torch.tensor([[1.0, 2.0], [3.0, 4.0]]) padded = pad(arr, 4, -1.0) assert padded.shape == (4, 2) - assert np.array_equal(padded[:2], arr) - assert bool(np.all(padded[2:] == -1.0)) + assert torch.allclose(padded[:2], arr) + assert bool(torch.all(padded[2:] == -1.0)) # Example 2: No padding needed same = pad(arr, 2) - assert np.array_equal(same, arr) + assert torch.allclose(same, arr) def test_pad_inp(): """Test pad_inp function with docstring example.""" - arr = np.array([[[1.0, 2.0]], [[3.0, 4.0]]]) + arr = torch.tensor([[[1.0, 2.0]], [[3.0, 4.0]]]) padded = pad_inp(arr, 4, 0.0) assert padded.shape == (4, 1, 2) - assert np.array_equal(padded[:2], arr) - assert bool(np.all(padded[2:] == 0.0)) + assert torch.allclose(padded[:2], arr) + assert bool(torch.all(padded[2:] == 0.0)) def test_shuffle_array(): """Test shuffle_array function with docstring example.""" - np.random.seed(42) # For reproducible results - data = np.array([[1, 2], [3, 4], [5, 6], [7, 8]]) + torch.manual_seed(42) # For reproducible results + data = torch.tensor([[1, 2], [3, 4], [5, 6], [7, 8]]) subset, indices = shuffle_array(data, 2) assert subset.shape == (2, 2) assert indices.shape == (2,) - assert len(np.unique(indices)) == 2 # No duplicates + assert len(torch.unique(indices)) == 2 # No duplicates def test_shuffle_array_without_sampling(): """Test shuffle_array_without_sampling function with docstring example.""" - np.random.seed(42) # For reproducible results - data = np.array([[1], [2], [3], [4]]) + torch.manual_seed(42) # For reproducible results + data = torch.tensor([[1], [2], [3], [4]]) shuffled, indices = shuffle_array_without_sampling(data) assert shuffled.shape == (4, 1) assert indices.shape == (4,) - assert set(indices) == set(range(4)) # All original indices present + assert set(indices.tolist()) == set(range(4)) # All original indices present def test_calculate_pos_encoding(): """Test calculate_pos_encoding function with docstring example.""" - positions = np.array([0.0, 1.0, 2.0]) + positions = torch.tensor([0.0, 1.0, 2.0]) encodings = calculate_pos_encoding(positions, d=4) assert len(encodings) == 4 assert all(enc.shape == (3,) for enc in encodings) @@ -182,30 +188,30 @@ def test_combine_dict(): def test_create_grid(): """Test create_grid function with docstring example.""" - min_bounds = np.array([0.0, 0.0, 0.0]) - max_bounds = np.array([1.0, 1.0, 1.0]) - grid_res = np.array([2, 2, 2]) + min_bounds = torch.tensor([0.0, 0.0, 0.0]) + max_bounds = torch.tensor([1.0, 1.0, 1.0]) + grid_res = torch.tensor([2, 2, 2]) grid = create_grid(max_bounds, min_bounds, grid_res) assert grid.shape == (2, 2, 2, 3) - assert np.allclose(grid[0, 0, 0], [0.0, 0.0, 0.0]) - assert np.allclose(grid[1, 1, 1], [1.0, 1.0, 1.0]) + assert torch.allclose(grid[0, 0, 0], torch.tensor([0.0, 0.0, 0.0])) + assert torch.allclose(grid[1, 1, 1], torch.tensor([1.0, 1.0, 1.0])) def test_mean_std_sampling(): """Test mean_std_sampling function with docstring example.""" # Create test data with outliers - field = np.array([[1.0], [2.0], [3.0], [10.0]]) # 10.0 is outlier - field_mean = np.array([2.0]) - field_std = np.array([1.0]) + field = torch.tensor([[1.0], [2.0], [3.0], [10.0]]) # 10.0 is outlier + field_mean = torch.tensor([2.0]) + field_std = torch.tensor([1.0]) outliers = mean_std_sampling(field, field_mean, field_std, 2.0) assert 3 in outliers # Index 3 (value 10.0) should be detected as outlier def test_area_weighted_shuffle_array(): """Test area_weighted_shuffle_array function with docstring example.""" - np.random.seed(42) # For reproducible results - mesh_data = np.array([[1.0], [2.0], [3.0], [4.0]]) - cell_areas = np.array([0.1, 0.1, 0.1, 10.0]) # Last point has much larger area + torch.manual_seed(42) # For reproducible results + mesh_data = torch.tensor([[1.0], [2.0], [3.0], [4.0]]) + cell_areas = torch.tensor([0.1, 0.1, 0.1, 10.0]) # Last point has much larger area subset, indices = area_weighted_shuffle_array(mesh_data, 2, cell_areas) assert subset.shape == (2, 1) assert indices.shape == (2,) From 45789759bd4f56d65db7cbc83a69712d07608998 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 3 Sep 2025 09:49:53 -0700 Subject: [PATCH 14/98] update training script for new datapipe --- .../external_aerodynamics/domino/src/train.py | 144 +++++++++--------- 1 file changed, 76 insertions(+), 68 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py index 96e30b58e7..3311083e04 100644 --- a/examples/cfd/external_aerodynamics/domino/src/train.py +++ b/examples/cfd/external_aerodynamics/domino/src/train.py @@ -41,7 +41,7 @@ from hydra.utils import to_absolute_path from omegaconf import DictConfig, OmegaConf import torch.distributed as dist -from torch.cuda.amp import GradScaler, autocast +from torch.amp import GradScaler, autocast from torch.nn.parallel import DistributedDataParallel from torch.utils.data import DataLoader from torch.utils.data.distributed import DistributedSampler @@ -54,7 +54,7 @@ from physicsnemo.launch.utils import load_checkpoint, save_checkpoint from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper -from physicsnemo.datapipes.cae.domino_datapipe import ( +from physicsnemo.datapipes.cae.domino_datapipe2 import ( DoMINODataPipe, compute_scaling_factors, create_domino_dataset, @@ -73,7 +73,7 @@ from physicsnemo.utils.profiling import profile, Profiler -# Profiler().enable("line_profiler") +# Profiler().enable("torch") # Profiler().initialize() @@ -620,8 +620,8 @@ def validation_step( with torch.no_grad(): for i_batch, sample_batched in enumerate(dataloader): sampled_batched = dict_to_device(sample_batched, device) - - with autocast(enabled=True): + print(f"validation i batch {i_batch}") + with autocast("cuda", enabled=True): if add_physics_loss: prediction_vol, prediction_surf = model( sampled_batched, return_volume_neighbors=True @@ -680,70 +680,75 @@ def train_epoch( gpu_start_info = nvmlDeviceGetMemoryInfo(gpu_handle) start_time = time.perf_counter() - for i_batch, sample_batched in enumerate(dataloader): - sampled_batched = dict_to_device(sample_batched, device) - - if add_physics_loss: - autocast_enabled = False - else: - autocast_enabled = True - with autocast(enabled=autocast_enabled): - with nvtx.range("Model Forward Pass"): - if add_physics_loss: - prediction_vol, prediction_surf = model( - sampled_batched, return_volume_neighbors=True - ) - else: - prediction_vol, prediction_surf = model(sampled_batched) - - loss, loss_dict = compute_loss_dict( - prediction_vol, - prediction_surf, - sampled_batched, - loss_fn_type, - integral_scaling_factor, - surf_loss_scaling, - vol_loss_scaling, - first_deriv, - eqn, - bounding_box, - vol_factors, - add_physics_loss, - ) + with Profiler(): + for i_batch, sample_batched in enumerate(dataloader): + sampled_batched = dict_to_device(sample_batched, device) - loss = loss / loss_interval - scaler.scale(loss).backward() + if add_physics_loss: + autocast_enabled = False + else: + autocast_enabled = True + with autocast("cuda", enabled=autocast_enabled): + with nvtx.range("Model Forward Pass"): + if add_physics_loss: + prediction_vol, prediction_surf = model( + sampled_batched, return_volume_neighbors=True + ) + else: + prediction_vol, prediction_surf = model(sampled_batched) - if ((i_batch + 1) % loss_interval == 0) or (i_batch + 1 == len(dataloader)): - scaler.step(optimizer) - scaler.update() - optimizer.zero_grad() + loss, loss_dict = compute_loss_dict( + prediction_vol, + prediction_surf, + sampled_batched, + loss_fn_type, + integral_scaling_factor, + surf_loss_scaling, + vol_loss_scaling, + first_deriv, + eqn, + bounding_box, + vol_factors, + add_physics_loss, + ) - # Gather data and report - running_loss += loss.item() - elapsed_time = time.perf_counter() - start_time - start_time = time.perf_counter() - gpu_end_info = nvmlDeviceGetMemoryInfo(gpu_handle) - gpu_memory_used = gpu_end_info.used / (1024**3) - gpu_memory_delta = (gpu_end_info.used - gpu_start_info.used) / (1024**3) - - logging_string = f"Device {device}, batch processed: {i_batch + 1}\n" - # Format the loss dict into a string: - loss_string = ( - " " - + "\t".join([f"{key.replace('loss_', ''):<10}" for key in loss_dict.keys()]) - + "\n" - ) - loss_string += ( - " " + f"\t".join([f"{l.item():<10.3e}" for l in loss_dict.values()]) + "\n" - ) + loss = loss / loss_interval + scaler.scale(loss).backward() + + if ((i_batch + 1) % loss_interval == 0) or (i_batch + 1 == len(dataloader)): + scaler.step(optimizer) + scaler.update() + optimizer.zero_grad() + + # Gather data and report + running_loss += loss.item() + elapsed_time = time.perf_counter() - start_time + start_time = time.perf_counter() + gpu_end_info = nvmlDeviceGetMemoryInfo(gpu_handle) + gpu_memory_used = gpu_end_info.used / (1024**3) + gpu_memory_delta = (gpu_end_info.used - gpu_start_info.used) / (1024**3) + + logging_string = f"Device {device}, batch processed: {i_batch + 1}\n" + # Format the loss dict into a string: + loss_string = ( + " " + + "\t".join( + [f"{key.replace('loss_', ''):<10}" for key in loss_dict.keys()] + ) + + "\n" + ) + loss_string += ( + " " + + f"\t".join([f"{l.item():<10.3e}" for l in loss_dict.values()]) + + "\n" + ) - logging_string += loss_string - logging_string += f" GPU memory used: {gpu_memory_used:.3f} Gb\n" - logging_string += f" GPU memory delta: {gpu_memory_delta:.3f} Gb\n" - logging_string += f" Time taken: {elapsed_time:.2f} seconds\n" - logger.info(logging_string) - gpu_start_info = nvmlDeviceGetMemoryInfo(gpu_handle) + logging_string += loss_string + logging_string += f" GPU memory used: {gpu_memory_used:.3f} Gb\n" + logging_string += f" GPU memory delta: {gpu_memory_delta:.3f} Gb\n" + logging_string += f" Time taken: {elapsed_time:.2f} seconds\n" + logger.info(logging_string) + gpu_start_info = nvmlDeviceGetMemoryInfo(gpu_handle) last_loss = running_loss / (i_batch + 1) # loss per batch if dist.rank == 0: @@ -904,7 +909,7 @@ def main(cfg: DictConfig) -> None: global_features=num_global_features, model_parameters=cfg.model, ).to(dist.device) - model = torch.compile(model, disable=True) # TODO make this configurable + # model = torch.compile(model, fullgraph=True, dynamic=True) # TODO make this configurable # Print model summary (structure and parmeter count). logger.info(f"Model summary:\n{torchinfo.summary(model, verbose=0, depth=2)}\n") @@ -999,7 +1004,7 @@ def main(cfg: DictConfig) -> None: model.train(True) epoch_start_time = time.perf_counter() avg_loss = train_epoch( - dataloader=train_dataloader, + dataloader=train_dataset, model=model, optimizer=optimizer, scaler=scaler, @@ -1026,7 +1031,7 @@ def main(cfg: DictConfig) -> None: model.eval() avg_vloss = validation_step( - dataloader=val_dataloader, + dataloader=val_dataset, model=model, device=dist.device, logger=logger, @@ -1088,4 +1093,7 @@ def main(cfg: DictConfig) -> None: if __name__ == "__main__": + # Profiler().enable("torch") + # Profiler().initialize() main() + # Profiler().finalize() From 9a5d8edf353331b42dcddb8914df9ac2fffc3a85 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Thu, 4 Sep 2025 13:07:38 +0000 Subject: [PATCH 15/98] Add abillity to pin memory, optionally. --- physicsnemo/datapipes/cae/domino_datapipe2.py | 7 +++---- physicsnemo/datapipes/cae/drivaer_ml_dataset.py | 11 ++++++++++- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py index dcc82d49cd..d74d450fed 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe2.py +++ b/physicsnemo/datapipes/cae/domino_datapipe2.py @@ -44,7 +44,6 @@ ) from physicsnemo.distributed import DistributedManager from physicsnemo.utils.domino.utils import ( - ArrayType, calculate_center_of_mass, calculate_normal_positional_encoding, create_grid, @@ -64,8 +63,8 @@ class BoundingBox(Protocol): Type definition for the required format of bounding box dimensions. """ - min: ArrayType - max: ArrayType + min: Sequence + max: Sequence @dataclass @@ -149,7 +148,7 @@ class DoMINODataConfig: volume_factors: Optional[Sequence] = None bounding_box_dims: Optional[Union[BoundingBox, Sequence]] = None - grid_resolution: Union[Sequence, ArrayType] = (256, 96, 64) + grid_resolution: Sequence = (256, 96, 64) normalize_coordinates: bool = False sample_in_bbox: bool = False sampling: bool = False diff --git a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py index 78f9407ebd..bb3c5b7a1a 100644 --- a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py +++ b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py @@ -265,6 +265,7 @@ def __init__( keys_to_read: list[str] | None, output_device: torch.device, preload_depth: int = 2, + pin_memory: bool = True, device_mesh: torch.distributed.DeviceMesh | None = None, placements: dict[str, torch.distributed.tensor.Placement] | None = None, consumer_stream: torch.cuda.Stream | None = None, @@ -285,6 +286,8 @@ def __init__( data_dir ) + self.pin_memory = pin_memory + # Check the file names; some can be read well in parallel, while others # are not parallelizable. @@ -360,8 +363,14 @@ def _move_to_gpu( with torch.cuda.stream(self._data_loader_stream): for key in data.keys(): + if self.pin_memory: + result[key] = ( + data[key].pin_memory().to(self.output_device, non_blocking=True) + ) + else: + result[key] = data[key].to(self.output_device, non_blocking=True) # Move to GPU if available - result[key] = data[key].to(self.output_device, non_blocking=True) + # result[key] = data[key].to(self.output_device, non_blocking=True) result[key].record_stream(self.consumer_stream) # Mark the consumer stream: From c57f985216a3ca659470ee25a5827d7c96b2e257 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Fri, 5 Sep 2025 14:19:38 +0000 Subject: [PATCH 16/98] Snapshot updates of cleanups and minor fixes --- .../external_aerodynamics/domino/src/loss.py | 587 ++++++++++++++++ .../external_aerodynamics/domino/src/train.py | 638 ++---------------- .../external_aerodynamics/domino/src/utils.py | 74 ++ physicsnemo/datapipes/cae/domino_datapipe2.py | 21 +- 4 files changed, 733 insertions(+), 587 deletions(-) create mode 100644 examples/cfd/external_aerodynamics/domino/src/loss.py create mode 100644 examples/cfd/external_aerodynamics/domino/src/utils.py diff --git a/examples/cfd/external_aerodynamics/domino/src/loss.py b/examples/cfd/external_aerodynamics/domino/src/loss.py new file mode 100644 index 0000000000..0d90ab3674 --- /dev/null +++ b/examples/cfd/external_aerodynamics/domino/src/loss.py @@ -0,0 +1,587 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from typing import Literal, Any + +from physicsnemo.utils.domino.utils import unnormalize + +import time +import os +import re +import torch +import torchinfo + +from typing import Literal, Any + +import apex +import numpy as np +import hydra +from hydra.utils import to_absolute_path +from omegaconf import DictConfig, OmegaConf +import torch.distributed as dist +from torch.amp import GradScaler, autocast +from torch.nn.parallel import DistributedDataParallel +from torch.utils.data import DataLoader +from torch.utils.data.distributed import DistributedSampler +from torch.utils.tensorboard import SummaryWriter +from nvtx import annotate as nvtx_annotate +import torch.cuda.nvtx as nvtx + + +from physicsnemo.distributed import DistributedManager +from physicsnemo.launch.utils import load_checkpoint, save_checkpoint +from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper + +from physicsnemo.datapipes.cae.domino_datapipe2 import ( + DoMINODataPipe, + compute_scaling_factors, + create_domino_dataset, +) +from physicsnemo.models.domino.model import DoMINO +from physicsnemo.utils.domino.utils import * + +# This is included for GPU memory tracking: +from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo +import time + +# Initialize NVML +nvmlInit() + + +from physicsnemo.utils.profiling import profile, Profiler + + +def compute_physics_loss( + output: torch.Tensor, + target: torch.Tensor, + mask: torch.Tensor, + loss_type: Literal["mse", "rmse"], + dims: tuple[int, ...] | None, + first_deriv: torch.nn.Module, + eqn: Any, + bounding_box: torch.Tensor, + vol_factors: torch.Tensor, +) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + """Compute physics-based loss terms for Navier-Stokes equations. + + Args: + output: Model output containing (output, coords_neighbors, output_neighbors, neighbors_list) + target: Ground truth values + mask: Mask for valid values + loss_type: Type of loss to calculate ("mse" or "rmse") + dims: Dimensions for loss calculation + first_deriv: First derivative calculator + eqn: Equations + bounding_box: Bounding box for normalization + vol_factors: Volume factors for normalization + + Returns: + Tuple of (data_loss, continuity_loss, momentum_x_loss, momentum_y_loss, momentum_z_loss) + """ + # Physics loss enabled + output, coords_neighbors, output_neighbors, neighbors_list = output + batch_size = output.shape[1] + fields, num_neighbors = output_neighbors.shape[3], output_neighbors.shape[2] + coords_total = coords_neighbors[0, :] + output_total = output_neighbors[0, :] + output_total_unnormalized = unnormalize( + output_total, vol_factors[0], vol_factors[1] + ) + coords_total_unnormalized = unnormalize( + coords_total, bounding_box[0], bounding_box[1] + ) + + # compute first order gradients on all the nodes from the neighbors_list + grad_list = {} + for parent_id, neighbor_ids in neighbors_list.items(): + neighbor_ids_tensor = torch.tensor(neighbor_ids).to( + output_total_unnormalized.device + ) + du = ( + output_total_unnormalized[:, [parent_id]] + - output_total_unnormalized[:, neighbor_ids_tensor] + ) + dv = ( + coords_total_unnormalized[:, [parent_id]] + - coords_total_unnormalized[:, neighbor_ids_tensor] + ) + grads = first_deriv.forward( + coords=None, connectivity_tensor=None, y=None, du=du, dv=dv + ) + grad = torch.cat(grads, dim=1) + grad_list[parent_id] = grad + + # compute second order gradients on only the center node + neighbor_ids_tensor = torch.tensor(neighbors_list[0]).to( + output_total_unnormalized.device + ) + grad_neighbors_center = torch.stack([v for v in grad_list.values()], dim=1) + grad_neighbors_center = grad_neighbors_center.reshape( + batch_size, len(neighbors_list[0]) + 1, -1 + ) + + du = grad_neighbors_center[:, [0]] - grad_neighbors_center[:, neighbor_ids_tensor] + dv = ( + coords_total_unnormalized[:, [0]] + - coords_total_unnormalized[:, neighbor_ids_tensor] + ) + + # second order gradients + ggrads_center = first_deriv.forward( + coords=None, connectivity_tensor=None, y=None, du=du, dv=dv + ) + ggrad_center = torch.cat(ggrads_center, dim=1) + grad_neighbors_center = grad_neighbors_center.reshape( + batch_size, len(neighbors_list[0]) + 1, 3, -1 + ) + + # Get the outputs on the original nodes + fields_center_unnormalized = output_total_unnormalized[:, 0, :] + grad_center = grad_neighbors_center[:, 0, :, :] + grad_grad_uvw_center = ggrad_center[:, :, :9] + + nu = 1.507 * 1e-5 + + dict_mapping = { + "u": fields_center_unnormalized[:, [0]], + "v": fields_center_unnormalized[:, [1]], + "w": fields_center_unnormalized[:, [2]], + "p": fields_center_unnormalized[:, [3]], + "nu": nu + fields_center_unnormalized[:, [4]], + "u__x": grad_center[:, 0, [0]], + "u__y": grad_center[:, 1, [0]], + "u__z": grad_center[:, 2, [0]], + "v__x": grad_center[:, 0, [1]], + "v__y": grad_center[:, 1, [1]], + "v__z": grad_center[:, 2, [1]], + "w__x": grad_center[:, 0, [2]], + "w__y": grad_center[:, 1, [2]], + "w__z": grad_center[:, 2, [2]], + "p__x": grad_center[:, 0, [3]], + "p__y": grad_center[:, 1, [3]], + "p__z": grad_center[:, 2, [3]], + "nu__x": grad_center[:, 0, [4]], + "nu__y": grad_center[:, 1, [4]], + "nu__z": grad_center[:, 2, [4]], + "u__x__x": grad_grad_uvw_center[:, 0, [0]], + "u__x__y": grad_grad_uvw_center[:, 1, [0]], + "u__x__z": grad_grad_uvw_center[:, 2, [0]], + "u__y__x": grad_grad_uvw_center[:, 1, [0]], # same as __x__y + "u__y__y": grad_grad_uvw_center[:, 1, [1]], + "u__y__z": grad_grad_uvw_center[:, 2, [1]], + "u__z__x": grad_grad_uvw_center[:, 2, [0]], # same as __x__z + "u__z__y": grad_grad_uvw_center[:, 2, [1]], # same as __y__z + "u__z__z": grad_grad_uvw_center[:, 2, [2]], + "v__x__x": grad_grad_uvw_center[:, 0, [3]], + "v__x__y": grad_grad_uvw_center[:, 1, [3]], + "v__x__z": grad_grad_uvw_center[:, 2, [3]], + "v__y__x": grad_grad_uvw_center[:, 1, [3]], # same as __x__y + "v__y__y": grad_grad_uvw_center[:, 1, [4]], + "v__y__z": grad_grad_uvw_center[:, 2, [4]], + "v__z__x": grad_grad_uvw_center[:, 2, [3]], # same as __x__z + "v__z__y": grad_grad_uvw_center[:, 2, [4]], # same as __y__z + "v__z__z": grad_grad_uvw_center[:, 2, [5]], + "w__x__x": grad_grad_uvw_center[:, 0, [6]], + "w__x__y": grad_grad_uvw_center[:, 1, [6]], + "w__x__z": grad_grad_uvw_center[:, 2, [6]], + "w__y__x": grad_grad_uvw_center[:, 1, [6]], # same as __x__y + "w__y__y": grad_grad_uvw_center[:, 1, [7]], + "w__y__z": grad_grad_uvw_center[:, 2, [7]], + "w__z__x": grad_grad_uvw_center[:, 2, [6]], # same as __x__z + "w__z__y": grad_grad_uvw_center[:, 2, [7]], # same as __y__z + "w__z__z": grad_grad_uvw_center[:, 2, [8]], + } + continuity = eqn["continuity"].evaluate(dict_mapping)["continuity"] + momentum_x = eqn["momentum_x"].evaluate(dict_mapping)["momentum_x"] + momentum_y = eqn["momentum_y"].evaluate(dict_mapping)["momentum_y"] + momentum_z = eqn["momentum_z"].evaluate(dict_mapping)["momentum_z"] + + # Compute the weights for the equation residuals + weight_continuity = torch.sigmoid(0.5 * (torch.abs(continuity) - 10)) + weight_momentum_x = torch.sigmoid(0.5 * (torch.abs(momentum_x) - 10)) + weight_momentum_y = torch.sigmoid(0.5 * (torch.abs(momentum_y) - 10)) + weight_momentum_z = torch.sigmoid(0.5 * (torch.abs(momentum_z) - 10)) + + weighted_continuity = weight_continuity * torch.abs(continuity) + weighted_momentum_x = weight_momentum_x * torch.abs(momentum_x) + weighted_momentum_y = weight_momentum_y * torch.abs(momentum_y) + weighted_momentum_z = weight_momentum_z * torch.abs(momentum_z) + + # Compute data loss + num = torch.sum(mask * (output - target) ** 2.0, dims) + if loss_type == "rmse": + denom = torch.sum(mask * target**2.0, dims) + else: + denom = torch.sum(mask) + + del coords_total, output_total + torch.cuda.empty_cache() + + return ( + torch.mean(num / denom), + torch.mean(torch.abs(weighted_continuity)), + torch.mean(torch.abs(weighted_momentum_x)), + torch.mean(torch.abs(weighted_momentum_y)), + torch.mean(torch.abs(weighted_momentum_z)), + ) + + +def loss_fn( + output: torch.Tensor, + target: torch.Tensor, + loss_type: Literal["mse", "rmse"], + padded_value: float = -10, +) -> torch.Tensor: + """Calculate mean squared error or root mean squared error with masking for padded values. + + Args: + output: Predicted values from the model + target: Ground truth values + loss_type: Type of loss to calculate ("mse" or "rmse") + padded_value: Value used for padding in the tensor + + Returns: + Calculated loss as a scalar tensor + """ + mask = abs(target - padded_value) > 1e-3 + + if loss_type == "rmse": + dims = (0, 1) + else: + dims = None + + num = torch.sum(mask * (output - target) ** 2.0, dims) + if loss_type == "rmse": + denom = torch.sum(mask * target**2.0, dims) + loss = torch.mean(torch.sqrt(num / denom)) + elif loss_type == "mse": + denom = torch.sum(mask) + loss = torch.mean(num / denom) + else: + raise ValueError(f"Invalid loss type: {loss_type}") + return loss + + +def loss_fn_with_physics( + output: torch.Tensor, + target: torch.Tensor, + loss_type: Literal["mse", "rmse"], + padded_value: float = -10, + first_deriv: torch.nn.Module = None, + eqn: Any = None, + bounding_box: torch.Tensor = None, + vol_factors: torch.Tensor = None, +) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + """Calculate loss with physics-based terms for appropriate equations. + + Args: + output: Predicted values from the model (with neighbor data when physics enabled) + target: Ground truth values + loss_type: Type of loss to calculate ("mse" or "rmse") + padded_value: Value used for padding in the tensor + first_deriv: First derivative calculator + eqn: Equations + bounding_box: Bounding box for normalization + vol_factors: Volume factors for normalization + + Returns: + Tuple of (data_loss, continuity_loss, momentum_x_loss, momentum_y_loss, momentum_z_loss) + """ + mask = abs(target - padded_value) > 1e-3 + + if loss_type == "rmse": + dims = (0, 1) + else: + dims = None + + # Call the physics loss computation function + return compute_physics_loss( + output=output, + target=target, + mask=mask, + loss_type=loss_type, + dims=dims, + first_deriv=first_deriv, + eqn=eqn, + bounding_box=bounding_box, + vol_factors=vol_factors, + ) + + +def loss_fn_surface( + output: torch.Tensor, target: torch.Tensor, loss_type: Literal["mse", "rmse"] +) -> torch.Tensor: + """Calculate loss for surface data by handling scalar and vector components separately. + + Args: + output: Predicted surface values from the model + target: Ground truth surface values + loss_type: Type of loss to calculate ("mse" or "rmse") + + Returns: + Combined scalar and vector loss as a scalar tensor + """ + # Separate the scalar and vector components: + output_scalar, output_vector = torch.split(output, [1, 3], dim=2) + target_scalar, target_vector = torch.split(target, [1, 3], dim=2) + + numerator = torch.mean((output_scalar - target_scalar) ** 2.0) + vector_diff_sq = torch.mean((target_vector - output_vector) ** 2.0, (0, 1)) + if loss_type == "mse": + masked_loss_pres = numerator + masked_loss_ws = torch.sum(vector_diff_sq) + else: + denom = torch.mean((target_scalar) ** 2.0) + masked_loss_pres = numerator / denom + + # Compute the mean diff**2 of the vector component, leave the last dimension: + masked_loss_ws_num = vector_diff_sq + masked_loss_ws_denom = torch.mean((target_vector) ** 2.0, (0, 1)) + masked_loss_ws = torch.sum(masked_loss_ws_num / masked_loss_ws_denom) + + loss = masked_loss_pres + masked_loss_ws + + return loss / 4.0 + + +def loss_fn_area( + output: torch.Tensor, + target: torch.Tensor, + normals: torch.Tensor, + area: torch.Tensor, + area_scaling_factor: float, + loss_type: Literal["mse", "rmse"], +) -> torch.Tensor: + """Calculate area-weighted loss for surface data considering normal vectors. + + Args: + output: Predicted surface values from the model + target: Ground truth surface values + normals: Normal vectors for the surface + area: Area values for surface elements + area_scaling_factor: Scaling factor for area weighting + loss_type: Type of loss to calculate ("mse" or "rmse") + + Returns: + Area-weighted loss as a scalar tensor + """ + area = area * area_scaling_factor + area_scale_factor = area + + # Separate the scalar and vector components. + target_scalar, target_vector = torch.split( + target * area_scale_factor, [1, 3], dim=2 + ) + output_scalar, output_vector = torch.split( + output * area_scale_factor, [1, 3], dim=2 + ) + + # Apply the normals to the scalar components (only [:,:,0]): + normals, _ = torch.split(normals, [1, normals.shape[-1] - 1], dim=2) + target_scalar = target_scalar * normals + output_scalar = output_scalar * normals + + # Compute the mean diff**2 of the scalar component: + masked_loss_pres = torch.mean(((output_scalar - target_scalar) ** 2.0), dim=(0, 1)) + if loss_type == "rmse": + masked_loss_pres /= torch.mean(target_scalar**2.0, dim=(0, 1)) + + # Compute the mean diff**2 of the vector component, leave the last dimension: + masked_loss_ws = torch.mean((target_vector - output_vector) ** 2.0, (0, 1)) + + if loss_type == "rmse": + masked_loss_ws /= torch.mean((target_vector) ** 2.0, (0, 1)) + + # Combine the scalar and vector components: + loss = 0.25 * (masked_loss_pres + torch.sum(masked_loss_ws)) + + return loss + + +def integral_loss_fn( + output, target, area, normals, stream_velocity=None, padded_value=-10 +): + drag_loss = drag_loss_fn( + output, target, area, normals, stream_velocity=stream_velocity, padded_value=-10 + ) + lift_loss = lift_loss_fn( + output, target, area, normals, stream_velocity=stream_velocity, padded_value=-10 + ) + return lift_loss + drag_loss + + +def lift_loss_fn(output, target, area, normals, stream_velocity=None, padded_value=-10): + vel_inlet = stream_velocity # Get this from the dataset + mask = abs(target - padded_value) > 1e-3 + + output_true = target * mask * area * (vel_inlet) ** 2.0 + output_pred = output * mask * area * (vel_inlet) ** 2.0 + + normals = torch.select(normals, 2, 2) + # output_true_0 = output_true[:, :, 0] + output_true_0 = output_true.select(2, 0) + output_pred_0 = output_pred.select(2, 0) + + pres_true = output_true_0 * normals + pres_pred = output_pred_0 * normals + + wz_true = output_true[:, :, -1] + wz_pred = output_pred[:, :, -1] + + masked_pred = torch.mean(pres_pred + wz_pred, (1)) + masked_truth = torch.mean(pres_true + wz_true, (1)) + + loss = (masked_pred - masked_truth) ** 2.0 + loss = torch.mean(loss) + return loss + + +def drag_loss_fn(output, target, area, normals, stream_velocity=None, padded_value=-10): + vel_inlet = stream_velocity # Get this from the dataset + mask = abs(target - padded_value) > 1e-3 + output_true = target * mask * area * (vel_inlet) ** 2.0 + output_pred = output * mask * area * (vel_inlet) ** 2.0 + + pres_true = output_true[:, :, 0] * normals[:, :, 0] + pres_pred = output_pred[:, :, 0] * normals[:, :, 0] + + wx_true = output_true[:, :, 1] + wx_pred = output_pred[:, :, 1] + + masked_pred = torch.mean(pres_pred + wx_pred, (1)) + masked_truth = torch.mean(pres_true + wx_true, (1)) + + loss = (masked_pred - masked_truth) ** 2.0 + loss = torch.mean(loss) + return loss + + +def compute_loss_dict( + prediction_vol: torch.Tensor, + prediction_surf: torch.Tensor, + batch_inputs: dict, + loss_fn_type: dict, + integral_scaling_factor: float, + surf_loss_scaling: float, + vol_loss_scaling: float, + first_deriv: torch.nn.Module | None = None, + eqn: Any = None, + bounding_box: torch.Tensor | None = None, + vol_factors: torch.Tensor | None = None, + add_physics_loss: bool = False, +) -> tuple[torch.Tensor, dict]: + """ + Compute the loss terms in a single function call. + + Computes: + - Volume loss if prediction_vol is not None + - Surface loss if prediction_surf is not None + - Integral loss if prediction_surf is not None + - Total loss as a weighted sum of the above + + Returns: + - Total loss as a scalar tensor + - Dictionary of loss terms (for logging, etc) + """ + nvtx.range_push("Loss Calculation") + total_loss_terms = [] + loss_dict = {} + + if prediction_vol is not None: + target_vol = batch_inputs["volume_fields"] + + if add_physics_loss: + loss_vol = loss_fn_with_physics( + prediction_vol, + target_vol, + loss_fn_type.loss_type, + padded_value=-10, + first_deriv=first_deriv, + eqn=eqn, + bounding_box=bounding_box, + vol_factors=vol_factors, + ) + loss_dict["loss_vol"] = loss_vol[0] + loss_dict["loss_continuity"] = loss_vol[1] + loss_dict["loss_momentum_x"] = loss_vol[2] + loss_dict["loss_momentum_y"] = loss_vol[3] + loss_dict["loss_momentum_z"] = loss_vol[4] + total_loss_terms.append(loss_vol[0]) + total_loss_terms.append(loss_vol[1]) + total_loss_terms.append(loss_vol[2]) + total_loss_terms.append(loss_vol[3]) + total_loss_terms.append(loss_vol[4]) + else: + loss_vol = loss_fn( + prediction_vol, + target_vol, + loss_fn_type.loss_type, + padded_value=-10, + ) + loss_dict["loss_vol"] = loss_vol + total_loss_terms.append(loss_vol) + + if prediction_surf is not None: + target_surf = batch_inputs["surface_fields"] + surface_areas = batch_inputs["surface_areas"] + surface_areas = torch.unsqueeze(surface_areas, -1) + surface_normals = batch_inputs["surface_normals"] + + # Needs to be taken from the dataset + stream_velocity = batch_inputs["global_params_values"][:, 0, :] + + loss_surf = loss_fn_surface( + prediction_surf, + target_surf, + loss_fn_type.loss_type, + ) + + loss_surf_area = loss_fn_area( + prediction_surf, + target_surf, + surface_normals, + surface_areas, + area_scaling_factor=loss_fn_type.area_weighing_factor, + loss_type=loss_fn_type.loss_type, + ) + + if loss_fn_type.loss_type == "mse": + loss_surf = loss_surf * surf_loss_scaling + loss_surf_area = loss_surf_area * surf_loss_scaling + + total_loss_terms.append(loss_surf) + loss_dict["loss_surf"] = loss_surf + total_loss_terms.append(loss_surf_area) + loss_dict["loss_surf_area"] = loss_surf_area + loss_integral = ( + integral_loss_fn( + prediction_surf, + target_surf, + surface_areas, + surface_normals, + stream_velocity, + padded_value=-10, + ) + ) * integral_scaling_factor + loss_dict["loss_integral"] = loss_integral + total_loss_terms.append(loss_integral) + + total_loss = sum(total_loss_terms) + loss_dict["total_loss"] = total_loss + nvtx.range_pop() + + return total_loss, loss_dict diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py index 3311083e04..7882e2d006 100644 --- a/examples/cfd/external_aerodynamics/domino/src/train.py +++ b/examples/cfd/external_aerodynamics/domino/src/train.py @@ -76,527 +76,8 @@ # Profiler().enable("torch") # Profiler().initialize() - -def compute_physics_loss( - output: torch.Tensor, - target: torch.Tensor, - mask: torch.Tensor, - loss_type: Literal["mse", "rmse"], - dims: tuple[int, ...] | None, - first_deriv: torch.nn.Module, - eqn: Any, - bounding_box: torch.Tensor, - vol_factors: torch.Tensor, -) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Compute physics-based loss terms for Navier-Stokes equations. - - Args: - output: Model output containing (output, coords_neighbors, output_neighbors, neighbors_list) - target: Ground truth values - mask: Mask for valid values - loss_type: Type of loss to calculate ("mse" or "rmse") - dims: Dimensions for loss calculation - first_deriv: First derivative calculator - eqn: Equations - bounding_box: Bounding box for normalization - vol_factors: Volume factors for normalization - - Returns: - Tuple of (data_loss, continuity_loss, momentum_x_loss, momentum_y_loss, momentum_z_loss) - """ - # Physics loss enabled - output, coords_neighbors, output_neighbors, neighbors_list = output - batch_size = output.shape[1] - fields, num_neighbors = output_neighbors.shape[3], output_neighbors.shape[2] - coords_total = coords_neighbors[0, :] - output_total = output_neighbors[0, :] - output_total_unnormalized = unnormalize( - output_total, vol_factors[0], vol_factors[1] - ) - coords_total_unnormalized = unnormalize( - coords_total, bounding_box[0], bounding_box[1] - ) - - # compute first order gradients on all the nodes from the neighbors_list - grad_list = {} - for parent_id, neighbor_ids in neighbors_list.items(): - neighbor_ids_tensor = torch.tensor(neighbor_ids).to( - output_total_unnormalized.device - ) - du = ( - output_total_unnormalized[:, [parent_id]] - - output_total_unnormalized[:, neighbor_ids_tensor] - ) - dv = ( - coords_total_unnormalized[:, [parent_id]] - - coords_total_unnormalized[:, neighbor_ids_tensor] - ) - grads = first_deriv.forward( - coords=None, connectivity_tensor=None, y=None, du=du, dv=dv - ) - grad = torch.cat(grads, dim=1) - grad_list[parent_id] = grad - - # compute second order gradients on only the center node - neighbor_ids_tensor = torch.tensor(neighbors_list[0]).to( - output_total_unnormalized.device - ) - grad_neighbors_center = torch.stack([v for v in grad_list.values()], dim=1) - grad_neighbors_center = grad_neighbors_center.reshape( - batch_size, len(neighbors_list[0]) + 1, -1 - ) - - du = grad_neighbors_center[:, [0]] - grad_neighbors_center[:, neighbor_ids_tensor] - dv = ( - coords_total_unnormalized[:, [0]] - - coords_total_unnormalized[:, neighbor_ids_tensor] - ) - - # second order gradients - ggrads_center = first_deriv.forward( - coords=None, connectivity_tensor=None, y=None, du=du, dv=dv - ) - ggrad_center = torch.cat(ggrads_center, dim=1) - grad_neighbors_center = grad_neighbors_center.reshape( - batch_size, len(neighbors_list[0]) + 1, 3, -1 - ) - - # Get the outputs on the original nodes - fields_center_unnormalized = output_total_unnormalized[:, 0, :] - grad_center = grad_neighbors_center[:, 0, :, :] - grad_grad_uvw_center = ggrad_center[:, :, :9] - - nu = 1.507 * 1e-5 - - dict_mapping = { - "u": fields_center_unnormalized[:, [0]], - "v": fields_center_unnormalized[:, [1]], - "w": fields_center_unnormalized[:, [2]], - "p": fields_center_unnormalized[:, [3]], - "nu": nu + fields_center_unnormalized[:, [4]], - "u__x": grad_center[:, 0, [0]], - "u__y": grad_center[:, 1, [0]], - "u__z": grad_center[:, 2, [0]], - "v__x": grad_center[:, 0, [1]], - "v__y": grad_center[:, 1, [1]], - "v__z": grad_center[:, 2, [1]], - "w__x": grad_center[:, 0, [2]], - "w__y": grad_center[:, 1, [2]], - "w__z": grad_center[:, 2, [2]], - "p__x": grad_center[:, 0, [3]], - "p__y": grad_center[:, 1, [3]], - "p__z": grad_center[:, 2, [3]], - "nu__x": grad_center[:, 0, [4]], - "nu__y": grad_center[:, 1, [4]], - "nu__z": grad_center[:, 2, [4]], - "u__x__x": grad_grad_uvw_center[:, 0, [0]], - "u__x__y": grad_grad_uvw_center[:, 1, [0]], - "u__x__z": grad_grad_uvw_center[:, 2, [0]], - "u__y__x": grad_grad_uvw_center[:, 1, [0]], # same as __x__y - "u__y__y": grad_grad_uvw_center[:, 1, [1]], - "u__y__z": grad_grad_uvw_center[:, 2, [1]], - "u__z__x": grad_grad_uvw_center[:, 2, [0]], # same as __x__z - "u__z__y": grad_grad_uvw_center[:, 2, [1]], # same as __y__z - "u__z__z": grad_grad_uvw_center[:, 2, [2]], - "v__x__x": grad_grad_uvw_center[:, 0, [3]], - "v__x__y": grad_grad_uvw_center[:, 1, [3]], - "v__x__z": grad_grad_uvw_center[:, 2, [3]], - "v__y__x": grad_grad_uvw_center[:, 1, [3]], # same as __x__y - "v__y__y": grad_grad_uvw_center[:, 1, [4]], - "v__y__z": grad_grad_uvw_center[:, 2, [4]], - "v__z__x": grad_grad_uvw_center[:, 2, [3]], # same as __x__z - "v__z__y": grad_grad_uvw_center[:, 2, [4]], # same as __y__z - "v__z__z": grad_grad_uvw_center[:, 2, [5]], - "w__x__x": grad_grad_uvw_center[:, 0, [6]], - "w__x__y": grad_grad_uvw_center[:, 1, [6]], - "w__x__z": grad_grad_uvw_center[:, 2, [6]], - "w__y__x": grad_grad_uvw_center[:, 1, [6]], # same as __x__y - "w__y__y": grad_grad_uvw_center[:, 1, [7]], - "w__y__z": grad_grad_uvw_center[:, 2, [7]], - "w__z__x": grad_grad_uvw_center[:, 2, [6]], # same as __x__z - "w__z__y": grad_grad_uvw_center[:, 2, [7]], # same as __y__z - "w__z__z": grad_grad_uvw_center[:, 2, [8]], - } - continuity = eqn["continuity"].evaluate(dict_mapping)["continuity"] - momentum_x = eqn["momentum_x"].evaluate(dict_mapping)["momentum_x"] - momentum_y = eqn["momentum_y"].evaluate(dict_mapping)["momentum_y"] - momentum_z = eqn["momentum_z"].evaluate(dict_mapping)["momentum_z"] - - # Compute the weights for the equation residuals - weight_continuity = torch.sigmoid(0.5 * (torch.abs(continuity) - 10)) - weight_momentum_x = torch.sigmoid(0.5 * (torch.abs(momentum_x) - 10)) - weight_momentum_y = torch.sigmoid(0.5 * (torch.abs(momentum_y) - 10)) - weight_momentum_z = torch.sigmoid(0.5 * (torch.abs(momentum_z) - 10)) - - weighted_continuity = weight_continuity * torch.abs(continuity) - weighted_momentum_x = weight_momentum_x * torch.abs(momentum_x) - weighted_momentum_y = weight_momentum_y * torch.abs(momentum_y) - weighted_momentum_z = weight_momentum_z * torch.abs(momentum_z) - - # Compute data loss - num = torch.sum(mask * (output - target) ** 2.0, dims) - if loss_type == "rmse": - denom = torch.sum(mask * target**2.0, dims) - else: - denom = torch.sum(mask) - - del coords_total, output_total - torch.cuda.empty_cache() - - return ( - torch.mean(num / denom), - torch.mean(torch.abs(weighted_continuity)), - torch.mean(torch.abs(weighted_momentum_x)), - torch.mean(torch.abs(weighted_momentum_y)), - torch.mean(torch.abs(weighted_momentum_z)), - ) - - -def loss_fn( - output: torch.Tensor, - target: torch.Tensor, - loss_type: Literal["mse", "rmse"], - padded_value: float = -10, -) -> torch.Tensor: - """Calculate mean squared error or root mean squared error with masking for padded values. - - Args: - output: Predicted values from the model - target: Ground truth values - loss_type: Type of loss to calculate ("mse" or "rmse") - padded_value: Value used for padding in the tensor - - Returns: - Calculated loss as a scalar tensor - """ - mask = abs(target - padded_value) > 1e-3 - - if loss_type == "rmse": - dims = (0, 1) - else: - dims = None - - num = torch.sum(mask * (output - target) ** 2.0, dims) - if loss_type == "rmse": - denom = torch.sum(mask * target**2.0, dims) - loss = torch.mean(torch.sqrt(num / denom)) - elif loss_type == "mse": - denom = torch.sum(mask) - loss = torch.mean(num / denom) - else: - raise ValueError(f"Invalid loss type: {loss_type}") - return loss - - -def loss_fn_with_physics( - output: torch.Tensor, - target: torch.Tensor, - loss_type: Literal["mse", "rmse"], - padded_value: float = -10, - first_deriv: torch.nn.Module = None, - eqn: Any = None, - bounding_box: torch.Tensor = None, - vol_factors: torch.Tensor = None, -) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Calculate loss with physics-based terms for appropriate equations. - - Args: - output: Predicted values from the model (with neighbor data when physics enabled) - target: Ground truth values - loss_type: Type of loss to calculate ("mse" or "rmse") - padded_value: Value used for padding in the tensor - first_deriv: First derivative calculator - eqn: Equations - bounding_box: Bounding box for normalization - vol_factors: Volume factors for normalization - - Returns: - Tuple of (data_loss, continuity_loss, momentum_x_loss, momentum_y_loss, momentum_z_loss) - """ - mask = abs(target - padded_value) > 1e-3 - - if loss_type == "rmse": - dims = (0, 1) - else: - dims = None - - # Call the physics loss computation function - return compute_physics_loss( - output=output, - target=target, - mask=mask, - loss_type=loss_type, - dims=dims, - first_deriv=first_deriv, - eqn=eqn, - bounding_box=bounding_box, - vol_factors=vol_factors, - ) - - -def loss_fn_surface( - output: torch.Tensor, target: torch.Tensor, loss_type: Literal["mse", "rmse"] -) -> torch.Tensor: - """Calculate loss for surface data by handling scalar and vector components separately. - - Args: - output: Predicted surface values from the model - target: Ground truth surface values - loss_type: Type of loss to calculate ("mse" or "rmse") - - Returns: - Combined scalar and vector loss as a scalar tensor - """ - # Separate the scalar and vector components: - output_scalar, output_vector = torch.split(output, [1, 3], dim=2) - target_scalar, target_vector = torch.split(target, [1, 3], dim=2) - - numerator = torch.mean((output_scalar - target_scalar) ** 2.0) - vector_diff_sq = torch.mean((target_vector - output_vector) ** 2.0, (0, 1)) - if loss_type == "mse": - masked_loss_pres = numerator - masked_loss_ws = torch.sum(vector_diff_sq) - else: - denom = torch.mean((target_scalar) ** 2.0) - masked_loss_pres = numerator / denom - - # Compute the mean diff**2 of the vector component, leave the last dimension: - masked_loss_ws_num = vector_diff_sq - masked_loss_ws_denom = torch.mean((target_vector) ** 2.0, (0, 1)) - masked_loss_ws = torch.sum(masked_loss_ws_num / masked_loss_ws_denom) - - loss = masked_loss_pres + masked_loss_ws - - return loss / 4.0 - - -def loss_fn_area( - output: torch.Tensor, - target: torch.Tensor, - normals: torch.Tensor, - area: torch.Tensor, - area_scaling_factor: float, - loss_type: Literal["mse", "rmse"], -) -> torch.Tensor: - """Calculate area-weighted loss for surface data considering normal vectors. - - Args: - output: Predicted surface values from the model - target: Ground truth surface values - normals: Normal vectors for the surface - area: Area values for surface elements - area_scaling_factor: Scaling factor for area weighting - loss_type: Type of loss to calculate ("mse" or "rmse") - - Returns: - Area-weighted loss as a scalar tensor - """ - area = area * area_scaling_factor - area_scale_factor = area - - # Separate the scalar and vector components. - target_scalar, target_vector = torch.split( - target * area_scale_factor, [1, 3], dim=2 - ) - output_scalar, output_vector = torch.split( - output * area_scale_factor, [1, 3], dim=2 - ) - - # Apply the normals to the scalar components (only [:,:,0]): - normals, _ = torch.split(normals, [1, normals.shape[-1] - 1], dim=2) - target_scalar = target_scalar * normals - output_scalar = output_scalar * normals - - # Compute the mean diff**2 of the scalar component: - masked_loss_pres = torch.mean(((output_scalar - target_scalar) ** 2.0), dim=(0, 1)) - if loss_type == "rmse": - masked_loss_pres /= torch.mean(target_scalar**2.0, dim=(0, 1)) - - # Compute the mean diff**2 of the vector component, leave the last dimension: - masked_loss_ws = torch.mean((target_vector - output_vector) ** 2.0, (0, 1)) - - if loss_type == "rmse": - masked_loss_ws /= torch.mean((target_vector) ** 2.0, (0, 1)) - - # Combine the scalar and vector components: - loss = 0.25 * (masked_loss_pres + torch.sum(masked_loss_ws)) - - return loss - - -def integral_loss_fn( - output, target, area, normals, stream_velocity=None, padded_value=-10 -): - drag_loss = drag_loss_fn( - output, target, area, normals, stream_velocity=stream_velocity, padded_value=-10 - ) - lift_loss = lift_loss_fn( - output, target, area, normals, stream_velocity=stream_velocity, padded_value=-10 - ) - return lift_loss + drag_loss - - -def lift_loss_fn(output, target, area, normals, stream_velocity=None, padded_value=-10): - vel_inlet = stream_velocity # Get this from the dataset - mask = abs(target - padded_value) > 1e-3 - - output_true = target * mask * area * (vel_inlet) ** 2.0 - output_pred = output * mask * area * (vel_inlet) ** 2.0 - - normals = torch.select(normals, 2, 2) - # output_true_0 = output_true[:, :, 0] - output_true_0 = output_true.select(2, 0) - output_pred_0 = output_pred.select(2, 0) - - pres_true = output_true_0 * normals - pres_pred = output_pred_0 * normals - - wz_true = output_true[:, :, -1] - wz_pred = output_pred[:, :, -1] - - masked_pred = torch.mean(pres_pred + wz_pred, (1)) - masked_truth = torch.mean(pres_true + wz_true, (1)) - - loss = (masked_pred - masked_truth) ** 2.0 - loss = torch.mean(loss) - return loss - - -def drag_loss_fn(output, target, area, normals, stream_velocity=None, padded_value=-10): - vel_inlet = stream_velocity # Get this from the dataset - mask = abs(target - padded_value) > 1e-3 - output_true = target * mask * area * (vel_inlet) ** 2.0 - output_pred = output * mask * area * (vel_inlet) ** 2.0 - - pres_true = output_true[:, :, 0] * normals[:, :, 0] - pres_pred = output_pred[:, :, 0] * normals[:, :, 0] - - wx_true = output_true[:, :, 1] - wx_pred = output_pred[:, :, 1] - - masked_pred = torch.mean(pres_pred + wx_pred, (1)) - masked_truth = torch.mean(pres_true + wx_true, (1)) - - loss = (masked_pred - masked_truth) ** 2.0 - loss = torch.mean(loss) - return loss - - -def compute_loss_dict( - prediction_vol: torch.Tensor, - prediction_surf: torch.Tensor, - batch_inputs: dict, - loss_fn_type: dict, - integral_scaling_factor: float, - surf_loss_scaling: float, - vol_loss_scaling: float, - first_deriv: torch.nn.Module | None = None, - eqn: Any = None, - bounding_box: torch.Tensor | None = None, - vol_factors: torch.Tensor | None = None, - add_physics_loss: bool = False, -) -> tuple[torch.Tensor, dict]: - """ - Compute the loss terms in a single function call. - - Computes: - - Volume loss if prediction_vol is not None - - Surface loss if prediction_surf is not None - - Integral loss if prediction_surf is not None - - Total loss as a weighted sum of the above - - Returns: - - Total loss as a scalar tensor - - Dictionary of loss terms (for logging, etc) - """ - nvtx.range_push("Loss Calculation") - total_loss_terms = [] - loss_dict = {} - - if prediction_vol is not None: - target_vol = batch_inputs["volume_fields"] - - if add_physics_loss: - loss_vol = loss_fn_with_physics( - prediction_vol, - target_vol, - loss_fn_type.loss_type, - padded_value=-10, - first_deriv=first_deriv, - eqn=eqn, - bounding_box=bounding_box, - vol_factors=vol_factors, - ) - loss_dict["loss_vol"] = loss_vol[0] - loss_dict["loss_continuity"] = loss_vol[1] - loss_dict["loss_momentum_x"] = loss_vol[2] - loss_dict["loss_momentum_y"] = loss_vol[3] - loss_dict["loss_momentum_z"] = loss_vol[4] - total_loss_terms.append(loss_vol[0]) - total_loss_terms.append(loss_vol[1]) - total_loss_terms.append(loss_vol[2]) - total_loss_terms.append(loss_vol[3]) - total_loss_terms.append(loss_vol[4]) - else: - loss_vol = loss_fn( - prediction_vol, - target_vol, - loss_fn_type.loss_type, - padded_value=-10, - ) - loss_dict["loss_vol"] = loss_vol - total_loss_terms.append(loss_vol) - - if prediction_surf is not None: - target_surf = batch_inputs["surface_fields"] - surface_areas = batch_inputs["surface_areas"] - surface_areas = torch.unsqueeze(surface_areas, -1) - surface_normals = batch_inputs["surface_normals"] - - # Needs to be taken from the dataset - stream_velocity = batch_inputs["global_params_values"][:, 0, :] - - loss_surf = loss_fn_surface( - prediction_surf, - target_surf, - loss_fn_type.loss_type, - ) - - loss_surf_area = loss_fn_area( - prediction_surf, - target_surf, - surface_normals, - surface_areas, - area_scaling_factor=loss_fn_type.area_weighing_factor, - loss_type=loss_fn_type.loss_type, - ) - - if loss_fn_type.loss_type == "mse": - loss_surf = loss_surf * surf_loss_scaling - loss_surf_area = loss_surf_area * surf_loss_scaling - - total_loss_terms.append(loss_surf) - loss_dict["loss_surf"] = loss_surf - total_loss_terms.append(loss_surf_area) - loss_dict["loss_surf_area"] = loss_surf_area - loss_integral = ( - integral_loss_fn( - prediction_surf, - target_surf, - surface_areas, - surface_normals, - stream_velocity, - padded_value=-10, - ) - ) * integral_scaling_factor - loss_dict["loss_integral"] = loss_integral - total_loss_terms.append(loss_integral) - - total_loss = sum(total_loss_terms) - loss_dict["total_loss"] = total_loss - nvtx.range_pop() - - return total_loss, loss_dict +from loss import compute_loss_dict +from utils import get_num_vars def validation_step( @@ -763,71 +244,31 @@ def train_epoch( @hydra.main(version_base="1.3", config_path="conf", config_name="config") def main(cfg: DictConfig) -> None: + ################################ # initialize distributed manager + ################################ DistributedManager.initialize() dist = DistributedManager() + ################################ # Initialize NVML + ################################ nvmlInit() - gpu_handle = nvmlDeviceGetHandleByIndex(dist.device.index) - compute_scaling_factors( - cfg=cfg, - input_path=cfg.data.input_dir, - use_cache=cfg.data_processor.use_cache, - ) - model_type = cfg.model.model_type + ################################ + # Initialize logger + ################################ logger = PythonLogger("Train") logger = RankZeroLoggingWrapper(logger, dist) logger.info(f"Config summary:\n{OmegaConf.to_yaml(cfg, sort_keys=True)}") - # Get physics imports conditionally - add_physics_loss = getattr(cfg.train, "add_physics_loss", False) - - if add_physics_loss: - from physicsnemo.sym.eq.pde import PDE - from physicsnemo.sym.eq.ls.grads import FirstDeriv - from physicsnemo.sym.eq.pdes.navier_stokes import IncompressibleNavierStokes - else: - PDE = FirstDeriv = IncompressibleNavierStokes = None - - num_vol_vars = 0 - volume_variable_names = [] - if model_type == "volume" or model_type == "combined": - volume_variable_names = list(cfg.variables.volume.solution.keys()) - for j in volume_variable_names: - if cfg.variables.volume.solution[j] == "vector": - num_vol_vars += 3 - else: - num_vol_vars += 1 - else: - num_vol_vars = None - - num_surf_vars = 0 - surface_variable_names = [] - if model_type == "surface" or model_type == "combined": - surface_variable_names = list(cfg.variables.surface.solution.keys()) - num_surf_vars = 0 - for j in surface_variable_names: - if cfg.variables.surface.solution[j] == "vector": - num_surf_vars += 3 - else: - num_surf_vars += 1 - else: - num_surf_vars = None - - num_global_features = 0 - global_params_names = list(cfg.variables.global_parameters.keys()) - for param in global_params_names: - if cfg.variables.global_parameters[param].type == "vector": - num_global_features += len(cfg.variables.global_parameters[param].reference) - elif cfg.variables.global_parameters[param].type == "scalar": - num_global_features += 1 - else: - raise ValueError(f"Unknown global parameter type") + ################################ + # Get or compute scaling and normalization factors + # min/max/mean/std of input points + targets + ################################ vol_save_path = os.path.join( "outputs", cfg.project.name, "volume_scaling_factors.npy" @@ -844,6 +285,36 @@ def main(cfg: DictConfig) -> None: vol_factors = None vol_factors_tensor = None + scaling_factors = compute_scaling_factors( + cfg=cfg, + input_path=cfg.data.input_dir, + use_cache=cfg.data_processor.use_cache, + ) + + model_type = cfg.model.model_type + + # Get physics imports conditionally + add_physics_loss = getattr(cfg.train, "add_physics_loss", False) + + if add_physics_loss: + from physicsnemo.sym.eq.pde import PDE + from physicsnemo.sym.eq.ls.grads import FirstDeriv + from physicsnemo.sym.eq.pdes.navier_stokes import IncompressibleNavierStokes + else: + PDE = FirstDeriv = IncompressibleNavierStokes = None + + num_vol_vars, num_surf_vars, num_global_features = get_num_vars(cfg, model_type) + + if model_type == "combined" or model_type == "surface": + surface_variable_names = list(cfg.variables.surface.solution.keys()) + else: + surface_variable_names = [] + + if model_type == "combined" or model_type == "volume": + volume_variable_names = list(cfg.variables.volume.solution.keys()) + else: + volume_variable_names = [] + bounding_box = None if add_physics_loss: bounding_box = cfg.data.bounding_box @@ -891,16 +362,16 @@ def main(cfg: DictConfig) -> None: **cfg.val.sampler, ) - train_dataloader = DataLoader( - train_dataset, - sampler=train_sampler, - **cfg.train.dataloader, - ) - val_dataloader = DataLoader( - val_dataset, - sampler=val_sampler, - **cfg.val.dataloader, - ) + # train_dataloader = DataLoader( + # train_dataset, + # sampler=train_sampler, + # **cfg.train.dataloader, + # ) + # val_dataloader = DataLoader( + # val_dataset, + # sampler=val_sampler, + # **cfg.val.dataloader, + # ) model = DoMINO( input_features=3, @@ -1001,6 +472,9 @@ def main(cfg: DictConfig) -> None: else: surface_scaling_loss = cfg.model.surf_loss_scaling + train_dataset.set_indices(list(train_sampler)) + print(f"train_dataset.indices: {train_dataset.indices}") + model.train(True) epoch_start_time = time.perf_counter() avg_loss = train_epoch( diff --git a/examples/cfd/external_aerodynamics/domino/src/utils.py b/examples/cfd/external_aerodynamics/domino/src/utils.py new file mode 100644 index 0000000000..abfc4d7351 --- /dev/null +++ b/examples/cfd/external_aerodynamics/domino/src/utils.py @@ -0,0 +1,74 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def get_num_vars(cfg: dict, model_type: Literal["volume", "surface", "combined"]): + """Calculate the number of variables for volume, surface, and global features. + + This function analyzes the configuration to determine how many variables are needed + for different mesh data types based on the model type. Vector variables contribute + 3 components (x, y, z) while scalar variables contribute 1 component each. + + Args: + cfg: Configuration object containing variable definitions for volume, surface, + and global parameters with their types (scalar/vector). + model_type (str): Type of model - can be "volume", "surface", or "combined". + Determines which variable types are included in the count. + + Returns: + tuple: A 3-tuple containing: + - num_vol_vars (int or None): Number of volume variables. None if model_type + is not "volume" or "combined". + - num_surf_vars (int or None): Number of surface variables. None if model_type + is not "surface" or "combined". + - num_global_features (int): Number of global parameter features. + """ + num_vol_vars = 0 + volume_variable_names = [] + if model_type == "volume" or model_type == "combined": + volume_variable_names = list(cfg.variables.volume.solution.keys()) + for j in volume_variable_names: + if cfg.variables.volume.solution[j] == "vector": + num_vol_vars += 3 + else: + num_vol_vars += 1 + else: + num_vol_vars = None + + num_surf_vars = 0 + surface_variable_names = [] + if model_type == "surface" or model_type == "combined": + surface_variable_names = list(cfg.variables.surface.solution.keys()) + num_surf_vars = 0 + for j in surface_variable_names: + if cfg.variables.surface.solution[j] == "vector": + num_surf_vars += 3 + else: + num_surf_vars += 1 + else: + num_surf_vars = None + + num_global_features = 0 + global_params_names = list(cfg.variables.global_parameters.keys()) + for param in global_params_names: + if cfg.variables.global_parameters[param].type == "vector": + num_global_features += len(cfg.variables.global_parameters[param].reference) + elif cfg.variables.global_parameters[param].type == "scalar": + num_global_features += 1 + else: + raise ValueError(f"Unknown global parameter type") + + return num_vol_vars, num_surf_vars, num_global_features diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py index d74d450fed..7bf4abb6c4 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe2.py +++ b/physicsnemo/datapipes/cae/domino_datapipe2.py @@ -864,7 +864,8 @@ def __next__(self): current_idx = self.i # Start loading two ahead: - if len(self.dataset) >= current_idx + 2: + N = len(self.indices) if hasattr(self, "indices") else len(self.dataset) + if N >= current_idx + 2: self.dataset.preload(self.idx_to_index(current_idx + 1)) self.dataset.preload(self.idx_to_index(current_idx + 2)) @@ -881,10 +882,12 @@ def __iter__(self): self.i = 0 + N = len(self.indices) if hasattr(self, "indices") else len(self.dataset) + # Trigger the dataset to start loading index 0: - if len(self.dataset) >= 1: + if N >= 1: self.dataset.preload(self.idx_to_index(self.i)) - if len(self.dataset) >= 2: + if N >= 2: self.dataset.preload(self.idx_to_index(self.i + 1)) return self @@ -893,15 +896,23 @@ def __iter__(self): def compute_scaling_factors(cfg: DictConfig, input_path: str, use_cache: bool) -> None: # Create a dataset for just the field keys: + norm_keys = [ + "volume_fields", + "surface_fields", + "stl_centers", + "volume_mesh_centers", + "surface_mesh_centers", + ] + dataset = DrivaerMLDataset( data_dir=input_path, - keys_to_read=["volume_fields", "surface_fields"], + keys_to_read=norm_keys, output_device=torch.device("cuda"), # TODO - configure this more carefully here ) mean, std, min_val, max_val = compute_mean_std_min_max( dataset, - field_keys=["volume_fields", "surface_fields"], + field_keys=norm_keys, ) return mean, std, min_val, max_val From 02b03a0272252e968ddf54d2d79205aff5b9f29a Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Fri, 5 Sep 2025 21:22:45 +0000 Subject: [PATCH 17/98] Most datapipe tests passing. Add compute_statistics script. Clean up training script a little, simply by moving things around ... --- .../domino/src/compute_statistics.py | 163 ++++++++++++++++++ .../external_aerodynamics/domino/src/train.py | 43 ++--- .../external_aerodynamics/domino/src/utils.py | 88 ++++++++++ physicsnemo/datapipes/cae/domino_datapipe2.py | 78 +++++---- .../datapipes/cae/drivaer_ml_dataset.py | 35 +++- 5 files changed, 347 insertions(+), 60 deletions(-) create mode 100644 examples/cfd/external_aerodynamics/domino/src/compute_statistics.py diff --git a/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py b/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py new file mode 100644 index 0000000000..5c9ef21f04 --- /dev/null +++ b/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py @@ -0,0 +1,163 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Compute and save scaling factors for DoMINO datasets. + +This script computes mean, standard deviation, minimum, and maximum values +for all field variables in a DoMINO dataset. The computed statistics are +saved in a structured format that can be easily loaded and used for +normalization during training and inference. + +The script uses the same configuration system as the training script, +ensuring consistency in dataset handling and processing parameters. +""" + +import os +import time +from pathlib import Path + +import hydra +import torch +from omegaconf import DictConfig, OmegaConf + +from physicsnemo.distributed import DistributedManager +from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper + +from physicsnemo.datapipes.cae.domino_datapipe2 import compute_scaling_factors +from utils import ScalingFactors + + +@hydra.main(version_base="1.3", config_path="conf", config_name="config") +def main(cfg: DictConfig) -> None: + """ + Main function to compute and save scaling factors. + + Args: + cfg: Hydra configuration object containing all parameters + """ + ################################ + # Initialize distributed manager + ################################ + DistributedManager.initialize() + dist = DistributedManager() + + ################################ + # Initialize logger + ################################ + logger = PythonLogger("ComputeStatistics") + logger = RankZeroLoggingWrapper(logger, dist) + + logger.info("Starting scaling factors computation") + logger.info(f"Config summary:\n{OmegaConf.to_yaml(cfg, sort_keys=True)}") + + ################################ + # Create output directory + ################################ + output_dir = os.path.join(cfg.output, "scaling_factors") + os.makedirs(output_dir, exist_ok=True) + + if dist.world_size > 1: + torch.distributed.barrier() + + ################################ + # Check if scaling exists + ################################ + pickle_path = output_dir + "/scaling_factors.pkl" + + try: + scaling_factors = ScalingFactors.load(pickle_path) + logger.info(f"Scaling factors loaded from: {pickle_path}") + except FileNotFoundError: + logger.info(f"Scaling factors not found at: {pickle_path}; recomputing.") + scaling_factors = None + + ################################ + # Compute scaling factors + ################################ + if scaling_factors is None: + logger.info("Computing scaling factors from dataset...") + start_time = time.perf_counter() + + target_keys = [ + "volume_fields", + "surface_fields", + "stl_centers", + "volume_mesh_centers", + "surface_mesh_centers", + ] + + mean, std, min_val, max_val = compute_scaling_factors( + cfg=cfg, + input_path=cfg.data.input_dir, + target_keys=target_keys, + ) + mean = {k: m.cpu().numpy() for k, m in mean.items()} + std = {k: s.cpu().numpy() for k, s in std.items()} + min_val = {k: m.cpu().numpy() for k, m in min_val.items()} + max_val = {k: m.cpu().numpy() for k, m in max_val.items()} + + compute_time = time.perf_counter() - start_time + logger.info( + f"Scaling factors computation completed in {compute_time:.2f} seconds" + ) + + ################################ + # Create structured data object + ################################ + dataset_info = { + "input_path": cfg.data.input_dir, + "model_type": cfg.model.model_type, + "normalization": cfg.model.normalization, + "compute_time": compute_time, + "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"), + "config_name": cfg.project.name, + } + + scaling_factors = ScalingFactors( + mean=mean, + std=std, + min_val=min_val, + max_val=max_val, + field_keys=target_keys, + ) + + ################################ + # Save scaling factors + ################################ + if dist.rank == 0: + # Save as structured pickle file + pickle_path = output_dir + "/scaling_factors.pkl" + scaling_factors.save(pickle_path) + logger.info(f"Scaling factors saved to: {pickle_path}") + + # Save summary report + summary_path = output_dir + "/scaling_factors_summary.txt" + with open(summary_path, "w") as f: + f.write(scaling_factors.summary()) + logger.info(f"Summary report saved to: {summary_path}") + + ################################ + # Display summary + ################################ + logger.info("Scaling factors computation summary:") + logger.info(f"Field keys processed: {scaling_factors.field_keys}") + + logger.info("Scaling factors computation completed successfully!") + + +if __name__ == "__main__": + main() diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py index 7882e2d006..f30964f3ed 100644 --- a/examples/cfd/external_aerodynamics/domino/src/train.py +++ b/examples/cfd/external_aerodynamics/domino/src/train.py @@ -56,12 +56,13 @@ from physicsnemo.datapipes.cae.domino_datapipe2 import ( DoMINODataPipe, - compute_scaling_factors, create_domino_dataset, ) from physicsnemo.models.domino.model import DoMINO from physicsnemo.utils.domino.utils import * +from utils import ScalingFactors + # This is included for GPU memory tracking: from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo import time @@ -266,30 +267,17 @@ def main(cfg: DictConfig) -> None: logger.info(f"Config summary:\n{OmegaConf.to_yaml(cfg, sort_keys=True)}") ################################ - # Get or compute scaling and normalization factors - # min/max/mean/std of input points + targets + # Get scaling factors ################################ - - vol_save_path = os.path.join( - "outputs", cfg.project.name, "volume_scaling_factors.npy" - ) - surf_save_path = os.path.join( - "outputs", cfg.project.name, "surface_scaling_factors.npy" - ) - if os.path.exists(vol_save_path): - vol_factors = np.load(vol_save_path) - vol_factors_tensor = ( - torch.from_numpy(vol_factors).to(dist.device) if add_physics_loss else None + pickle_path = os.path.join(cfg.output) + "/scaling_factors/scaling_factors.pkl" + + try: + scaling_factors = ScalingFactors.load(pickle_path) + logger.info(f"Scaling factors loaded from: {pickle_path}") + except FileNotFoundError: + raise FileNotFoundError( + f"Scaling factors not found at: {pickle_path}; please run compute_statistics.py to compute them." ) - else: - vol_factors = None - vol_factors_tensor = None - - scaling_factors = compute_scaling_factors( - cfg=cfg, - input_path=cfg.data.input_dir, - use_cache=cfg.data_processor.use_cache, - ) model_type = cfg.model.model_type @@ -315,6 +303,10 @@ def main(cfg: DictConfig) -> None: else: volume_variable_names = [] + vol_factors = scaling_factors.mean["volume_fields"] + surf_factors = scaling_factors.mean["surface_fields"] + vol_factors_tensor = torch.from_numpy(vol_factors).to(dist.device) + bounding_box = None if add_physics_loss: bounding_box = cfg.data.bounding_box @@ -326,11 +318,6 @@ def main(cfg: DictConfig) -> None: .to(dist.device) ) - if os.path.exists(surf_save_path): - surf_factors = np.load(surf_save_path) - else: - surf_factors = None - train_dataset = create_domino_dataset( cfg, phase="train", diff --git a/examples/cfd/external_aerodynamics/domino/src/utils.py b/examples/cfd/external_aerodynamics/domino/src/utils.py index abfc4d7351..6befff00bb 100644 --- a/examples/cfd/external_aerodynamics/domino/src/utils.py +++ b/examples/cfd/external_aerodynamics/domino/src/utils.py @@ -14,6 +14,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +from dataclasses import dataclass +from typing import Dict, Optional, Any +import numpy as np +import torch +import pickle +from pathlib import Path +from typing import Literal + def get_num_vars(cfg: dict, model_type: Literal["volume", "surface", "combined"]): """Calculate the number of variables for volume, surface, and global features. @@ -72,3 +80,83 @@ def get_num_vars(cfg: dict, model_type: Literal["volume", "surface", "combined"] raise ValueError(f"Unknown global parameter type") return num_vol_vars, num_surf_vars, num_global_features + + +@dataclass +class ScalingFactors: + """ + Data structure for storing scaling factors computed for DoMINO datasets. + + This class provides a clean, easily serializable format for storing + mean, std, min, and max values for different array keys in the dataset. + Uses numpy arrays for easy serialization and cross-platform compatibility. + + Attributes: + mean: Dictionary mapping keys to mean numpy arrays + std: Dictionary mapping keys to standard deviation numpy arrays + min_val: Dictionary mapping keys to minimum value numpy arrays + max_val: Dictionary mapping keys to maximum value numpy arrays + field_keys: List of field keys for which statistics were computed + """ + + mean: Dict[str, np.ndarray] + std: Dict[str, np.ndarray] + min_val: Dict[str, np.ndarray] + max_val: Dict[str, np.ndarray] + field_keys: list[str] + + def to_torch( + self, device: Optional[torch.device] = None + ) -> Dict[str, Dict[str, torch.Tensor]]: + """Convert numpy arrays to torch tensors for use in training/inference.""" + device = device or torch.device("cpu") + + return { + "mean": {k: torch.from_numpy(v).to(device) for k, v in self.mean.items()}, + "std": {k: torch.from_numpy(v).to(device) for k, v in self.std.items()}, + "min_val": { + k: torch.from_numpy(v).to(device) for k, v in self.min_val.items() + }, + "max_val": { + k: torch.from_numpy(v).to(device) for k, v in self.max_val.items() + }, + } + + def save(self, filepath: str | Path) -> None: + """Save scaling factors to pickle file.""" + filepath = Path(filepath) + filepath.parent.mkdir(parents=True, exist_ok=True) + + with open(filepath, "wb") as f: + pickle.dump(self, f) + + @classmethod + def load(cls, filepath: str | Path) -> "ScalingFactors": + """Load scaling factors from pickle file.""" + with open(filepath, "rb") as f: + factors = pickle.load(f) + return factors + + def get_field_shapes(self) -> Dict[str, tuple]: + """Get the shape of each field's statistics.""" + return {key: self.mean[key].shape for key in self.field_keys} + + def summary(self) -> str: + """Generate a human-readable summary of the scaling factors.""" + summary = ["Scaling Factors Summary:"] + summary.append(f"Field Keys: {self.field_keys}") + + for key in self.field_keys: + mean_val = self.mean[key] + std_val = self.std[key] + min_val = self.min_val[key] + max_val = self.max_val[key] + + summary.append(f"\n{key}:") + summary.append(f" Shape: {mean_val.shape}") + summary.append(f" Mean: {mean_val}") + summary.append(f" Std: {std_val}") + summary.append(f" Min: {min_val}") + summary.append(f" Max: {max_val}") + + return "\n".join(summary) diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py index 7bf4abb6c4..f9e66b6135 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe2.py +++ b/physicsnemo/datapipes/cae/domino_datapipe2.py @@ -217,6 +217,7 @@ def __init__( self, input_path, model_type: Literal["surface", "volume", "combined"], + pin_memory: bool = False, **data_config_overrides, ): # Perform config packaging and validation @@ -227,13 +228,13 @@ def __init__( DistributedManager.initialize() dist = DistributedManager() - if self.config.gpu_preprocessing or self.config.gpu_output: - # Make sure we move data to the right device: - target_device = dist.device - else: - target_device = torch.device("cpu") - self.device = target_device + self.preproc_device = ( + dist.device if self.config.gpu_preprocessing else torch.device("cpu") + ) + self.output_device = ( + dist.device if self.config.gpu_output else torch.device("cpu") + ) self.model_type = model_type @@ -244,12 +245,12 @@ def __init__( self.config.bounding_box_dims = [ torch.tensor( self.config.bounding_box_dims.max, - device=self.device, + device=self.preproc_device, dtype=torch.float32, ), torch.tensor( self.config.bounding_box_dims.min, - device=self.device, + device=self.preproc_device, dtype=torch.float32, ), ] @@ -265,12 +266,12 @@ def __init__( self.config.bounding_box_dims_surf = [ torch.tensor( self.config.bounding_box_dims_surf.max, - device=self.device, + device=self.preproc_device, dtype=torch.float32, ), torch.tensor( self.config.bounding_box_dims_surf.min, - device=self.device, + device=self.preproc_device, dtype=torch.float32, ), ] @@ -285,20 +286,26 @@ def __init__( # and on the right device: if self.config.volume_factors is not None: self.config.volume_factors = torch.tensor( - self.config.volume_factors, device=self.device, dtype=torch.float32 + self.config.volume_factors, + device=self.preproc_device, + dtype=torch.float32, ) if self.config.surface_factors is not None: self.config.surface_factors = torch.tensor( - self.config.surface_factors, device=self.device, dtype=torch.float32 + self.config.surface_factors, + device=self.preproc_device, + dtype=torch.float32, ) # Always read these keys: self.keys_to_read = ["stl_coordinates", "stl_centers", "stl_faces", "stl_areas"] self.keys_to_read_if_available = { - "global_params_values": torch.tensor([[30.0], [1.226]], device=self.device), + "global_params_values": torch.tensor( + [[30.0], [1.226]], device=self.preproc_device + ), "global_params_reference": torch.tensor( - [[30.0], [1.226]], device=self.device + [[30.0], [1.226]], device=self.preproc_device ), } @@ -318,7 +325,8 @@ def __init__( self.dataset = DrivaerMLDataset( data_dir=self.config.data_path, keys_to_read=self.keys_to_read, - output_device=self.device, + output_device=self.preproc_device, + pin_memory=pin_memory, consumer_stream=torch.cuda.default_stream(), ) @@ -803,6 +811,11 @@ def __getitem__(self, idx): data_dict = self.dataset[index] data_dict = self.process_data(data_dict, idx) + # If the data is not on the target device, put it there: + for key, value in data_dict.items(): + if value.device != self.output_device: + data_dict[key] = value.to(self.output_device) + # Add a batch dimension to the data_dict data_dict = {k: v.unsqueeze(0) for k, v in data_dict.items()} @@ -865,7 +878,8 @@ def __next__(self): # Start loading two ahead: N = len(self.indices) if hasattr(self, "indices") else len(self.dataset) - if N >= current_idx + 2: + print(f"N: {N}, current_idx: {current_idx}") + if N > current_idx + 2: self.dataset.preload(self.idx_to_index(current_idx + 1)) self.dataset.preload(self.idx_to_index(current_idx + 2)) @@ -885,34 +899,38 @@ def __iter__(self): N = len(self.indices) if hasattr(self, "indices") else len(self.dataset) # Trigger the dataset to start loading index 0: - if N >= 1: + if N > 1: self.dataset.preload(self.idx_to_index(self.i)) - if N >= 2: + if N > 2: self.dataset.preload(self.idx_to_index(self.i + 1)) return self -def compute_scaling_factors(cfg: DictConfig, input_path: str, use_cache: bool) -> None: - # Create a dataset for just the field keys: +def compute_scaling_factors( + cfg: DictConfig, input_path: str, target_keys: list[str], use_cache=None +) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + """ + Using the dataset at the path, compute the mean, std, min, and max of the target keys. + + Args: + cfg: Hydra configuration object containing all parameters + input_path: Path to the dataset to load. + target_keys: List of keys to compute the mean, std, min, and max of. + use_cache: (deprecated) This argument has no effect. + """ - norm_keys = [ - "volume_fields", - "surface_fields", - "stl_centers", - "volume_mesh_centers", - "surface_mesh_centers", - ] + device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") dataset = DrivaerMLDataset( data_dir=input_path, - keys_to_read=norm_keys, - output_device=torch.device("cuda"), # TODO - configure this more carefully here + keys_to_read=target_keys, + output_device=device, ) mean, std, min_val, max_val = compute_mean_std_min_max( dataset, - field_keys=norm_keys, + field_keys=target_keys, ) return mean, std, min_val, max_val diff --git a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py index bb3c5b7a1a..c9871db8c0 100644 --- a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py +++ b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py @@ -128,6 +128,35 @@ def read_file_sharded( pass +class NpzFileReader(BackendReader): + """ + Reader for npz files. + """ + + def __init__(self, keys_to_read: list[str] | None) -> None: + super().__init__(keys_to_read) + + def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]: + """ + Read a file and return a dictionary of tensors. + """ + in_data = np.load(filename) + + keys_found = set(in_data.keys()) + keys_missing = set(self.keys_to_read) - keys_found + if len(keys_missing) > 0: + raise ValueError(f"Keys {keys_missing} not found in file {filename}") + + data = {key: torch.from_numpy(in_data[key][:]) for key in self.keys_to_read} + + return data + + def read_file_sharded( + self, filename: pathlib.Path, parallel_rank: int, parallel_size: int + ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]: + pass + + class ZarrFileReader(BackendReader): """ Reader for zarr files. @@ -265,7 +294,7 @@ def __init__( keys_to_read: list[str] | None, output_device: torch.device, preload_depth: int = 2, - pin_memory: bool = True, + pin_memory: bool = False, device_mesh: torch.distributed.DeviceMesh | None = None, placements: dict[str, torch.distributed.tensor.Placement] | None = None, consumer_stream: torch.cuda.Stream | None = None, @@ -333,6 +362,9 @@ def _infer_file_type_and_filenames( if all(file.suffix == ".npy" for file in files): file_reader = NpyFileReader(self._keys_to_read) return file_reader, files + elif all(file.suffix == ".npz" for file in files): + file_reader = NpzFileReader(self._keys_to_read) + return file_reader, files elif all(file.suffix == ".zarr" and file.is_dir() for file in files): if TENSORSTORE_AVAILABLE: file_reader = TensorStoreZarrReader(self._keys_to_read) @@ -358,7 +390,6 @@ def _move_to_gpu( if self.output_device.type != "cuda": return data - # result = StreamDict() result = {} with torch.cuda.stream(self._data_loader_stream): From ff185b3ef33b78cc4a3da24be96f7d78b2f1f1c4 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Mon, 8 Sep 2025 14:56:20 +0000 Subject: [PATCH 18/98] Update tests for the new pipeline (mostly fix indexing from batch size) and fix a few details in the new pipeline. Use new pipeline in training script --- .../external_aerodynamics/domino/src/train.py | 19 ++----- physicsnemo/datapipes/cae/domino_datapipe2.py | 21 ++++---- test/datapipes/test_domino_datapipe.py | 49 +++++++++---------- 3 files changed, 37 insertions(+), 52 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py index f30964f3ed..2ff363e40a 100644 --- a/examples/cfd/external_aerodynamics/domino/src/train.py +++ b/examples/cfd/external_aerodynamics/domino/src/train.py @@ -102,7 +102,7 @@ def validation_step( with torch.no_grad(): for i_batch, sample_batched in enumerate(dataloader): sampled_batched = dict_to_device(sample_batched, device) - print(f"validation i batch {i_batch}") + with autocast("cuda", enabled=True): if add_physics_loss: prediction_vol, prediction_surf = model( @@ -349,17 +349,6 @@ def main(cfg: DictConfig) -> None: **cfg.val.sampler, ) - # train_dataloader = DataLoader( - # train_dataset, - # sampler=train_sampler, - # **cfg.train.dataloader, - # ) - # val_dataloader = DataLoader( - # val_dataset, - # sampler=val_sampler, - # **cfg.val.dataloader, - # ) - model = DoMINO( input_features=3, output_features_vol=num_vol_vars, @@ -449,8 +438,11 @@ def main(cfg: DictConfig) -> None: "Physics loss enabled - mixed precision (autocast) will be disabled as physics loss computation is not supported with mixed precision" ) + # This controls what indices to use for each epoch. train_sampler.set_epoch(epoch) val_sampler.set_epoch(epoch) + train_dataset.set_indices(list(train_sampler)) + val_dataset.set_indices(list(val_sampler)) initial_integral_factor = initial_integral_factor_orig @@ -459,9 +451,6 @@ def main(cfg: DictConfig) -> None: else: surface_scaling_loss = cfg.model.surf_loss_scaling - train_dataset.set_indices(list(train_sampler)) - print(f"train_dataset.indices: {train_dataset.indices}") - model.train(True) epoch_start_time = time.perf_counter() avg_loss = train_epoch( diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py index f9e66b6135..faaeac8543 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe2.py +++ b/physicsnemo/datapipes/cae/domino_datapipe2.py @@ -35,7 +35,6 @@ import torch import torch.cuda.nvtx as nvtx from omegaconf import DictConfig -from torch import Tensor from torch.utils.data import Dataset from physicsnemo.datapipes.cae.drivaer_ml_dataset import ( @@ -468,7 +467,7 @@ def process_surface( (s_max[2] - s_min[2]) / nz, ) pos_normals_com_surface = calculate_normal_positional_encoding( - surface_coordinates, center_of_mass, cell_length=[dx, dy, dz] + surface_coordinates, center_of_mass, cell_dimensions=[dx, dy, dz] ) else: pos_normals_com_surface = surface_coordinates - center_of_mass @@ -549,12 +548,10 @@ def process_surface( if self.config.scaling_type == "mean_std_scaling": surf_mean = self.config.surface_factors[0] surf_std = self.config.surface_factors[1] - # TODO - Are these array calls needed? surface_fields = standardize(surface_fields, surf_mean, surf_std) elif self.config.scaling_type == "min_max_scaling": surf_min = self.config.surface_factors[1] surf_max = self.config.surface_factors[0] - # TODO - Are these array calls needed? surface_fields = normalize(surface_fields, surf_max, surf_min) return_dict.update( @@ -667,10 +664,10 @@ def process_volume( pos_normals_closest_vol = calculate_normal_positional_encoding( volume_coordinates, sdf_node_closest_point, - cell_length=[dx, dy, dz], + cell_dimensions=[dx, dy, dz], ) pos_normals_com_vol = calculate_normal_positional_encoding( - volume_coordinates, center_of_mass, cell_length=[dx, dy, dz] + volume_coordinates, center_of_mass, cell_dimensions=[dx, dy, dz] ) else: pos_normals_closest_vol = volume_coordinates - sdf_node_closest_point @@ -679,6 +676,8 @@ def process_volume( if self.config.normalize_coordinates: volume_coordinates = normalize(volume_coordinates, c_max, c_min) grid = normalize(self.volume_grid, c_max, c_min) + else: + grid = self.volume_grid if self.config.scaling_type is not None: if self.config.volume_factors is not None: @@ -870,15 +869,16 @@ def __next__(self): # - the preprocessing pipe has to implicitly wait for idx +1 in the dataset # - wait for the preprocessing pipe at idx to finish # return the data. - if self.i >= len(self.dataset): + N = len(self.indices) if hasattr(self, "indices") else len(self.dataset) + + if self.i >= N: self.i = 0 raise StopIteration current_idx = self.i # Start loading two ahead: - N = len(self.indices) if hasattr(self, "indices") else len(self.dataset) - print(f"N: {N}, current_idx: {current_idx}") + if N > current_idx + 2: self.dataset.preload(self.idx_to_index(current_idx + 1)) self.dataset.preload(self.idx_to_index(current_idx + 2)) @@ -1004,7 +1004,8 @@ def __getitem__(self, idx): filepath = self.data_path / cfd_filename result = np.load(filepath, allow_pickle=True).item() result = { - k: v.numpy() if isinstance(v, Tensor) else v for k, v in result.items() + k: torch.from_numpy(v) if isinstance(v, np.ndarray) else v + for k, v in result.items() } nvtx.range_pop() diff --git a/test/datapipes/test_domino_datapipe.py b/test/datapipes/test_domino_datapipe.py index a2f5ad645a..8df540d9ef 100644 --- a/test/datapipes/test_domino_datapipe.py +++ b/test/datapipes/test_domino_datapipe.py @@ -27,6 +27,12 @@ from pytest_utils import import_or_fail from scipy.spatial import ConvexHull +from physicsnemo.datapipes.cae.domino_datapipe2 import ( + CachedDoMINODataset, + DoMINODataConfig, + DoMINODataPipe, +) + Tensor = torch.Tensor # DEFINING GLOBAL VARIABLES HERE @@ -91,7 +97,7 @@ def synthetic_domino_data( for i in range(n_examples): # We are generating a mesh on a random sphere. stl_points = random_sample_on_unit_sphere(N_mesh_points) - print(f"stl_points.shape: {stl_points.shape}") + # Generate the triangles with ConvexHull: hull = ConvexHull(stl_points) faces = hull.simplices # (M, 3) @@ -238,7 +244,6 @@ def bounding_boxes(): def create_basic_dataset(data_dir, model_type, **kwargs): """Helper function to create a basic DoMINODataPipe with default settings.""" - from physicsnemo.datapipes.cae.domino_datapipe import DoMINODataPipe # assert model_type in ["volume", "surface", "combined"] @@ -270,6 +275,8 @@ def create_basic_dataset(data_dir, model_type, **kwargs): default_kwargs.update(kwargs) + print(f"kwargs: {default_kwargs}") + return DoMINODataPipe( input_path=input_path, model_type=model_type, **default_kwargs ) @@ -327,7 +334,6 @@ def test_domino_datapipe_core( """Core test for basic functionality with different device and model configurations.""" data_dir = request.getfixturevalue(data_dir) - print(f"data_dir: {data_dir}") dataset = create_basic_dataset( data_dir, model_type, gpu_preprocessing=gpu_preprocessing, gpu_output=gpu_output ) @@ -360,13 +366,12 @@ def test_domino_datapipe_coordinate_normalization( v_coords = sample["volume_mesh_centers"] s_coords = sample["surface_mesh_centers"] - v_min = torch.min(v_coords, dim=0).values - v_max = torch.max(v_coords, dim=0).values - s_min = torch.min(s_coords, dim=0).values - s_max = torch.max(s_coords, dim=0).values + # Batch size is 1 here, but in principle this could be a loop: + v_min = torch.min(v_coords[0], dim=0).values + v_max = torch.max(v_coords[0], dim=0).values + s_min = torch.min(s_coords[0], dim=0).values + s_max = torch.max(s_coords[0], dim=0).values - print(f"{normalize_coordinates} v_coords: {v_min} to {v_max}") - print(f"{normalize_coordinates} s_coords: {s_min} to {s_max}") # If normalization is enabled, coordinates should be in [-2, 2] range if normalize_coordinates: if sample_in_bbox: @@ -467,9 +472,9 @@ def test_domino_datapipe_sampling(zarr_dataset, model_type, sampling, pytestconf if model_type in ["volume", "combined"]: for key in ["volume_mesh_centers", "volume_fields"]: if sampling: - assert sample[key].shape[0] == sample_points + assert sample[key].shape[1] == sample_points else: - assert sample[key].shape[0] == sample["volume_mesh_centers"].shape[0] + assert sample[key].shape[1] == sample["volume_mesh_centers"].shape[1] # Model-specific keys if model_type in ["surface", "combined"]: @@ -480,20 +485,20 @@ def test_domino_datapipe_sampling(zarr_dataset, model_type, sampling, pytestconf "surface_fields", ]: if sampling: - assert sample[key].shape[0] == sample_points + assert sample[key].shape[1] == sample_points else: - assert sample[key].shape[0] == sample["surface_mesh_centers"].shape[0] + assert sample[key].shape[1] == sample["surface_mesh_centers"].shape[1] for key in [ "surface_mesh_neighbors", "surface_neighbors_normals", "surface_neighbors_areas", ]: if sampling: - assert sample[key].shape[0] == sample_points - assert sample[key].shape[1] == dataset.config.num_surface_neighbors - 1 + assert sample[key].shape[1] == sample_points + assert sample[key].shape[2] == dataset.config.num_surface_neighbors - 1 else: - assert sample[key].shape[0] == sample["surface_mesh_neighbors"].shape[0] - assert sample[key].shape[1] == dataset.config.num_surface_neighbors - 1 + assert sample[key].shape[1] == sample["surface_mesh_neighbors"].shape[1] + assert sample[key].shape[2] == dataset.config.num_surface_neighbors - 1 @import_or_fail(["warp", "cupy", "cuml"]) @@ -572,7 +577,6 @@ def test_domino_datapipe_caching_config(zarr_dataset, model_type, pytestconfig): @import_or_fail(["warp", "cupy", "cuml"]) def test_cached_domino_dataset(zarr_dataset, tmp_path, pytestconfig): """Test CachedDoMINODataset functionality.""" - from physicsnemo.datapipes.cae.domino_datapipe import CachedDoMINODataset # Create some mock cached data files for i in range(3): @@ -637,7 +641,6 @@ def test_domino_datapipe_invalid_caching_config(zarr_dataset, pytestconfig): @import_or_fail(["warp", "cupy", "cuml"]) def test_domino_datapipe_invalid_phase(pytestconfig): """Test that invalid phase values raise appropriate errors.""" - from physicsnemo.datapipes.cae.domino_datapipe import DoMINODataConfig with pytest.raises(ValueError, match="phase should be one of"): DoMINODataConfig(data_path=tempfile.mkdtemp(), phase="invalid_phase") @@ -646,7 +649,6 @@ def test_domino_datapipe_invalid_phase(pytestconfig): @import_or_fail(["warp", "cupy", "cuml"]) def test_domino_datapipe_invalid_scaling_type(pytestconfig): """Test that invalid scaling_type values raise appropriate errors.""" - from physicsnemo.datapipes.cae.domino_datapipe import DoMINODataConfig with pytest.raises(ValueError, match="scaling_type should be one of"): DoMINODataConfig( @@ -684,10 +686,3 @@ def test_domino_datapipe_surface_sampling( sample = dataset[0] validate_sample_structure(sample, "surface", gpu_output=True) - - -if __name__ == "__main__": - out_dir = synthetic_domino_data( - out_format="zarr", - ) - print(out_dir) From c7c94cb314c6bd5e99b4a20340b41aea7c29237c Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 10 Sep 2025 01:08:11 +0000 Subject: [PATCH 19/98] add a utility to sample on a mesh with torch. some tweaks to enable the preprocess pipeline for inference. --- physicsnemo/datapipes/cae/domino_datapipe2.py | 138 ++++++++++----- .../datapipes/cae/drivaer_ml_dataset.py | 167 ++++++++++++++++-- physicsnemo/utils/domino/utils.py | 70 +++++++- 3 files changed, 308 insertions(+), 67 deletions(-) diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py index faaeac8543..18d198f457 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe2.py +++ b/physicsnemo/datapipes/cae/domino_datapipe2.py @@ -128,7 +128,7 @@ class DoMINODataConfig: You might choose gpu_preprocessing=True and gpu_output=False if caching. """ - data_path: Path + data_path: Path | None phase: Literal["train", "val", "test"] # Surface-specific variables: @@ -161,16 +161,17 @@ class DoMINODataConfig: gpu_output: bool = True def __post_init__(self): - # Ensure data_path is a Path object: - if isinstance(self.data_path, str): - self.data_path = Path(self.data_path) - self.data_path = self.data_path.expanduser() + if self.data_path is not None: + # Ensure data_path is a Path object: + if isinstance(self.data_path, str): + self.data_path = Path(self.data_path) + self.data_path = self.data_path.expanduser() - if not self.data_path.exists(): - raise ValueError(f"Path {self.data_path} does not exist") + if not self.data_path.exists(): + raise ValueError(f"Path {self.data_path} does not exist") - if not self.data_path.is_dir(): - raise ValueError(f"Path {self.data_path} is not a directory") + if not self.data_path.is_dir(): + raise ValueError(f"Path {self.data_path} is not a directory") # Object if caching settings are impossible: if self.caching: @@ -321,13 +322,16 @@ def __init__( if self.model_type == "surface" or self.model_type == "combined": self.keys_to_read.extend(self.surface_keys) - self.dataset = DrivaerMLDataset( - data_dir=self.config.data_path, - keys_to_read=self.keys_to_read, - output_device=self.preproc_device, - pin_memory=pin_memory, - consumer_stream=torch.cuda.default_stream(), - ) + if self.config.data_path is not None: + self.dataset = DrivaerMLDataset( + data_dir=self.config.data_path, + keys_to_read=self.keys_to_read, + output_device=self.preproc_device, + pin_memory=pin_memory, + consumer_stream=torch.cuda.default_stream(), + ) + else: + self.dataset = None # This is thread storage for data preprocessing: self._preprocess_queue = {} @@ -345,7 +349,10 @@ def set_indices(self, indices: list[int]): self.indices = indices def __len__(self): - return len(self.dataset) + if self.dataset is not None: + return len(self.dataset) + else: + return 0 def compute_stl_scaling( self, stl_vertices: torch.Tensor, bounding_box_dims_surf: torch.Tensor | None @@ -358,8 +365,6 @@ def compute_stl_scaling( s_min = torch.amin(stl_vertices, 0) s_max = torch.amax(stl_vertices, 0) - length_scale = torch.amax(s_max - s_min) - # if dynamic_bbox_scaling: # Check the bounding box is not unit length @@ -373,7 +378,7 @@ def compute_stl_scaling( surf_grid_max_min = torch.stack([s_min, s_max]) - return s_min, s_max, length_scale, surf_grid_max_min, surf_grid + return s_min, s_max, surf_grid_max_min, surf_grid @profile def process_combined( @@ -418,7 +423,7 @@ def process_surface( surface_coordinates: torch.Tensor, surface_normals: torch.Tensor, surface_sizes: torch.Tensor, - surface_fields: torch.Tensor, + surface_fields: torch.Tensor | None, ) -> dict[str, torch.Tensor]: nx, ny, nz = self.config.grid_resolution @@ -427,7 +432,8 @@ def process_surface( # Remove any sizes <= 0: idx = surface_sizes > 0 surface_sizes = surface_sizes[idx] - surface_fields = surface_fields[idx] + if surface_fields is not None: + surface_fields = surface_fields[idx] surface_normals = surface_normals[idx] surface_coordinates = surface_coordinates[idx] @@ -442,7 +448,8 @@ def process_surface( ) surface_normals = surface_normals[idx_s] surface_sizes = surface_sizes[idx_s] - surface_fields = surface_fields[idx_s] + if surface_fields is not None: + surface_fields = surface_fields[idx_s] c_max = self.config.bounding_box_dims[0] c_min = self.config.bounding_box_dims[1] @@ -457,7 +464,8 @@ def process_surface( surface_coordinates = surface_coordinates[ids_in_bbox] surface_normals = surface_normals[ids_in_bbox] surface_sizes = surface_sizes[ids_in_bbox] - surface_fields = surface_fields[ids_in_bbox] + if surface_fields is not None: + surface_fields = surface_fields[ids_in_bbox] # Compute the positional encoding before sampling if self.config.positional_encoding: @@ -548,11 +556,15 @@ def process_surface( if self.config.scaling_type == "mean_std_scaling": surf_mean = self.config.surface_factors[0] surf_std = self.config.surface_factors[1] - surface_fields = standardize(surface_fields, surf_mean, surf_std) + if surface_fields is not None: + surface_fields = standardize( + surface_fields, surf_mean, surf_std + ) elif self.config.scaling_type == "min_max_scaling": surf_min = self.config.surface_factors[1] surf_max = self.config.surface_factors[0] - surface_fields = normalize(surface_fields, surf_max, surf_min) + if surface_fields is not None: + surface_fields = normalize(surface_fields, surf_max, surf_min) return_dict.update( { @@ -563,9 +575,10 @@ def process_surface( "surface_neighbors_normals": surface_neighbors_normals, "surface_areas": surface_sizes, "surface_neighbors_areas": surface_neighbors_sizes, - "surface_fields": surface_fields, } ) + if surface_fields is not None: + return_dict["surface_fields"] = surface_fields return return_dict @@ -574,7 +587,7 @@ def process_volume( s_min: torch.Tensor, s_max: torch.Tensor, volume_coordinates: torch.Tensor, - volume_fields: torch.Tensor, + volume_fields: torch.Tensor | None, stl_vertices: torch.Tensor, mesh_indices_flattened: torch.Tensor, center_of_mass: torch.Tensor, @@ -602,7 +615,8 @@ def process_volume( ids_in_bbox = ids_in_bbox.all(dim=1) volume_coordinates = volume_coordinates[ids_in_bbox] - volume_fields = volume_fields[ids_in_bbox] + if volume_fields is not None: + volume_fields = volume_fields[ids_in_bbox] dx, dy, dz = ( (c_max[0] - c_min[0]) / nx, @@ -646,8 +660,8 @@ def process_volume( mode="constant", value=-10.0, ) - - volume_fields = volume_fields[idx_volume] + if volume_fields is not None: + volume_fields = volume_fields[idx_volume] volume_coordinates = volume_coordinates_sampled # Get the SDF of all the selected volume coordinates, @@ -684,11 +698,13 @@ def process_volume( if self.config.scaling_type == "mean_std_scaling": vol_mean = self.config.volume_factors[0] vol_std = self.config.volume_factors[1] - volume_fields = standardize(volume_fields, vol_mean, vol_std) + if volume_fields is not None: + volume_fields = standardize(volume_fields, vol_mean, vol_std) elif self.config.scaling_type == "min_max_scaling": vol_min = self.config.volume_factors[1] vol_max = self.config.volume_factors[0] - volume_fields = normalize(volume_fields, vol_max, vol_min) + if volume_fields is not None: + volume_fields = normalize(volume_fields, vol_max, vol_min) vol_grid_max_min = torch.stack([c_min, c_max]) @@ -699,11 +715,12 @@ def process_volume( "grid": grid, "sdf_grid": sdf_grid, "sdf_nodes": sdf_nodes, - "volume_fields": volume_fields, "volume_mesh_centers": volume_coordinates, "volume_min_max": vol_grid_max_min, } ) + if volume_fields is not None: + return_dict["volume_fields"] = volume_fields return return_dict @@ -724,10 +741,8 @@ def process_data(self, data_dict, idx: int): # This function gets information about the surface scale, # and decides what the surface grid will be: - (s_min, s_max, length_scale, surf_grid_max_min, surf_grid) = ( - self.compute_stl_scaling( - data_dict["stl_coordinates"], self.config.bounding_box_dims_surf - ) + (s_min, s_max, surf_grid_max_min, surf_grid) = self.compute_stl_scaling( + data_dict["stl_coordinates"], self.config.bounding_box_dims_surf ) # This is a center of mass computation for the stl surface, @@ -742,7 +757,6 @@ def process_data(self, data_dict, idx: int): return_dict.update( { - "length_scale": length_scale, "surface_min_max": surf_grid_max_min, } ) @@ -767,7 +781,9 @@ def process_data(self, data_dict, idx: int): s_min, s_max, volume_coordinates=data_dict["volume_mesh_centers"], - volume_fields=data_dict["volume_fields"], + volume_fields=data_dict["volume_fields"] + if "volume_fields" in data_dict + else None, stl_vertices=data_dict["stl_coordinates"], mesh_indices_flattened=mesh_indices_flattened, center_of_mass=center_of_mass, @@ -784,7 +800,9 @@ def process_data(self, data_dict, idx: int): surface_coordinates=data_dict["surface_mesh_centers"], surface_normals=data_dict["surface_normals"], surface_sizes=data_dict["surface_areas"], - surface_fields=data_dict["surface_fields"], + surface_fields=data_dict["surface_fields"] + if "surface_fields" in data_dict + else None, ) return_dict.update(surface_dict) @@ -798,6 +816,9 @@ def __getitem__(self, idx): are relatively large due to the mesh size. """ + if self.dataset is None: + raise ValueError("Dataset is not present") + index = self.idx_to_index(idx) # Get the preprocessed data: @@ -831,6 +852,9 @@ def preprocess(self, idx: int) -> None: Start preprocessing for the given index (1 step ahead). This processes preloaded data or loads it if not available. """ + if self.dataset is None: + raise ValueError("Dataset is not present") + if idx in self._preprocess_queue: # Skip items that are already being preprocessed return @@ -869,6 +893,10 @@ def __next__(self): # - the preprocessing pipe has to implicitly wait for idx +1 in the dataset # - wait for the preprocessing pipe at idx to finish # return the data. + + if self.dataset is None: + raise ValueError("Dataset is not present") + N = len(self.indices) if hasattr(self, "indices") else len(self.dataset) if self.i >= N: @@ -894,6 +922,9 @@ def __iter__(self): # at idx = 0, idx = 1 # Start preprocessing at idx = 0, when the load completes + if self.dataset is None: + raise ValueError("Dataset is not present") + self.i = 0 N = len(self.indices) if hasattr(self, "indices") else len(self.dataset) @@ -1087,12 +1118,25 @@ def __getitem__(self, idx): def create_domino_dataset( - cfg, phase, volume_variable_names, surface_variable_names, vol_factors, surf_factors + cfg: DictConfig, + phase: Literal["train", "val", "test"], + volume_variable_names: list[str], + surface_variable_names: list[str], + vol_factors: list[float], + surf_factors: list[float], + normalize_coordinates: bool = True, + sample_in_bbox: bool = True, + sampling: bool = True, ): if phase == "train": input_path = cfg.data.input_dir + model_type = cfg.model.model_type elif phase == "val": input_path = cfg.data.input_dir_val + model_type = cfg.model.model_type + elif phase == "test": + input_path = cfg.eval.test_path + model_type = "inference" else: raise ValueError(f"Invalid phase {phase}") @@ -1100,7 +1144,7 @@ def create_domino_dataset( return CachedDoMINODataset( input_path, phase=phase, - sampling=True, + sampling=sampling, volume_points_sample=cfg.model.volume_points_sample, surface_points_sample=cfg.model.surface_points_sample, geom_points_sample=cfg.model.geom_points_sample, @@ -1121,9 +1165,9 @@ def create_domino_dataset( grid_resolution=cfg.model.interp_res, volume_variables=volume_variable_names, surface_variables=surface_variable_names, - normalize_coordinates=True, - sampling=True, - sample_in_bbox=True, + normalize_coordinates=normalize_coordinates, + sampling=sampling, + sample_in_bbox=sample_in_bbox, volume_points_sample=cfg.model.volume_points_sample, surface_points_sample=cfg.model.surface_points_sample, geom_points_sample=cfg.model.geom_points_sample, @@ -1131,7 +1175,7 @@ def create_domino_dataset( volume_factors=vol_factors, surface_factors=surf_factors, scaling_type=cfg.model.normalization, - model_type=cfg.model.model_type, + model_type=model_type, bounding_box_dims=cfg.data.bounding_box, bounding_box_dims_surf=cfg.data.bounding_box_surface, num_surface_neighbors=cfg.model.num_neighbors_surface, diff --git a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py index c9871db8c0..17f486fb6c 100644 --- a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py +++ b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py @@ -20,7 +20,6 @@ from concurrent.futures import ThreadPoolExecutor import numpy as np -import psutil import torch import zarr @@ -31,25 +30,17 @@ except ImportError: TENSORSTORE_AVAILABLE = False -from physicsnemo.distributed import ShardTensor, ShardTensorSpec - -# from physicsnemo.distributed.utils import compute_split_shapes - -# For use on systems where cpu_affinity is not available: -psutil_process = psutil.Process() - +try: + import pyvista as pv -class FakeProcess: - """ - Enable a fake cpu affinity setting if it's not available - """ + PV_AVAILABLE = True +except ImportError: + PV_AVAILABLE = False - def cpu_affinity(self, cpus: list[int] | None) -> None: - pass +from physicsnemo.distributed import ShardTensor, ShardTensorSpec +# from physicsnemo.distributed.utils import compute_split_shapes -if not hasattr(psutil_process, "cpu_affinity"): - psutil_process = FakeProcess() # Abstractions: # - want to read npy/npz/.zarr/.stl/.vtp files @@ -187,7 +178,128 @@ def read_file_sharded( """ Read a file and return a dictionary of tensors. """ - pass + raise NotImplementedError("Not implemented yet.") + + +if PV_AVAILABLE: + + class VTKFileReader(BackendReader): + """ + Reader for vtk files. + """ + + def __init__(self, keys_to_read: list[str] | None) -> None: + super().__init__(keys_to_read) + + self.stl_file_keys = [ + "stl_coordinates", + "stl_centers", + "stl_faces", + "stl_areas", + ] + self.vtp_file_keys = [ + "surface_mesh_centers", + "surface_normals", + "surface_mesh_sizes", + "CpMeanTrim", + "pMeanTrim", + "wallShearStressMeanTrim", + ] + self.vtu_file_keys = [ + "volume_mesh_centers", + "volume_fields", + ] + + self.exclude_patterns = [ + "single_solid", + ] + + def get_file_name(self, dir_name: pathlib.Path, extension: str) -> pathlib.Path: + """ + Get the file name for a given directory and extension. + """ + # >>> matches = [p for p in list(dir_name.iterdir()) if p.suffix == ".stl" and not any(pattern in p.name for pattern in exclude_patterns)] + matches = [ + p + for p in dir_name.iterdir() + if p.suffix == extension + and not any(pattern in p.name for pattern in self.exclude_patterns) + ] + if len(matches) == 0: + raise FileNotFoundError(f"No {extension} files found in {dir_name}") + fname = matches[0] + return dir_name / fname + + def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]: + """ + Read a set of files and return a dictionary of tensors. + """ + + # This reader attempts to only read what's necessary, and not more. + # So, the functions that do the reading are each "one file" functions + # and we open them for processing only when necessary. + + return_data = {} + + # Note that this reader is, already, running in a background thread. + # It may or may not help to further thread these calls. + if any(key in self.stl_file_keys for key in self.keys_to_read): + stl_path = self.get_file_name(filename, ".stl") + stl_data = self.read_data_from_stl(stl_path) + return_data.update(stl_data) + if any(key in self.vtp_file_keys for key in self.keys_to_read): + vtp_path = self.get_file_name(filename, ".vtp") + vtp_data = self.read_data_from_vtp(vtp_path) + return_data.update(vtp_data) + if any(key in self.vtu_file_keys for key in self.keys_to_read): + raise NotImplementedError("VTU files are not supported yet.") + + return return_data + + def read_file_sharded( + self, filename: pathlib.Path, parallel_rank: int, parallel_size: int + ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]: + """ + Read a file and return a dictionary of tensors. + """ + raise NotImplementedError("Not implemented yet.") + + def read_data_from_stl( + self, + stl_path: str, + ) -> dict: + """ + Reads surface mesh data from an STL file and prepares a batch dictionary for inference. + + Args: + stl_path (str): Path to the STL file. + + Returns: + dict: Batch dictionary with mesh faces and coordinates as torch tensors. + """ + + mesh = pv.read(stl_path) + + batch = {} + + faces = mesh.faces.reshape(-1, 4) + faces = faces[:, 1:] + + batch["stl_faces"] = faces.flatten() + + batch["stl_coordinates"] = mesh.points + batch["surface_normals"] = mesh.cell_normals + + batch = {k: torch.from_numpy(v) for k, v in batch.items()} + + return batch + + def read_data_from_vtp(self, vtp_path: str) -> dict: + """ + Read vtp file from a file + """ + + raise NotImplementedError("Not implemented yet.") if TENSORSTORE_AVAILABLE: @@ -253,7 +365,20 @@ class TensorStoreZarrReader(BackendReader): Null reader for tensorstore zarr files. """ - pass + def __init__(self, keys_to_read: list[str] | None) -> None: + # Raise an exception on construction if we get here: + raise NotImplementedError( + "TensorStoreZarrReader is not available without tensorstore. `pip install tensorstore`." + ) + + +def is_vtk_directory(file: pathlib.Path) -> bool: + """ + Check if a file is a vtk directory. + """ + return file.is_dir() and all( + [f.suffix in [".vtp", ".stl", ".vtu", ".vtk", ".csv"] for f in file.iterdir()] + ) class DrivaerMLDataset: @@ -371,9 +496,13 @@ def _infer_file_type_and_filenames( else: file_reader = ZarrFileReader(self._keys_to_read) return file_reader, files + elif all(is_vtk_directory(file) for file in files): + file_reader = VTKFileReader(self._keys_to_read) + return file_reader, files + # Each "file" here is a directory of .vtp, stl, etc. else: # TODO - support folders of stl, vtp, vtu. - raise ValueError(f"Unsupported file type: {files}") + raise ValueError(f"Unsupported file type: {files[0]}") def _move_to_gpu( self, data: dict[str, torch.Tensor], idx: int diff --git a/physicsnemo/utils/domino/utils.py b/physicsnemo/utils/domino/utils.py index 3abb968c5a..336a411497 100644 --- a/physicsnemo/utils/domino/utils.py +++ b/physicsnemo/utils/domino/utils.py @@ -483,7 +483,7 @@ def shuffle_array( for w, p in zip(chunk_weights, points_per_chunk) ] - # Stich the chunks back together: + # Stitch the chunks back together: idx = torch.cat(idx_chunks) # Apply the selection: @@ -870,3 +870,71 @@ def solution_weighted_shuffle_array( sampling_probabilities /= sampling_probabilities.sum() # Normalize to sum to 1 return shuffle_array(arr, n_points, sampling_probabilities) + + +def sample_points_on_mesh( + mesh_coordinates: torch.Tensor, + mesh_faces: torch.Tensor, + n_points: int, + mesh_areas: torch.Tensor | None = None, + mesh_normals: torch.Tensor | None = None, +) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + """ + Uniformly sample points on a mesh. + + Will use area-weighted sampling to select mesh regions, then uniform + sampling within those triangles. + """ + + # First, if we don't have the areas, compute them: + faces_reshaped = mesh_faces.reshape(-1, 3) + + if mesh_areas is None or mesh_normals is None: + # We have to do 90% of the work for both of these, + # to get either. So check at the last minute: + faces_reshaped_p0 = faces_reshaped[:, 0] + faces_reshaped_p1 = faces_reshaped[:, 1] + faces_reshaped_p2 = faces_reshaped[:, 2] + d1 = mesh_coordinates[faces_reshaped_p1] - mesh_coordinates[faces_reshaped_p0] + d2 = mesh_coordinates[faces_reshaped_p2] - mesh_coordinates[faces_reshaped_p0] + inferred_mesh_normals = torch.linalg.cross(d1, d2, dim=1) + normals_norm = torch.linalg.norm(inferred_mesh_normals, dim=1) + inferred_mesh_normals = inferred_mesh_normals / normals_norm.unsqueeze(1) + if mesh_normals is None: + mesh_normals = inferred_mesh_normals + if mesh_areas is None: + mesh_areas = 0.5 * normals_norm + + # Next, use the areas to compute a weighted sampling of the triangles: + target_triangles = torch.multinomial( + mesh_areas, + n_points, + replacement=True, + ) + + target_faces = faces_reshaped[target_triangles] + + # Next, generate random points within each selected triangle. + # We'll map two uniform distributions to the points in the triangles. + # See https://stackoverflow.com/questions/47410054/generate-random-locations-within-a-triangular-domain + # and the original reference https://www.cs.princeton.edu/%7Efunk/tog02.pdf + # for more information + r1 = torch.rand((n_points, 1), device=mesh_coordinates.device) + r2 = torch.rand((n_points, 1), device=mesh_coordinates.device) + + s1 = torch.sqrt(r1) + + local_coords = torch.stack( + (1.0 - s1, (1.0 - r2) * s1, r2 * s1), + dim=1, + ) + + barycentric_coordinates = torch.sum( + mesh_coordinates[target_faces] * local_coords, dim=1 + ) + + # Apply the selection to the other tensors, too: + target_areas = mesh_areas[target_triangles] + target_normals = mesh_normals[target_triangles] + + return barycentric_coordinates, target_triangles, target_areas, target_normals From 471aae99866d6eec401ac1d130a4e14e60075bae Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 10 Sep 2025 01:10:05 +0000 Subject: [PATCH 20/98] Add revamped inference script. Doesn't write outputs yet. --- .../external_aerodynamics/domino/README.md | 74 +++- .../domino/src/inference_on_stl2.py | 378 ++++++++++++++++++ .../external_aerodynamics/domino/src/train.py | 16 +- 3 files changed, 460 insertions(+), 8 deletions(-) create mode 100644 examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py diff --git a/examples/cfd/external_aerodynamics/domino/README.md b/examples/cfd/external_aerodynamics/domino/README.md index d66d42f45a..470a6a5f46 100644 --- a/examples/cfd/external_aerodynamics/domino/README.md +++ b/examples/cfd/external_aerodynamics/domino/README.md @@ -77,19 +77,24 @@ please refer to their [paper](https://arxiv.org/pdf/2408.11969). #### Data Preprocessing -`PhysicsNeMo` has a related project to help with data processing, called [PhysicsNeMo-Curator](https://github.com/NVIDIA/physicsnemo-curator). +`PhysicsNeMo` has a related project to help with data processing, called +[PhysicsNeMo-Curator](https://github.com/NVIDIA/physicsnemo-curator). Using `PhysicsNeMo-Curator`, the data needed to train a DoMINO model can be setup easily. -Please refer to [these instructions on getting started](https://github.com/NVIDIA/physicsnemo-curator?tab=readme-ov-file#what-is-physicsnemo-curator) +Please refer to +[these instructions on getting started](https://github.com/NVIDIA/physicsnemo-curator?tab=readme-ov-file#what-is-physicsnemo-curator) with `PhysicsNeMo-Curator`. -Download the DrivAer ML dataset using the [provided instructions in PhysicsNeMo-Curator](https://github.com/NVIDIA/physicsnemo-curator/blob/main/examples/external_aerodynamics/domino/README.md#download-drivaerml-dataset). +Download the DrivAer ML dataset using the +[provided instructions in PhysicsNeMo-Curator](https://github.com/NVIDIA/physicsnemo-curator/blob/main/examples/external_aerodynamics/domino/README.md#download-drivaerml-dataset). The first step for running the DoMINO pipeline requires processing the raw data (vtp, vtu and stl) into either Zarr or NumPy format for training. Each of the raw simulations files are downloaded in `vtp`, `vtu` and `stl` formats. For instructions on running data processing to produce a DoMINO training ready dataset, -please refer to [How-to Curate data for DoMINO Model](https://github.com/NVIDIA/physicsnemo-curator/blob/main/examples/external_aerodynamics/domino/README.md). +please refer to +[How-to Curate data for DoMINO Model](https://github.com/NVIDIA/physicsnemo-curator/blob/main/examples/external_aerodynamics/domino/README.md). -Caching is implemented in [`CachedDoMINODataset`](https://github.com/NVIDIA/physicsnemo/blob/main/physicsnemo/datapipes/cae/domino_datapipe.py#L1250). +Caching is implemented in +[`CachedDoMINODataset`](https://github.com/NVIDIA/physicsnemo/blob/main/physicsnemo/datapipes/cae/domino_datapipe.py#L1250). Optionally, users can run `cache_data.py` to save outputs of DoMINO datapipe in the `.npy` files. The DoMINO datapipe is set up to calculate Signed Distance Field and Nearest Neighbor interpolations on-the-fly during @@ -101,6 +106,36 @@ processed files. The final processed dataset should be divided and saved into 2 directories, for training and validation. +#### Data Scaling factors + +DoMINO has several data-specific configuration tools that rely on some +knowledge of the dataset: + +- The output fields (the labels) are normalized during training to a mean + of zero and a standard deviation of one, averaged over the dataset. + The scaling is controlled by passing the `volume_factors` and + `surface_factors` values to the datapipe. +- The input locations are scaled by, and optionally cropped to, used defined + bounding boxes for both surface and volume. Whether cropping occurs, or not, + is controlled by the `sample_in_bbox` value of the datapipe. Normalization + to the bounding box is enabled with `normalize_coordinates`. By default, + both are set to true. The value of the boxes are configured in the + `config.yaml` file, and are configured separately for surface and volume. + +> Note: The datapipe module has a helper function `create_domino_dataset` +> with sensible defaults to help create a Domino Datapipe. + +To facilitate setting reasonable values of these, you can use the +`compute_statistics.py` script. This will load the core dataset as defined +in your `config.yaml` file, loop over several events (20, by default), and +both print and store the surface/volume field statistics as well as the +coordinate statistics. + +> Note that, for volumetric fields especially, the min/max found may be +> significantly outside the surface region. Many simulations extend volumetric +> sampling to far field, and you may instead want to crop significant amounts +> of volumetric distance. + #### Training Specify the training and validation data paths, bounding box sizes etc. in the @@ -189,6 +224,35 @@ launch overhead at the cost of more memory use. For non-sharded training, the `two-loop` setting is more optimal. The difference in `one-loop` or `two-loop` is purely computational, not algorithmic. +### Performance Optimizations + +The training and inference scripts for DoMINO contain several performance +enhancements to accelerate the training and usage of the model. In this +section we'll highlight several of them, as well as how to customize them +if needed. + +#### Memory Pool Optimizations + +The preprocessor of DoMINO requires a computation of k Nearest Neighbors, +which is accelerated via the `cuml` Neighbors tool. By default, `cuml` and +`torch` both use memory allocation pools to speed up allocating tensors, but +they do not use the same pool. This means that during preprocessing, it's +possible for the kNN operation to spend a significant amount of time in +memory allocations - and further, it limits the available memory to `torch`. + +To mitigate this, by default in DoMINO we use the Rapids Memory Manager +([`rmm`](https://github.com/rapidsai/rmm)). If, for some reason, you wish +to disable this you can do so with an environment variable: + +```bash +export DOMINO_DISABLE_RMM=True +``` + +> Note - why not make it configurable? We have to set up the shared memory +> pool allocation very early in the program, before the config has even +> been read. So, we enable by default and the opt-out path is via the +> environment. + ### Training with Physics Losses DoMINO supports enforcing of PDE residuals as soft constraints. This can be used diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py new file mode 100644 index 0000000000..7b4f71b507 --- /dev/null +++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py @@ -0,0 +1,378 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This code defines a distributed pipeline for training the DoMINO model on +CFD datasets. It includes the computation of scaling factors, instantiating +the DoMINO model and datapipe, automatically loading the most recent checkpoint, +training the model in parallel using DistributedDataParallel across multiple +GPUs, calculating the loss and updating model parameters using mixed precision. +This is a common recipe that enables training of combined models for surface and +volume as well either of them separately. Validation is also conducted every epoch, +where predictions are compared against ground truth values. The code logs training +and validation metrics to TensorBoard. The train tab in config.yaml can be used to +specify batch size, number of epochs and other training parameters. +""" + +import time +import os +import re +from typing import Literal, Any + +import apex +import numpy as np +import hydra +from hydra.utils import to_absolute_path +from omegaconf import DictConfig, OmegaConf +import torch + +DISABLE_RMM = os.environ.get("DISABLE_RMM", "False") +if not DISABLE_RMM: + import rmm + from rmm.allocators.torch import rmm_torch_allocator + + rmm.reinitialize(pool_allocator=True) + torch.cuda.memory.change_current_allocator(rmm_torch_allocator) + +import torchinfo +import torch.distributed as dist +from torch.amp import GradScaler, autocast +from torch.nn.parallel import DistributedDataParallel +from torch.utils.data import DataLoader +from torch.utils.data.distributed import DistributedSampler +from torch.utils.tensorboard import SummaryWriter +from nvtx import annotate as nvtx_annotate +import torch.cuda.nvtx as nvtx + + +from physicsnemo.distributed import DistributedManager +from physicsnemo.launch.utils import load_checkpoint, save_checkpoint +from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper + +from physicsnemo.datapipes.cae.domino_datapipe2 import ( + DoMINODataPipe, + create_domino_dataset, +) +from physicsnemo.datapipes.cae.drivaer_ml_dataset import ( + DrivaerMLDataset, +) + +from physicsnemo.models.domino.model import DoMINO +from physicsnemo.utils.domino.utils import sample_points_on_mesh + +from utils import ScalingFactors + +# This is included for GPU memory tracking: +from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo +import time + + +# Initialize NVML +nvmlInit() + + +from physicsnemo.utils.profiling import profile, Profiler + + +# Profiler().enable("torch") +# Profiler().initialize() + +from loss import compute_loss_dict +from utils import get_num_vars + + +def inference_epoch( + dataset: DrivaerMLDataset, + sampler: DistributedSampler, + datapipe: DoMINODataPipe, + model: DoMINO, + gpu_handle: int, + device: torch.device, + logger: PythonLogger, + batch_size: int = 1_024_000, + total_points: int = 1_024_000, +) -> float: + epoch_indices = list(sampler) + + # n_steps = total_points // batch_size + # if n_steps * batch_size < total_points: + # n_steps += 1 + # last_batch_size = total_points - n_steps * batch_size + + # Assuming here there are more than two target meshes: + dataset.preload(epoch_indices[0]) + dataset.preload(epoch_indices[1]) + for i_batch, epoch_index in enumerate(epoch_indices): + # Do some preloading of input data: + + data_time_start = time.perf_counter() + if i_batch + 2 < len(epoch_indices): + # Preload next next + dataset.preload(epoch_indices[i_batch + 2]) + # Get the data for this index: + sample_batched = dataset[epoch_index] + data_time_end = time.perf_counter() + print(f"Data {i_batch} time: {data_time_end - data_time_start:.3f} seconds") + procesing_time_start = time.perf_counter() + # We always need these keys, but are only reading the faces and coordinates + # which saves on IO speed. + # "stl_coordinates", "stl_centers", "stl_faces", "stl_areas" + + # So, do the computation of the areas and centers: + # Center is a mean of the 3 vertices + triangle_vertices = sample_batched["stl_coordinates"][ + sample_batched["stl_faces"].reshape((-1, 3)) + ] + sample_batched["stl_centers"] = triangle_vertices.mean(dim=-1) + # Area we compute from the cross product of two sides: + d1 = triangle_vertices[:, 1] - triangle_vertices[:, 0] + d2 = triangle_vertices[:, 2] - triangle_vertices[:, 0] + inferred_mesh_normals = torch.linalg.cross(d1, d2, dim=1) + normals_norm = torch.linalg.norm(inferred_mesh_normals, dim=1) + sample_batched["stl_areas"] = 0.5 * normals_norm + + for i in range(10): + batch_time_start = time.perf_counter() + # Now that we have the meshes, begin to build a batch of data up for preprocessing: + sampled_points, sampled_faces, sampled_areas, sampled_normals = ( + sample_points_on_mesh( + sample_batched["stl_coordinates"], + sample_batched["stl_faces"], + batch_size, + mesh_normals=sample_batched["surface_normals"], + mesh_areas=sample_batched["stl_areas"], + ) + ) + + # Build up volume points too: + c_min = datapipe.config.bounding_box_dims[1] + c_max = datapipe.config.bounding_box_dims[0] + + sampled_volume_points = (c_max - c_min) * torch.rand( + batch_size, 3, device=device, dtype=torch.float32 + ) + c_min + + inference_dict = { + "stl_coordinates": sample_batched["stl_coordinates"], + "stl_faces": sample_batched["stl_faces"], + "stl_centers": sample_batched["stl_centers"], + "stl_areas": sample_batched["stl_areas"], + "surface_mesh_centers": sampled_points, + "surface_normals": sampled_normals, + "surface_areas": sampled_areas, + "surface_faces": sampled_faces, + "volume_mesh_centers": sampled_volume_points, + } + + preprocessed_data = datapipe.process_data(inference_dict, i_batch) + + # Add a batch dimension to the data_dict + preprocessed_data = { + k: v.unsqueeze(0) for k, v in preprocessed_data.items() + } + + with torch.no_grad(): + output_data = model(preprocessed_data) + + batch_time_end = time.perf_counter() + points_per_second = batch_size / (batch_time_end - batch_time_start) + print( + f"Batch {i} in {i_batch} time: {batch_time_end - batch_time_start:.3f} seconds, {points_per_second:.3f} points per second" + ) + procesing_time_end = time.perf_counter() + print( + f"Processing {i_batch} time: {procesing_time_end - procesing_time_start:.3f} seconds" + ) + if i_batch > 20: + break + print(sample_batched.keys()) + + return 0.0 + + +@hydra.main(version_base="1.3", config_path="conf", config_name="config") +def main(cfg: DictConfig) -> None: + ################################ + # initialize distributed manager + ################################ + DistributedManager.initialize() + dist = DistributedManager() + + ################################ + # Initialize NVML + ################################ + nvmlInit() + gpu_handle = nvmlDeviceGetHandleByIndex(dist.device.index) + + ################################ + # Initialize logger + ################################ + + logger = PythonLogger("Train") + logger = RankZeroLoggingWrapper(logger, dist) + + logger.info(f"Config summary:\n{OmegaConf.to_yaml(cfg, sort_keys=True)}") + + ################################ + # Get scaling factors + ################################ + pickle_path = os.path.join(cfg.output) + "/scaling_factors/scaling_factors.pkl" + + try: + scaling_factors = ScalingFactors.load(pickle_path) + logger.info(f"Scaling factors loaded from: {pickle_path}") + except FileNotFoundError: + raise FileNotFoundError( + f"Scaling factors not found at: {pickle_path}; please run compute_statistics.py to compute them." + ) + + model_type = cfg.model.model_type + + num_vol_vars, num_surf_vars, num_global_features = get_num_vars(cfg, model_type) + + if model_type == "combined" or model_type == "surface": + surface_variable_names = list(cfg.variables.surface.solution.keys()) + else: + surface_variable_names = [] + + if model_type == "combined" or model_type == "volume": + volume_variable_names = list(cfg.variables.volume.solution.keys()) + else: + volume_variable_names = [] + + vol_factors = scaling_factors.mean["volume_fields"] + surf_factors = scaling_factors.mean["surface_fields"] + vol_factors_tensor = torch.from_numpy(vol_factors).to(dist.device) + + bounding_box = None + + # Override the model type + # For the inference pipeline, we adjust the tooling a little for the data. + # We use only a bare STL dataset that will read the mesh coordinates + # and triangle definitions. We'll compute the centers and normals + # on the GPU (instead of on the CPU, as pyvista would do) and + # then we can sample from that mesh on the GPU. + test_dataset = DrivaerMLDataset( + data_dir=cfg.eval.test_path, + keys_to_read=[ + "stl_coordinates", + "stl_faces", + ], + output_device=dist.device, + ) + + # Volumetric data will be generated on the fly on the GPU. + + # We _won't_ iterate over the datapipe, however, we can use the + # datapipe processing tools on the sampled surface and + overrides = {} + if hasattr(cfg.data, "gpu_preprocessing"): + overrides["gpu_preprocessing"] = cfg.data.gpu_preprocessing + + if hasattr(cfg.data, "gpu_output"): + overrides["gpu_output"] = cfg.data.gpu_output + + test_datapipe = DoMINODataPipe( + None, + phase="test", + grid_resolution=cfg.model.interp_res, + volume_variables=volume_variable_names, + surface_variables=surface_variable_names, + normalize_coordinates=True, + sampling=False, + sample_in_bbox=True, + volume_points_sample=None, + surface_points_sample=None, + geom_points_sample=None, + positional_encoding=cfg.model.positional_encoding, + volume_factors=vol_factors, + surface_factors=surf_factors, + scaling_type=cfg.model.normalization, + model_type=model_type, + bounding_box_dims=cfg.data.bounding_box, + bounding_box_dims_surf=cfg.data.bounding_box_surface, + num_surface_neighbors=cfg.model.num_neighbors_surface, + resample_surfaces=cfg.model.resampling_surface_mesh.resample, + resampling_points=cfg.model.resampling_surface_mesh.points, + surface_sampling_algorithm=cfg.model.surface_sampling_algorithm, + **overrides, + ) + + test_sampler = DistributedSampler( + test_dataset, + num_replicas=dist.world_size, + rank=dist.rank, + **cfg.train.sampler, + ) + + model = DoMINO( + input_features=3, + output_features_vol=num_vol_vars, + output_features_surf=num_surf_vars, + global_features=num_global_features, + model_parameters=cfg.model, + ).to(dist.device) + # model = torch.compile(model, fullgraph=True, dynamic=True) # TODO make this configurable + + # Print model summary (structure and parmeter count). + logger.info(f"Model summary:\n{torchinfo.summary(model, verbose=0, depth=2)}\n") + + writer = SummaryWriter(os.path.join(cfg.output, "tensorboard")) + + model_save_path = os.path.join(cfg.output, "models") + param_save_path = os.path.join(cfg.output, "param") + best_model_path = os.path.join(model_save_path, "best_model") + + if dist.world_size > 1: + torch.distributed.barrier() + + load_checkpoint( + to_absolute_path(cfg.resume_dir), + models=model, + device=dist.device, + ) + + initial_integral_factor_orig = cfg.model.integral_loss_scaling_factor + + start_time = time.perf_counter() + + # This controls what indices to use for each epoch. + test_sampler.set_epoch(0) + + initial_integral_factor = initial_integral_factor_orig + + model.eval() + epoch_start_time = time.perf_counter() + inference_epoch( + dataset=test_dataset, + sampler=test_sampler, + datapipe=test_datapipe, + model=model, + logger=logger, + gpu_handle=gpu_handle, + device=dist.device, + ) + epoch_end_time = time.perf_counter() + logger.info( + f"Device {dist.device}, Epoch took {epoch_end_time - epoch_start_time:.3f} seconds" + ) + + +if __name__ == "__main__": + # Profiler().enable("torch") + # Profiler().initialize() + main() + # Profiler().finalize() diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py index 2ff363e40a..5a155fc198 100644 --- a/examples/cfd/external_aerodynamics/domino/src/train.py +++ b/examples/cfd/external_aerodynamics/domino/src/train.py @@ -30,9 +30,6 @@ import time import os import re -import torch -import torchinfo - from typing import Literal, Any import apex @@ -40,6 +37,18 @@ import hydra from hydra.utils import to_absolute_path from omegaconf import DictConfig, OmegaConf + + +DISABLE_RMM = os.environ.get("DOMINO_DISABLE_RMM", "False") +if not DISABLE_RMM: + import rmm + from rmm.allocators.torch import rmm_torch_allocator + import torch + + rmm.reinitialize(pool_allocator=True) + torch.cuda.memory.change_current_allocator(rmm_torch_allocator) + +import torchinfo import torch.distributed as dist from torch.amp import GradScaler, autocast from torch.nn.parallel import DistributedDataParallel @@ -67,6 +76,7 @@ from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo import time + # Initialize NVML nvmlInit() From 386483013a3d2856e28f4b4f10372306ff2a4a5d Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 10 Sep 2025 15:11:44 +0000 Subject: [PATCH 21/98] Update inference script for Domino STL inference. --- .../external_aerodynamics/domino/README.md | 2 +- .../domino/src/inference_on_stl2.py | 261 ++++++++++++++---- physicsnemo/datapipes/cae/domino_datapipe2.py | 34 +++ physicsnemo/utils/domino/utils.py | 1 - 4 files changed, 237 insertions(+), 61 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/README.md b/examples/cfd/external_aerodynamics/domino/README.md index 470a6a5f46..a786772071 100644 --- a/examples/cfd/external_aerodynamics/domino/README.md +++ b/examples/cfd/external_aerodynamics/domino/README.md @@ -113,7 +113,7 @@ knowledge of the dataset: - The output fields (the labels) are normalized during training to a mean of zero and a standard deviation of one, averaged over the dataset. - The scaling is controlled by passing the `volume_factors` and + The scaling is controlled by passing the `volume_factors` andg `surface_factors` values to the datapipe. - The input locations are scaled by, and optionally cropped to, used defined bounding boxes for both surface and volume. Whether cropping occurs, or not, diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py index 7b4f71b507..9a44f996e9 100644 --- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py +++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py @@ -57,7 +57,6 @@ from nvtx import annotate as nvtx_annotate import torch.cuda.nvtx as nvtx - from physicsnemo.distributed import DistributedManager from physicsnemo.launch.utils import load_checkpoint, save_checkpoint from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper @@ -87,9 +86,6 @@ from physicsnemo.utils.profiling import profile, Profiler -# Profiler().enable("torch") -# Profiler().initialize() - from loss import compute_loss_dict from utils import get_num_vars @@ -102,9 +98,15 @@ def inference_epoch( gpu_handle: int, device: torch.device, logger: PythonLogger, - batch_size: int = 1_024_000, + batch_size: int = 24_000, total_points: int = 1_024_000, ) -> float: + ###################################################### + # Inference can run in a distributed way by coordinating + # the indices for each rank, which the sampler does + ###################################################### + + # Convert the indices right to a list: epoch_indices = list(sampler) # n_steps = total_points // batch_size @@ -112,41 +114,73 @@ def inference_epoch( # n_steps += 1 # last_batch_size = total_points - n_steps * batch_size - # Assuming here there are more than two target meshes: + ###################################################### + # Assuming here there are more than two target meshes + # This will get the IO pipe running in the background + # While we process a dataset. + ###################################################### dataset.preload(epoch_indices[0]) dataset.preload(epoch_indices[1]) - for i_batch, epoch_index in enumerate(epoch_indices): - # Do some preloading of input data: - data_time_start = time.perf_counter() + for i_batch, epoch_index in enumerate(epoch_indices): + batch_start_time = time.perf_counter() + ###################################################### + # Put another example in the preload queue while this + # batch is processed + ###################################################### + data_loading_start = time.perf_counter() if i_batch + 2 < len(epoch_indices): # Preload next next dataset.preload(epoch_indices[i_batch + 2]) + + ###################################################### # Get the data for this index: + ###################################################### sample_batched = dataset[epoch_index] - data_time_end = time.perf_counter() - print(f"Data {i_batch} time: {data_time_end - data_time_start:.3f} seconds") + dataloading_time = time.perf_counter() - data_loading_start + + logger.info( + f"Batch {i_batch} data loading time: {dataloading_time:.3f} seconds" + ) + procesing_time_start = time.perf_counter() - # We always need these keys, but are only reading the faces and coordinates - # which saves on IO speed. - # "stl_coordinates", "stl_centers", "stl_faces", "stl_areas" - # So, do the computation of the areas and centers: + ###################################################### + # The IO only reads in "stl_faces" and "stl_coordinates". + # "stl_areas" and "stl_centers" would be computed by + # pyvista on CPU - instead, we do it on the GPU + # right here. + ###################################################### + # Center is a mean of the 3 vertices triangle_vertices = sample_batched["stl_coordinates"][ sample_batched["stl_faces"].reshape((-1, 3)) ] sample_batched["stl_centers"] = triangle_vertices.mean(dim=-1) + ###################################################### # Area we compute from the cross product of two sides: + ###################################################### d1 = triangle_vertices[:, 1] - triangle_vertices[:, 0] d2 = triangle_vertices[:, 2] - triangle_vertices[:, 0] inferred_mesh_normals = torch.linalg.cross(d1, d2, dim=1) normals_norm = torch.linalg.norm(inferred_mesh_normals, dim=1) sample_batched["stl_areas"] = 0.5 * normals_norm - for i in range(10): - batch_time_start = time.perf_counter() - # Now that we have the meshes, begin to build a batch of data up for preprocessing: + ###################################################### + # For computing the points, we take those stl objects, + # sample in chunks of `batch_size` until we've + # accumulated `total_points` predictions. + ###################################################### + + batch_output_dict = {} + N = 2 + total_points_processed = 0 + while total_points_processed < total_points: + inner_loop_start_time = time.perf_counter() + + ###################################################### + # This function will sample points on the STL surface + ###################################################### sampled_points, sampled_faces, sampled_areas, sampled_normals = ( sample_points_on_mesh( sample_batched["stl_coordinates"], @@ -157,7 +191,11 @@ def inference_epoch( ) ) - # Build up volume points too: + ###################################################### + # Build up volume points too with uniform sampling + # TODO - this doesn't filter points that are + # internal to the mesh + ###################################################### c_min = datapipe.config.bounding_box_dims[1] c_max = datapipe.config.bounding_box_dims[0] @@ -165,6 +203,9 @@ def inference_epoch( batch_size, 3, device=device, dtype=torch.float32 ) + c_min + ###################################################### + # Create the dictionary as the preprocessing expects: + ###################################################### inference_dict = { "stl_coordinates": sample_batched["stl_coordinates"], "stl_faces": sample_batched["stl_faces"], @@ -177,58 +218,113 @@ def inference_epoch( "volume_mesh_centers": sampled_volume_points, } + ###################################################### + # Pre-process the data with the datapipe: + ###################################################### preprocessed_data = datapipe.process_data(inference_dict, i_batch) + ###################################################### # Add a batch dimension to the data_dict + # (normally this is added in __getitem__ of the datapipe) + ###################################################### preprocessed_data = { k: v.unsqueeze(0) for k, v in preprocessed_data.items() } + ###################################################### + # Forward pass through the model: + ###################################################### with torch.no_grad(): - output_data = model(preprocessed_data) - - batch_time_end = time.perf_counter() - points_per_second = batch_size / (batch_time_end - batch_time_start) - print( - f"Batch {i} in {i_batch} time: {batch_time_end - batch_time_start:.3f} seconds, {points_per_second:.3f} points per second" + output_vol, output_surf = model(preprocessed_data) + + ###################################################### + # unnormalize the outputs with the datapipe + # Whatever settings are configured for normalizing the + # output fields - even though we don't have ground + # truth here - are reused to undo that for the predictions + ###################################################### + output_vol, output_surf = datapipe.unscale_model_outputs( + output_vol, output_surf ) + + ###################################################### + # Peel off pressure, velocity, nut, shear, etc. + # Also compute drag, lift forces. + ###################################################### + # TODO + # TODO + # TODO + # TODO + # TODO + # TODO + # TODO + + total_points_processed += batch_size + + current_loop_time = time.perf_counter() + + gpu_info = nvmlDeviceGetMemoryInfo(gpu_handle) + gpu_memory_used = gpu_info.used / (1024**3) + + logging_string = f"Device {device}, batch {i_batch} processed {total_points_processed} points of {total_points}\n" + logging_string += f" GPU memory used: {gpu_memory_used:.3f} Gb\n" + logging_string += f" Time taken since batch start: {current_loop_time - batch_start_time:.2f} seconds\n" + logging_string += f" iteration throughput: {batch_size / (current_loop_time - inner_loop_start_time):.1f} points per second\n" + logging_string += f" Batch mean throughput: {total_points_processed / (current_loop_time - batch_start_time):.1f} points per second (includes IO)\n" + + logger.info(logging_string) + procesing_time_end = time.perf_counter() - print( - f"Processing {i_batch} time: {procesing_time_end - procesing_time_start:.3f} seconds" + logger.info( + f"Batch {i_batch} GPU processing time: {procesing_time_end - procesing_time_start:.3f} seconds" + ) + + output_start_time = time.perf_counter() + ###################################################### + # Save the outputs to file: + ###################################################### + # TODO + # TODO + # TODO + # TODO + # TODO + # TODO + output_end_time = time.perf_counter() + logger.info( + f"Batch {i_batch} output time: {output_end_time - output_start_time:.3f} seconds" ) - if i_batch > 20: - break - print(sample_batched.keys()) - return 0.0 + if i_batch > 5: + break @hydra.main(version_base="1.3", config_path="conf", config_name="config") def main(cfg: DictConfig) -> None: - ################################ + ###################################################### # initialize distributed manager - ################################ + ###################################################### DistributedManager.initialize() dist = DistributedManager() - ################################ + ###################################################### # Initialize NVML - ################################ + ###################################################### nvmlInit() gpu_handle = nvmlDeviceGetHandleByIndex(dist.device.index) - ################################ + ###################################################### # Initialize logger - ################################ + ###################################################### logger = PythonLogger("Train") logger = RankZeroLoggingWrapper(logger, dist) logger.info(f"Config summary:\n{OmegaConf.to_yaml(cfg, sort_keys=True)}") - ################################ + ###################################################### # Get scaling factors - ################################ + # Likely, you want to reuse the scaling factors from training. + ###################################################### pickle_path = os.path.join(cfg.output) + "/scaling_factors/scaling_factors.pkl" try: @@ -239,8 +335,13 @@ def main(cfg: DictConfig) -> None: f"Scaling factors not found at: {pickle_path}; please run compute_statistics.py to compute them." ) - model_type = cfg.model.model_type + vol_factors = scaling_factors.mean["volume_fields"] + surf_factors = scaling_factors.mean["surface_fields"] + ###################################################### + # Configure the model + ###################################################### + model_type = cfg.model.model_type num_vol_vars, num_surf_vars, num_global_features = get_num_vars(cfg, model_type) if model_type == "combined" or model_type == "surface": @@ -253,11 +354,41 @@ def main(cfg: DictConfig) -> None: else: volume_variable_names = [] - vol_factors = scaling_factors.mean["volume_fields"] - surf_factors = scaling_factors.mean["surface_fields"] - vol_factors_tensor = torch.from_numpy(vol_factors).to(dist.device) + ###################################################### + # Check that the sample size is equal. + # unequal samples could be done but they aren't, here.s + ###################################################### + if cfg.model.model_type == "combined": + if cfg.model.volume_points_sample != cfg.model.surface_points_sample: + raise ValueError( + "Volume and surface points sample must be equal for combined model" + ) + + # Get the number of sample points: + sample_points = ( + cfg.model.surface_points_sample + if cfg.model.model_type == "surface" + else cfg.model.volume_points_sample + ) - bounding_box = None + ###################################################### + # If the batch size doesn't evenly divide + # the num points, that's ok. But print a warning + # that the total points will get tweaked. + ###################################################### + if cfg.eval.num_points % sample_points != 0: + logger.warning( + f"Batch size {sample_points} doesn't evenly divide num points {cfg.eval.num_points}." + ) + logger.warning( + f"Total points will be rounded up to {((cfg.eval.num_points // sample_points) + 1) * sample_points}." + ) + + ###################################################### + # Configure the dataset + # We are applying preprocessing in a separate step + # for this - so the dataset and datapipe are separate + ###################################################### # Override the model type # For the inference pipeline, we adjust the tooling a little for the data. @@ -276,8 +407,13 @@ def main(cfg: DictConfig) -> None: # Volumetric data will be generated on the fly on the GPU. + ###################################################### + # Configure the datapipe # We _won't_ iterate over the datapipe, however, we can use the # datapipe processing tools on the sampled surface and + # volume points with the same preprocessing. + # It also is used to un-normalize the model outputs. + ###################################################### overrides = {} if hasattr(cfg.data, "gpu_preprocessing"): overrides["gpu_preprocessing"] = cfg.data.gpu_preprocessing @@ -311,6 +447,10 @@ def main(cfg: DictConfig) -> None: **overrides, ) + ###################################################### + # The sampler is used in multi-gpu inference to + # coordinate the batches used for each rank. + ###################################################### test_sampler = DistributedSampler( test_dataset, num_replicas=dist.world_size, @@ -318,6 +458,10 @@ def main(cfg: DictConfig) -> None: **cfg.train.sampler, ) + ###################################################### + # Configure the model + # and move it to the device. + ###################################################### model = DoMINO( input_features=3, output_features_vol=num_vol_vars, @@ -330,12 +474,6 @@ def main(cfg: DictConfig) -> None: # Print model summary (structure and parmeter count). logger.info(f"Model summary:\n{torchinfo.summary(model, verbose=0, depth=2)}\n") - writer = SummaryWriter(os.path.join(cfg.output, "tensorboard")) - - model_save_path = os.path.join(cfg.output, "models") - param_save_path = os.path.join(cfg.output, "param") - best_model_path = os.path.join(model_save_path, "best_model") - if dist.world_size > 1: torch.distributed.barrier() @@ -354,17 +492,22 @@ def main(cfg: DictConfig) -> None: initial_integral_factor = initial_integral_factor_orig + prof = Profiler() + model.eval() epoch_start_time = time.perf_counter() - inference_epoch( - dataset=test_dataset, - sampler=test_sampler, - datapipe=test_datapipe, - model=model, - logger=logger, - gpu_handle=gpu_handle, - device=dist.device, - ) + with prof: + inference_epoch( + dataset=test_dataset, + sampler=test_sampler, + datapipe=test_datapipe, + model=model, + logger=logger, + gpu_handle=gpu_handle, + device=dist.device, + batch_size=sample_points, + total_points=cfg.eval.num_points, + ) epoch_end_time = time.perf_counter() logger.info( f"Device {dist.device}, Epoch took {epoch_end_time - epoch_start_time:.3f} seconds" diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py index 18d198f457..f5ed3693ac 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe2.py +++ b/physicsnemo/datapipes/cae/domino_datapipe2.py @@ -51,6 +51,8 @@ pad, shuffle_array, standardize, + unnormalize, + unstandardize, ) from physicsnemo.utils.neighbors import knn from physicsnemo.utils.profiling import profile @@ -808,6 +810,38 @@ def process_data(self, data_dict, idx: int): return return_dict + def unscale_model_outputs( + self, volume_fields: torch.Tensor | None, surface_fields: torch.Tensor | None + ): + """ + Unscale the model outputs based on the configured scaling factors. + + The unscaling is included here to make it a consistent interface regardless + of the scaling factors and type used. + + """ + + if volume_fields is not None: + if self.config.scaling_type == "mean_std_scaling": + vol_mean = self.config.volume_factors[0] + vol_std = self.config.volume_factors[1] + volume_fields = unstandardize(volume_fields, vol_mean, vol_std) + elif self.config.scaling_type == "min_max_scaling": + vol_min = self.config.volume_factors[1] + vol_max = self.config.volume_factors[0] + volume_fields = unnormalize(volume_fields, vol_max, vol_min) + if surface_fields is not None: + if self.config.scaling_type == "mean_std_scaling": + surf_mean = self.config.surface_factors[0] + surf_std = self.config.surface_factors[1] + surface_fields = unstandardize(surface_fields, surf_mean, surf_std) + elif self.config.scaling_type == "min_max_scaling": + surf_min = self.config.surface_factors[1] + surf_max = self.config.surface_factors[0] + surface_fields = unnormalize(surface_fields, surf_max, surf_min) + + return volume_fields, surface_fields + def __getitem__(self, idx): """ Function for fetching and processing a single file's data. diff --git a/physicsnemo/utils/domino/utils.py b/physicsnemo/utils/domino/utils.py index 336a411497..95a7011976 100644 --- a/physicsnemo/utils/domino/utils.py +++ b/physicsnemo/utils/domino/utils.py @@ -256,7 +256,6 @@ def calculate_normal_positional_encoding( pos_x = torch.cat(calculate_pos_encoding(normals[:, 0] / dx, d=4), dim=-1) pos_y = torch.cat(calculate_pos_encoding(normals[:, 1] / dy, d=4), dim=-1) pos_z = torch.cat(calculate_pos_encoding(normals[:, 2] / dz, d=4), dim=-1) - print(pos_x.shape, pos_y.shape, pos_z.shape) pos_normals = torch.cat((pos_x, pos_y, pos_z), dim=0).reshape(-1, 12) return pos_normals From 0635e4d5d94967ac428b636cdca8ea3aaa8ca8db Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 10 Sep 2025 15:20:32 +0000 Subject: [PATCH 22/98] Minor tweaks to the inference script. --- .../domino/src/inference_on_stl2.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py index 9a44f996e9..1f4c2a6305 100644 --- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py +++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py @@ -223,6 +223,17 @@ def inference_epoch( ###################################################### preprocessed_data = datapipe.process_data(inference_dict, i_batch) + ###################################################### + # Use the sign of the volume SDF to filter out points + # That are inside the STL mesh + ###################################################### + # TODO + # TODO + # TODO + # TODO + # TODO + # TODO + ###################################################### # Add a batch dimension to the data_dict # (normally this is added in __getitem__ of the datapipe) @@ -483,15 +494,11 @@ def main(cfg: DictConfig) -> None: device=dist.device, ) - initial_integral_factor_orig = cfg.model.integral_loss_scaling_factor - start_time = time.perf_counter() # This controls what indices to use for each epoch. test_sampler.set_epoch(0) - initial_integral_factor = initial_integral_factor_orig - prof = Profiler() model.eval() From db8cc984c409a65577b94eba0b14d87ac0d2fe4a Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 10 Sep 2025 15:26:56 +0000 Subject: [PATCH 23/98] Mark the docstring for updating. --- .../external_aerodynamics/domino/src/inference_on_stl2.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py index 1f4c2a6305..48e4f1ebc2 100644 --- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py +++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py @@ -14,6 +14,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +# TODO +# TODO +# TODO +# Update this +# TODO +# TODO +# TODO """ This code defines a distributed pipeline for training the DoMINO model on CFD datasets. It includes the computation of scaling factors, instantiating From 2a190eb0efc9520e159bd98ecf1a75b153d0969b Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 10 Sep 2025 16:32:53 +0000 Subject: [PATCH 24/98] Spin off the stl sampling and inference loop into it's own function, for ease in downstream apps with only one stl. --- .../domino/src/inference_on_stl2.py | 308 ++++++++++-------- physicsnemo/datapipes/cae/domino_datapipe2.py | 5 +- 2 files changed, 166 insertions(+), 147 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py index 48e4f1ebc2..676f146b89 100644 --- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py +++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py @@ -97,17 +97,170 @@ from utils import get_num_vars +def inference_on_single_stl( + stl_coordinates: torch.Tensor, + stl_faces: torch.Tensor, + model: DoMINO, + datapipe: DoMINODataPipe, + batch_size: int, + total_points: int, + gpu_handle: int | None = None, + logger: PythonLogger | None = None, +): + device = stl_coordinates.device + batch_start_time = time.perf_counter() + ###################################################### + # The IO only reads in "stl_faces" and "stl_coordinates". + # "stl_areas" and "stl_centers" would be computed by + # pyvista on CPU - instead, we do it on the GPU + # right here. + ###################################################### + + # Center is a mean of the 3 vertices + triangle_vertices = stl_coordinates[stl_faces.reshape((-1, 3))] + stl_centers = triangle_vertices.mean(dim=-1) + ###################################################### + # Area we compute from the cross product of two sides: + ###################################################### + d1 = triangle_vertices[:, 1] - triangle_vertices[:, 0] + d2 = triangle_vertices[:, 2] - triangle_vertices[:, 0] + inferred_mesh_normals = torch.linalg.cross(d1, d2, dim=1) + normals_norm = torch.linalg.norm(inferred_mesh_normals, dim=1) + inferred_mesh_normals = inferred_mesh_normals / normals_norm.unsqueeze(1) + stl_areas = 0.5 * normals_norm + + ###################################################### + # For computing the points, we take those stl objects, + # sample in chunks of `batch_size` until we've + # accumulated `total_points` predictions. + ###################################################### + + batch_output_dict = {} + N = 2 + total_points_processed = 0 + while total_points_processed < total_points: + inner_loop_start_time = time.perf_counter() + + ###################################################### + # This function will sample points on the STL surface + ###################################################### + sampled_points, sampled_faces, sampled_areas, sampled_normals = ( + sample_points_on_mesh( + stl_coordinates, + stl_faces, + batch_size, + mesh_normals=inferred_mesh_normals, + mesh_areas=stl_areas, + ) + ) + + ###################################################### + # Build up volume points too with uniform sampling + # TODO - this doesn't filter points that are + # internal to the mesh + ###################################################### + c_min = datapipe.config.bounding_box_dims[1] + c_max = datapipe.config.bounding_box_dims[0] + + sampled_volume_points = (c_max - c_min) * torch.rand( + batch_size, 3, device=device, dtype=torch.float32 + ) + c_min + + ###################################################### + # Create the dictionary as the preprocessing expects: + ###################################################### + inference_dict = { + "stl_coordinates": stl_coordinates, + "stl_faces": stl_faces, + "stl_centers": stl_centers, + "stl_areas": stl_areas, + "surface_mesh_centers": sampled_points, + "surface_normals": sampled_normals, + "surface_areas": sampled_areas, + "surface_faces": sampled_faces, + "volume_mesh_centers": sampled_volume_points, + } + + ###################################################### + # Pre-process the data with the datapipe: + ###################################################### + preprocessed_data = datapipe.process_data(inference_dict) + + ###################################################### + # Use the sign of the volume SDF to filter out points + # That are inside the STL mesh + ###################################################### + # TODO + # TODO + # TODO + # TODO + # TODO + # TODO + + ###################################################### + # Add a batch dimension to the data_dict + # (normally this is added in __getitem__ of the datapipe) + ###################################################### + preprocessed_data = {k: v.unsqueeze(0) for k, v in preprocessed_data.items()} + + ###################################################### + # Forward pass through the model: + ###################################################### + with torch.no_grad(): + output_vol, output_surf = model(preprocessed_data) + + ###################################################### + # unnormalize the outputs with the datapipe + # Whatever settings are configured for normalizing the + # output fields - even though we don't have ground + # truth here - are reused to undo that for the predictions + ###################################################### + output_vol, output_surf = datapipe.unscale_model_outputs( + output_vol, output_surf + ) + + ###################################################### + # Peel off pressure, velocity, nut, shear, etc. + # Also compute drag, lift forces. + ###################################################### + # TODO + # TODO + # TODO + # TODO + # TODO + # TODO + # TODO + + total_points_processed += batch_size + + current_loop_time = time.perf_counter() + + logging_string = f"Device {device} processed {total_points_processed} points of {total_points}\n" + if gpu_handle is not None: + gpu_info = nvmlDeviceGetMemoryInfo(gpu_handle) + gpu_memory_used = gpu_info.used / (1024**3) + logging_string += f" GPU memory used: {gpu_memory_used:.3f} Gb\n" + + logging_string += f" Time taken since batch start: {current_loop_time - batch_start_time:.2f} seconds\n" + logging_string += f" iteration throughput: {batch_size / (current_loop_time - inner_loop_start_time):.1f} points per second\n" + logging_string += f" Batch mean throughput: {total_points_processed / (current_loop_time - batch_start_time):.1f} points per second.\n" + + if logger is not None: + logger.info(logging_string) + else: + print(logging_string) + + def inference_epoch( dataset: DrivaerMLDataset, sampler: DistributedSampler, datapipe: DoMINODataPipe, model: DoMINO, gpu_handle: int, - device: torch.device, logger: PythonLogger, batch_size: int = 24_000, total_points: int = 1_024_000, -) -> float: +): ###################################################### # Inference can run in a distributed way by coordinating # the indices for each rank, which the sampler does @@ -151,146 +304,16 @@ def inference_epoch( ) procesing_time_start = time.perf_counter() - - ###################################################### - # The IO only reads in "stl_faces" and "stl_coordinates". - # "stl_areas" and "stl_centers" would be computed by - # pyvista on CPU - instead, we do it on the GPU - # right here. - ###################################################### - - # Center is a mean of the 3 vertices - triangle_vertices = sample_batched["stl_coordinates"][ - sample_batched["stl_faces"].reshape((-1, 3)) - ] - sample_batched["stl_centers"] = triangle_vertices.mean(dim=-1) - ###################################################### - # Area we compute from the cross product of two sides: - ###################################################### - d1 = triangle_vertices[:, 1] - triangle_vertices[:, 0] - d2 = triangle_vertices[:, 2] - triangle_vertices[:, 0] - inferred_mesh_normals = torch.linalg.cross(d1, d2, dim=1) - normals_norm = torch.linalg.norm(inferred_mesh_normals, dim=1) - sample_batched["stl_areas"] = 0.5 * normals_norm - - ###################################################### - # For computing the points, we take those stl objects, - # sample in chunks of `batch_size` until we've - # accumulated `total_points` predictions. - ###################################################### - - batch_output_dict = {} - N = 2 - total_points_processed = 0 - while total_points_processed < total_points: - inner_loop_start_time = time.perf_counter() - - ###################################################### - # This function will sample points on the STL surface - ###################################################### - sampled_points, sampled_faces, sampled_areas, sampled_normals = ( - sample_points_on_mesh( - sample_batched["stl_coordinates"], - sample_batched["stl_faces"], - batch_size, - mesh_normals=sample_batched["surface_normals"], - mesh_areas=sample_batched["stl_areas"], - ) - ) - - ###################################################### - # Build up volume points too with uniform sampling - # TODO - this doesn't filter points that are - # internal to the mesh - ###################################################### - c_min = datapipe.config.bounding_box_dims[1] - c_max = datapipe.config.bounding_box_dims[0] - - sampled_volume_points = (c_max - c_min) * torch.rand( - batch_size, 3, device=device, dtype=torch.float32 - ) + c_min - - ###################################################### - # Create the dictionary as the preprocessing expects: - ###################################################### - inference_dict = { - "stl_coordinates": sample_batched["stl_coordinates"], - "stl_faces": sample_batched["stl_faces"], - "stl_centers": sample_batched["stl_centers"], - "stl_areas": sample_batched["stl_areas"], - "surface_mesh_centers": sampled_points, - "surface_normals": sampled_normals, - "surface_areas": sampled_areas, - "surface_faces": sampled_faces, - "volume_mesh_centers": sampled_volume_points, - } - - ###################################################### - # Pre-process the data with the datapipe: - ###################################################### - preprocessed_data = datapipe.process_data(inference_dict, i_batch) - - ###################################################### - # Use the sign of the volume SDF to filter out points - # That are inside the STL mesh - ###################################################### - # TODO - # TODO - # TODO - # TODO - # TODO - # TODO - - ###################################################### - # Add a batch dimension to the data_dict - # (normally this is added in __getitem__ of the datapipe) - ###################################################### - preprocessed_data = { - k: v.unsqueeze(0) for k, v in preprocessed_data.items() - } - - ###################################################### - # Forward pass through the model: - ###################################################### - with torch.no_grad(): - output_vol, output_surf = model(preprocessed_data) - - ###################################################### - # unnormalize the outputs with the datapipe - # Whatever settings are configured for normalizing the - # output fields - even though we don't have ground - # truth here - are reused to undo that for the predictions - ###################################################### - output_vol, output_surf = datapipe.unscale_model_outputs( - output_vol, output_surf - ) - - ###################################################### - # Peel off pressure, velocity, nut, shear, etc. - # Also compute drag, lift forces. - ###################################################### - # TODO - # TODO - # TODO - # TODO - # TODO - # TODO - # TODO - - total_points_processed += batch_size - - current_loop_time = time.perf_counter() - - gpu_info = nvmlDeviceGetMemoryInfo(gpu_handle) - gpu_memory_used = gpu_info.used / (1024**3) - - logging_string = f"Device {device}, batch {i_batch} processed {total_points_processed} points of {total_points}\n" - logging_string += f" GPU memory used: {gpu_memory_used:.3f} Gb\n" - logging_string += f" Time taken since batch start: {current_loop_time - batch_start_time:.2f} seconds\n" - logging_string += f" iteration throughput: {batch_size / (current_loop_time - inner_loop_start_time):.1f} points per second\n" - logging_string += f" Batch mean throughput: {total_points_processed / (current_loop_time - batch_start_time):.1f} points per second (includes IO)\n" - - logger.info(logging_string) + inference_on_single_stl( + sample_batched["stl_coordinates"], + sample_batched["stl_faces"], + model, + datapipe, + batch_size, + total_points, + gpu_handle, + logger, + ) procesing_time_end = time.perf_counter() logger.info( @@ -518,7 +541,6 @@ def main(cfg: DictConfig) -> None: model=model, logger=logger, gpu_handle=gpu_handle, - device=dist.device, batch_size=sample_points, total_points=cfg.eval.num_points, ) diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py index f5ed3693ac..01ed70f6d8 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe2.py +++ b/physicsnemo/datapipes/cae/domino_datapipe2.py @@ -727,14 +727,11 @@ def process_volume( return return_dict @torch.no_grad() - def process_data(self, data_dict, idx: int): + def process_data(self, data_dict): for key in self.keys_to_read_if_available.keys(): if key not in data_dict: data_dict[key] = self.keys_to_read_if_available[key] - if self.config.deterministic: - torch.manual_seed(idx) - # Start building the preprocessed return dict: return_dict = { "global_params_values": data_dict["global_params_values"], From 6393b5616dd8dc5d220d3f34798e56ba009fcd3d Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 10 Sep 2025 17:05:10 +0000 Subject: [PATCH 25/98] Ensure stl mesh itself gets processed too --- .../domino/src/inference_on_stl2.py | 199 +++++++++++++----- 1 file changed, 144 insertions(+), 55 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py index 676f146b89..91374b63e2 100644 --- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py +++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py @@ -107,6 +107,25 @@ def inference_on_single_stl( gpu_handle: int | None = None, logger: PythonLogger | None = None, ): + """ + Perform model inference on a single STL mesh. + + This function will take the input mesh + faces and + then sample the surface and volume to produce the model outputs + at `total_points` locations in batches of `batch_size`. + + + + Args: + stl_coordinates: The coordinates of the STL mesh. + stl_faces: The faces of the STL mesh. + model: The model to use for inference. + datapipe: The datapipe to use for preprocessing. + batch_size: The batch size to use for inference. + total_points: The total number of points to process. + gpu_handle: The GPU handle to use for inference. + logger: The logger to use for logging. + """ device = stl_coordinates.device batch_start_time = time.perf_counter() ###################################################### @@ -124,9 +143,9 @@ def inference_on_single_stl( ###################################################### d1 = triangle_vertices[:, 1] - triangle_vertices[:, 0] d2 = triangle_vertices[:, 2] - triangle_vertices[:, 0] - inferred_mesh_normals = torch.linalg.cross(d1, d2, dim=1) - normals_norm = torch.linalg.norm(inferred_mesh_normals, dim=1) - inferred_mesh_normals = inferred_mesh_normals / normals_norm.unsqueeze(1) + stl_mesh_normals = torch.linalg.cross(d1, d2, dim=1) + normals_norm = torch.linalg.norm(stl_mesh_normals, dim=1) + stl_mesh_normals = stl_mesh_normals / normals_norm.unsqueeze(1) stl_areas = 0.5 * normals_norm ###################################################### @@ -138,33 +157,13 @@ def inference_on_single_stl( batch_output_dict = {} N = 2 total_points_processed = 0 - while total_points_processed < total_points: - inner_loop_start_time = time.perf_counter() - - ###################################################### - # This function will sample points on the STL surface - ###################################################### - sampled_points, sampled_faces, sampled_areas, sampled_normals = ( - sample_points_on_mesh( - stl_coordinates, - stl_faces, - batch_size, - mesh_normals=inferred_mesh_normals, - mesh_areas=stl_areas, - ) - ) - ###################################################### - # Build up volume points too with uniform sampling - # TODO - this doesn't filter points that are - # internal to the mesh - ###################################################### - c_min = datapipe.config.bounding_box_dims[1] - c_max = datapipe.config.bounding_box_dims[0] + # Use these lists to build up the output tensors: + surface_results = [] + volume_results = [] - sampled_volume_points = (c_max - c_min) * torch.rand( - batch_size, 3, device=device, dtype=torch.float32 - ) + c_min + while total_points_processed < total_points: + inner_loop_start_time = time.perf_counter() ###################################################### # Create the dictionary as the preprocessing expects: @@ -174,28 +173,60 @@ def inference_on_single_stl( "stl_faces": stl_faces, "stl_centers": stl_centers, "stl_areas": stl_areas, - "surface_mesh_centers": sampled_points, - "surface_normals": sampled_normals, - "surface_areas": sampled_areas, - "surface_faces": sampled_faces, - "volume_mesh_centers": sampled_volume_points, } + # If the surface data is part of the model, sample the surface: + + if datapipe.model_type == "surface" or datapipe.model_type == "combined": + ###################################################### + # This function will sample points on the STL surface + ###################################################### + sampled_points, sampled_faces, sampled_areas, sampled_normals = ( + sample_points_on_mesh( + stl_coordinates, + stl_faces, + batch_size, + mesh_normals=stl_mesh_normals, + mesh_areas=stl_areas, + ) + ) + + inference_dict["surface_mesh_centers"] = sampled_points + inference_dict["surface_normals"] = sampled_normals + inference_dict["surface_areas"] = sampled_areas + inference_dict["surface_faces"] = sampled_faces + + # If the volume data is part of the model, sample the volume: + if datapipe.model_type == "volume" or datapipe.model_type == "combined": + ###################################################### + # Build up volume points too with uniform sampling + # TODO - this doesn't filter points that are + # internal to the mesh + ###################################################### + c_min = datapipe.config.bounding_box_dims[1] + c_max = datapipe.config.bounding_box_dims[0] + + sampled_volume_points = (c_max - c_min) * torch.rand( + batch_size, 3, device=device, dtype=torch.float32 + ) + c_min + + inference_dict["volume_mesh_centers"] = (sampled_volume_points,) + ###################################################### # Pre-process the data with the datapipe: ###################################################### preprocessed_data = datapipe.process_data(inference_dict) - ###################################################### - # Use the sign of the volume SDF to filter out points - # That are inside the STL mesh - ###################################################### - # TODO - # TODO - # TODO - # TODO - # TODO - # TODO + if datapipe.model_type == "volume" or datapipe.model_type == "combined": + ###################################################### + # Use the sign of the volume SDF to filter out points + # That are inside the STL mesh + ###################################################### + sdf_nodes = preprocessed_data["sdf_nodes"] + valid_volume_idx = sdf_nodes > 0 + preprocessed_data["volume_mesh_centers"] = preprocessed_data[ + "volume_mesh_centers" + ][valid_volume_idx] ###################################################### # Add a batch dimension to the data_dict @@ -219,17 +250,8 @@ def inference_on_single_stl( output_vol, output_surf ) - ###################################################### - # Peel off pressure, velocity, nut, shear, etc. - # Also compute drag, lift forces. - ###################################################### - # TODO - # TODO - # TODO - # TODO - # TODO - # TODO - # TODO + surface_results.append(output_surf) + volume_results.append(output_vol) total_points_processed += batch_size @@ -250,6 +272,61 @@ def inference_on_single_stl( else: print(logging_string) + ###################################################### + # Here at the end, get the values for the stl centers + # by updating the previous inference dict + # Only do this if the surface is part of the computation + # Comments are shorter here - it's a condensed version + # of the above logic. + ###################################################### + if datapipe.model_type == "surface" or datapipe.model_type == "combined": + stl_inference_dict = { + "stl_coordinates": stl_coordinates, + "stl_faces": stl_faces, + "stl_centers": stl_centers, + "stl_areas": stl_areas, + } + inference_dict["surface_mesh_centers"] = stl_centers + inference_dict["surface_normals"] = stl_mesh_normals + inference_dict["surface_areas"] = stl_areas + inference_dict["surface_faces"] = stl_faces + + # Just reuse the previous volume samples here if needed: + if datapipe.model_type == "combined": + inference_dict["volume_mesh_centers"] = sampled_volume_points + + # Preprocess: + preprocessed_data = datapipe.process_data(inference_dict) + + # Pull out the invalid volume points again, if needed: + if datapipe.model_type == "combined": + sdf_nodes = preprocessed_data["sdf_nodes"] + valid_volume_idx = sdf_nodes > 0 + preprocessed_data["volume_mesh_centers"] = preprocessed_data[ + "volume_mesh_centers" + ][valid_volume_idx] + + # Run the model forward: + with torch.no_grad(): + preprocessed_data = { + k: v.unsqueeze(0) for k, v in preprocessed_data.items() + } + _, output_surf = model(preprocessed_data) + + # Unnormalize the outputs: + _, stl_center_results = datapipe.unscale_model_outputs(None, output_surf) + + else: + stl_center_results = None + + # Stack up the results into one big tensor for surface and volume: + if all([s is not None for s in surface_results]): + surface_results = torch.cat(surface_results, dim=1) + if all([v is not None for v in volume_results]): + volume_results = torch.cat(volume_results, dim=0) + + return stl_center_results, surface_results, volume_results + def inference_epoch( dataset: DrivaerMLDataset, @@ -304,7 +381,7 @@ def inference_epoch( ) procesing_time_start = time.perf_counter() - inference_on_single_stl( + stl_center_resulst, surface_results, volume_results = inference_on_single_stl( sample_batched["stl_coordinates"], sample_batched["stl_faces"], model, @@ -315,6 +392,18 @@ def inference_epoch( logger, ) + ###################################################### + # Peel off pressure, velocity, nut, shear, etc. + # Also compute drag, lift forces. + ###################################################### + # TODO + # TODO + # TODO + # TODO + # TODO + # TODO + # TODO + procesing_time_end = time.perf_counter() logger.info( f"Batch {i_batch} GPU processing time: {procesing_time_end - procesing_time_start:.3f} seconds" From f7e9ea24739e7e293419bd081a28228c92dca2c3 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 10 Sep 2025 17:10:48 +0000 Subject: [PATCH 26/98] Update docstring for inference file. --- .../domino/src/inference_on_stl2.py | 38 +++++++------------ 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py index 91374b63e2..9ff5b62d0d 100644 --- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py +++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py @@ -14,24 +14,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -# TODO -# TODO -# TODO -# Update this -# TODO -# TODO -# TODO """ -This code defines a distributed pipeline for training the DoMINO model on -CFD datasets. It includes the computation of scaling factors, instantiating -the DoMINO model and datapipe, automatically loading the most recent checkpoint, -training the model in parallel using DistributedDataParallel across multiple -GPUs, calculating the loss and updating model parameters using mixed precision. -This is a common recipe that enables training of combined models for surface and -volume as well either of them separately. Validation is also conducted every epoch, -where predictions are compared against ground truth values. The code logs training -and validation metrics to TensorBoard. The train tab in config.yaml can be used to -specify batch size, number of epochs and other training parameters. +This code shows how to use a trained DoMINO model, with it's corresponding +preprocessing pipeline, to infer values on and around an STL mesh file. + +This script uses the meshes from the DrivaerML dataset, however, the logic +is largely the same. As an overview: +- Load the model +- Set up the preprocessor +- Loop over meshes +- In each mesh, sample random points on the surface, volume, or both +- Preprocess the points and run them through the model +- Process the STL mesh centers, too +- Collect the results and return +- Save the results to file. """ import time @@ -346,11 +342,6 @@ def inference_epoch( # Convert the indices right to a list: epoch_indices = list(sampler) - # n_steps = total_points // batch_size - # if n_steps * batch_size < total_points: - # n_steps += 1 - # last_batch_size = total_points - n_steps * batch_size - ###################################################### # Assuming here there are more than two target meshes # This will get the IO pipe running in the background @@ -424,9 +415,6 @@ def inference_epoch( f"Batch {i_batch} output time: {output_end_time - output_start_time:.3f} seconds" ) - if i_batch > 5: - break - @hydra.main(version_base="1.3", config_path="conf", config_name="config") def main(cfg: DictConfig) -> None: From d784a801a110f26ff1c1ae679c44451d94ea7845 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Mon, 15 Sep 2025 19:37:49 +0000 Subject: [PATCH 27/98] Enable shard tensor for zarr datasets, both with or without tensorstore --- physicsnemo/datapipes/cae/domino_datapipe2.py | 244 ++++------- .../datapipes/cae/drivaer_ml_dataset.py | 403 +++++++++++++++--- 2 files changed, 432 insertions(+), 215 deletions(-) diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py index 01ed70f6d8..472c09be24 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe2.py +++ b/physicsnemo/datapipes/cae/domino_datapipe2.py @@ -29,7 +29,7 @@ from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass from pathlib import Path -from typing import Literal, Optional, Protocol, Sequence, Union +from typing import Iterable, Literal, Optional, Protocol, Sequence, Union import numpy as np import torch @@ -299,41 +299,7 @@ def __init__( dtype=torch.float32, ) - # Always read these keys: - self.keys_to_read = ["stl_coordinates", "stl_centers", "stl_faces", "stl_areas"] - - self.keys_to_read_if_available = { - "global_params_values": torch.tensor( - [[30.0], [1.226]], device=self.preproc_device - ), - "global_params_reference": torch.tensor( - [[30.0], [1.226]], device=self.preproc_device - ), - } - - self.volume_keys = ["volume_mesh_centers", "volume_fields"] - self.surface_keys = [ - "surface_mesh_centers", - "surface_normals", - "surface_areas", - "surface_fields", - ] - - if self.model_type == "volume" or self.model_type == "combined": - self.keys_to_read.extend(self.volume_keys) - if self.model_type == "surface" or self.model_type == "combined": - self.keys_to_read.extend(self.surface_keys) - - if self.config.data_path is not None: - self.dataset = DrivaerMLDataset( - data_dir=self.config.data_path, - keys_to_read=self.keys_to_read, - output_device=self.preproc_device, - pin_memory=pin_memory, - consumer_stream=torch.cuda.default_stream(), - ) - else: - self.dataset = None + self.dataset = None # This is thread storage for data preprocessing: self._preprocess_queue = {} @@ -341,21 +307,6 @@ def __init__( self.preprocess_depth = 2 self.preprocess_executor = ThreadPoolExecutor(max_workers=1) - def set_indices(self, indices: list[int]): - """ - Set the indices for the dataset for this epoch. - """ - - # TODO - this needs to block while anything is in the preprocess queue. - - self.indices = indices - - def __len__(self): - if self.dataset is not None: - return len(self.dataset) - else: - return 0 - def compute_stl_scaling( self, stl_vertices: torch.Tensor, bounding_box_dims_surf: torch.Tensor | None ): @@ -728,10 +679,6 @@ def process_volume( @torch.no_grad() def process_data(self, data_dict): - for key in self.keys_to_read_if_available.keys(): - if key not in data_dict: - data_dict[key] = self.keys_to_read_if_available[key] - # Start building the preprocessed return dict: return_dict = { "global_params_values": data_dict["global_params_values"], @@ -839,6 +786,15 @@ def unscale_model_outputs( return volume_fields, surface_fields + def set_dataset(self, dataset: Iterable) -> None: + self.dataset = dataset + + def __len__(self): + if self.dataset is not None: + return len(self.dataset) + else: + return 0 + def __getitem__(self, idx): """ Function for fetching and processing a single file's data. @@ -850,17 +806,27 @@ def __getitem__(self, idx): if self.dataset is None: raise ValueError("Dataset is not present") - index = self.idx_to_index(idx) + # Get the data from the dataset. + # Under the hood, this may be fetching preloaded data. + data_dict = self.dataset[idx] + + return self.__call__(data_dict) + + def __call__(self, data_dict: dict) -> dict: + """ + Process the incoming data dictionary. + - Processes the data + - moves it to GPU + - adds a batch dimension - # Get the preprocessed data: - data_dict = self.get_preprocessed(idx) - if data_dict is None: - # If no preprocessing was done for this index, process it now + Args: + data_dict: Dictionary containing the data to process as torch.Tensors. - # Get the data from the dataset. - # Under the hood, this may be fetching preloaded data. - data_dict = self.dataset[index] - data_dict = self.process_data(data_dict, idx) + Returns: + Dictionary containing the processed data as torch.Tensors. + + """ + data_dict = self.process_data(data_dict) # If the data is not on the target device, put it there: for key, value in data_dict.items(): @@ -872,101 +838,9 @@ def __getitem__(self, idx): return data_dict - def idx_to_index(self, idx): - if hasattr(self, "indices"): - return self.indices[idx] - - return idx - - def preprocess(self, idx: int) -> None: - """ - Start preprocessing for the given index (1 step ahead). - This processes preloaded data or loads it if not available. - """ - if self.dataset is None: - raise ValueError("Dataset is not present") - - if idx in self._preprocess_queue: - # Skip items that are already being preprocessed - return - - def _preprocess_worker(): - index = self.idx_to_index(idx) - # Try to get preloaded data first - data_dict = self.dataset[index] - # Process the data - return self.process_data(data_dict, idx) - - # Submit preprocessing task to thread pool - self._preprocess_queue[idx] = self.preprocess_executor.submit( - _preprocess_worker - ) - - def get_preprocessed(self, idx: int) -> dict | None: - """ - Retrieve preprocessed data (blocking if not ready). - Returns None if no preprocessing is in progress for this index. - """ - if idx not in self._preprocess_queue: - return None - - result = self._preprocess_queue[idx].result() # Block until ready - self._preprocess_queue.pop(idx) # Clear after getting result - - return result - - def __next__(self): - # To iterate through the data efficiently, he have to implement the - # following, assuming a steady state - - # - start the dataset loading at idx + 2 - # - start the preprocessing pipe at idx + 1 - # - the preprocessing pipe has to implicitly wait for idx +1 in the dataset - # - wait for the preprocessing pipe at idx to finish - # return the data. - - if self.dataset is None: - raise ValueError("Dataset is not present") - - N = len(self.indices) if hasattr(self, "indices") else len(self.dataset) - - if self.i >= N: - self.i = 0 - raise StopIteration - - current_idx = self.i - - # Start loading two ahead: - - if N > current_idx + 2: - self.dataset.preload(self.idx_to_index(current_idx + 1)) - self.dataset.preload(self.idx_to_index(current_idx + 2)) - - # If no preprocessing was done for this index, process it now - data = self.__getitem__(current_idx) - - self.i += 1 - return data - def __iter__(self): - # When starting the iterator method, start loading the data - # at idx = 0, idx = 1 - # Start preprocessing at idx = 0, when the load completes - - if self.dataset is None: - raise ValueError("Dataset is not present") - - self.i = 0 - - N = len(self.indices) if hasattr(self, "indices") else len(self.dataset) - - # Trigger the dataset to start loading index 0: - if N > 1: - self.dataset.preload(self.idx_to_index(self.i)) - if N > 2: - self.dataset.preload(self.idx_to_index(self.i + 1)) - - return self + for i, batch in enumerate(self.dataset): + yield self.__call__(batch) def compute_scaling_factors( @@ -1151,23 +1025,28 @@ def __getitem__(self, idx): def create_domino_dataset( cfg: DictConfig, phase: Literal["train", "val", "test"], - volume_variable_names: list[str], - surface_variable_names: list[str], + keys_to_read: list[str], + keys_to_read_if_available: dict[str, torch.Tensor], vol_factors: list[float], surf_factors: list[float], normalize_coordinates: bool = True, sample_in_bbox: bool = True, sampling: bool = True, + device_mesh: torch.distributed.DeviceMesh | None = None, + placements: dict[str, torch.distributed.tensor.Placement] | None = None, ): if phase == "train": input_path = cfg.data.input_dir model_type = cfg.model.model_type + dataloader_cfg = cfg.train.dataloader elif phase == "val": input_path = cfg.data.input_dir_val model_type = cfg.model.model_type + dataloader_cfg = cfg.val.dataloader elif phase == "test": input_path = cfg.eval.test_path model_type = "inference" + dataloader_cfg = None else: raise ValueError(f"Invalid phase {phase}") @@ -1183,6 +1062,15 @@ def create_domino_dataset( surface_sampling_algorithm=cfg.model.surface_sampling_algorithm, ) else: + # The dataset path works in two pieces: + # There is a core "dataset" which is loading data and moving to GPU + # And there is the preprocess step, here. + + # Optionally, and for backwards compatibility, the preprocess + # object can accept a dataset which will enable it as an iterator. + # The iteration function will loop over the dataset, preprocess the + # output, and return it. + overrides = {} if hasattr(cfg.data, "gpu_preprocessing"): overrides["gpu_preprocessing"] = cfg.data.gpu_preprocessing @@ -1190,12 +1078,38 @@ def create_domino_dataset( if hasattr(cfg.data, "gpu_output"): overrides["gpu_output"] = cfg.data.gpu_output - return DoMINODataPipe( + dm = DistributedManager() + + if cfg.data.gpu_preprocessing: + device = dm.device + consumer_stream = torch.cuda.default_stream() + else: + device = torch.device("cpu") + consumer_stream = None + + if dataloader_cfg is not None: + preload_depth = dataloader_cfg.preload_depth + pin_memory = dataloader_cfg.pin_memory + else: + preload_depth = 2 + pin_memory = False + + dataset = DrivaerMLDataset( + data_dir=input_path, + keys_to_read=keys_to_read, + keys_to_read_if_available=keys_to_read_if_available, + output_device=device, + preload_depth=preload_depth, + pin_memory=pin_memory, + device_mesh=device_mesh, + placements=placements, + consumer_stream=consumer_stream, + ) + + datapipe = DoMINODataPipe( input_path, phase=phase, grid_resolution=cfg.model.interp_res, - volume_variables=volume_variable_names, - surface_variables=surface_variable_names, normalize_coordinates=normalize_coordinates, sampling=sampling, sample_in_bbox=sample_in_bbox, @@ -1216,6 +1130,10 @@ def create_domino_dataset( **overrides, ) + datapipe.set_dataset(dataset) + + return datapipe + if __name__ == "__main__": fm_data = DoMINODataPipe( diff --git a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py index 17f486fb6c..0acec3b7a5 100644 --- a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py +++ b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py @@ -21,7 +21,9 @@ import numpy as np import torch +import torch.distributed as dist import zarr +from torch.distributed.tensor import Replicate, Shard try: import tensorstore as ts @@ -38,9 +40,7 @@ PV_AVAILABLE = False from physicsnemo.distributed import ShardTensor, ShardTensorSpec - -# from physicsnemo.distributed.utils import compute_split_shapes - +from physicsnemo.distributed.utils import compute_split_shapes # Abstractions: # - want to read npy/npz/.zarr/.stl/.vtp files @@ -67,11 +67,16 @@ class BackendReader(ABC): Abstract base class for backend readers. """ - def __init__(self, keys_to_read: list[str] | None) -> None: + def __init__( + self, + keys_to_read: list[str] | None, + keys_to_read_if_available: dict[str, torch.Tensor] | None, + ) -> None: """ Initialize the backend reader. """ self.keys_to_read = keys_to_read + self.keys_to_read_if_available = keys_to_read_if_available @abstractmethod def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]: @@ -82,21 +87,76 @@ def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]: @abstractmethod def read_file_sharded( - self, filename: pathlib.Path, parallel_rank: int, parallel_size: int - ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]: + self, filename: pathlib.Path, device_mesh: torch.distributed.DeviceMesh + ) -> tuple[dict[str, torch.Tensor], dict[str, dict]]: """ - Read a file and return a dictionary of tensors. + Read a file and return a dictionary of tensors ready to convert to ShardTensors. + + NOTE: this function does not actually convert torch tensors to ShardTensors. + It's possible that the conversion, in some cases, can be a collective function. + Due to the async nature of the loader, we don't rely on any ordering of + collectives and defer them to the last possible minute. + + Additionally, these functions return CPU tensors and we don't actually + define shard tensors on cpu. + + So, the dataset itself will convert a local tensor + shard info to shard tensor + after the cpu-> gpu movement. """ pass + def fill_optional_keys( + self, data: dict[str, torch.Tensor] + ) -> dict[str, torch.Tensor]: + """ + Fill missing keys with the keys from the keys_to_read_if_available dictionary. + """ + for key in self.keys_to_read_if_available: + if key not in data.keys(): + data[key] = self.keys_to_read_if_available[key] + return data + + def _get_slice_boundaries( + self, array_shape: tuple[int], this_rank: int, n_splits: int, split_dim: int = 0 + ) -> tuple[int, int, tuple | None]: + """ + For an array, determine the slice boundaries for parallel reading. + + Args: + array_shape: The total shape of the target array. + this_rank: The rank of the distributed process. + n_splits: The size of the distributed process. + split_dim: The dimension to split, default is 0. + + Returns: + The slice boundaries for parallel reading. + """ + # Determine what slice this rank should read + + sections = compute_split_shapes(array_shape[split_dim], n_splits) + + global_chunk_start = sum(sections[:this_rank]) + global_chunk_stop = global_chunk_start + sections[this_rank] + + chunk_sizes = tuple( + array_shape[:split_dim] + (section,) + array_shape[split_dim + 1 :] + for section in sections + ) + + return global_chunk_start, global_chunk_stop, chunk_sizes + class NpyFileReader(BackendReader): """ Reader for numpy files. """ - def __init__(self, keys_to_read: list[str] | None) -> None: - super().__init__(keys_to_read) + def __init__( + self, + keys_to_read: list[str] | None, + keys_to_read_if_available: dict[str, torch.Tensor] | None, + ) -> None: + super().__init__(keys_to_read, keys_to_read_if_available) def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]: """ @@ -111,11 +171,11 @@ def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]: data = {key: torch.from_numpy(data[key]) for key in self.keys_to_read} - return data + return self.fill_optional_keys(data) def read_file_sharded( - self, filename: pathlib.Path, parallel_rank: int, parallel_size: int - ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]: + self, filename: pathlib.Path, device_mesh: torch.distributed.DeviceMesh + ) -> dict[str, ShardTensor]: pass @@ -124,8 +184,12 @@ class NpzFileReader(BackendReader): Reader for npz files. """ - def __init__(self, keys_to_read: list[str] | None) -> None: - super().__init__(keys_to_read) + def __init__( + self, + keys_to_read: list[str] | None, + keys_to_read_if_available: dict[str, torch.Tensor] | None, + ) -> None: + super().__init__(keys_to_read, keys_to_read_if_available) def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]: """ @@ -140,11 +204,11 @@ def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]: data = {key: torch.from_numpy(in_data[key][:]) for key in self.keys_to_read} - return data + return self.fill_optional_keys(data) def read_file_sharded( - self, filename: pathlib.Path, parallel_rank: int, parallel_size: int - ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]: + self, filename: pathlib.Path, device_mesh: torch.distributed.DeviceMesh + ) -> dict[str, ShardTensor]: pass @@ -153,8 +217,12 @@ class ZarrFileReader(BackendReader): Reader for zarr files. """ - def __init__(self, keys_to_read: list[str] | None) -> None: - super().__init__(keys_to_read) + def __init__( + self, + keys_to_read: list[str] | None, + keys_to_read_if_available: dict[str, torch.Tensor] | None, + ) -> None: + super().__init__(keys_to_read, keys_to_read_if_available) def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]: """ @@ -170,15 +238,78 @@ def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]: # This is a slower basic way to do this, to be improved: data = {key: torch.from_numpy(group[key][:]) for key in self.keys_to_read} - return data + return self.fill_optional_keys(data) def read_file_sharded( - self, filename: pathlib.Path, parallel_rank: int, parallel_size: int - ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]: + self, filename: pathlib.Path, device_mesh: torch.distributed.DeviceMesh + ) -> tuple[dict[str, torch.Tensor], dict[str, dict]]: """ Read a file and return a dictionary of tensors. """ - raise NotImplementedError("Not implemented yet.") + + # We need the coordinates of this GPU: + this_rank = device_mesh.get_local_rank() + domain_size = dist.get_world_size(group=device_mesh.get_group()) + + group = zarr.open_group(filename, mode="r") + + missing_keys = set(self.keys_to_read) - set(group.keys()) + + if len(missing_keys) > 0: + raise ValueError(f"Keys {missing_keys} not found in file {filename}") + + data = {} + specs = {} + for key in self.keys_to_read: + # Open the array in zarr without reading it and get info: + zarr_array = group[key] + array_shape = zarr_array.shape + if array_shape == (): + # Read scalars from every rank and use replicate sharding + raw_data = torch.from_numpy(zarr_array[:]) + placement = [ + Replicate(), + ] + chunk_sizes = None + else: + target_dim = 0 + if array_shape[target_dim] < domain_size: + # If the array is smaller than the number of ranks, + # again read and use replicate sharding: + raw_data = torch.from_numpy(zarr_array[:]) + placement = [ + Replicate(), + ] + chunk_sizes = None + else: + # Read partially from the data and use Shard(target_dim) sharding + chunk_start, chunk_stop, chunk_sizes = self._get_slice_boundaries( + zarr_array.shape, this_rank, domain_size + ) + raw_data = torch.from_numpy(zarr_array[chunk_start:chunk_stop]) + placement = [ + Shard(target_dim), + ] + + # Turn chunk sizes into a dict over mesh dim 0: + chunk_sizes = {0: chunk_sizes} + + # + data[key] = raw_data + specs[key] = (placement, chunk_sizes) + + # Patch in the optional keys: + data = self.fill_optional_keys(data) + for key in data.keys(): + if key not in specs: + specs[key] = ( + [ + Replicate(), + ], + {}, + ) + + return data, specs if PV_AVAILABLE: @@ -188,8 +319,12 @@ class VTKFileReader(BackendReader): Reader for vtk files. """ - def __init__(self, keys_to_read: list[str] | None) -> None: - super().__init__(keys_to_read) + def __init__( + self, + keys_to_read: list[str] | None, + keys_to_read_if_available: dict[str, torch.Tensor] | None, + ) -> None: + super().__init__(keys_to_read, keys_to_read_if_available) self.stl_file_keys = [ "stl_coordinates", @@ -254,7 +389,7 @@ def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]: if any(key in self.vtu_file_keys for key in self.keys_to_read): raise NotImplementedError("VTU files are not supported yet.") - return return_data + return self.fill_optional_keys(return_data) def read_file_sharded( self, filename: pathlib.Path, parallel_rank: int, parallel_size: int @@ -309,8 +444,12 @@ class TensorStoreZarrReader(BackendReader): Reader for tensorstore zarr files. """ - def __init__(self, keys_to_read: list[str] | None) -> None: - super().__init__(keys_to_read) + def __init__( + self, + keys_to_read: list[str] | None, + keys_to_read_if_available: dict[str, torch.Tensor] | None, + ) -> None: + super().__init__(keys_to_read, keys_to_read_if_available) self.spec_template = { "driver": "zarr2", @@ -331,6 +470,8 @@ def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]: """ Read a file and return a dictionary of tensors. """ + + # Trigger an async open of each data item: read_futures = {} for key in self.keys_to_read: spec = self.spec_template.copy() @@ -340,24 +481,111 @@ def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]: spec, create=False, open=True, context=self.context ) - results = { - key: np.array(read_futures[key].result()) for key in self.keys_to_read + # Wait for all the opens to conclude: + read_futures = { + key: read_futures[key].result() for key in read_futures.keys() + } + + # Trigger an async read of each data item: + # (Each item will be a numpy ndarray after this:) + read_futures = { + key: read_futures[key].read() for key in read_futures.keys() } + # Convert them to torch tensors: + # (make sure to block for the result) data = { - key: torch.as_tensor(results[key], dtype=torch.float32) + key: torch.as_tensor(read_futures[key].result(), dtype=torch.float32) for key in self.keys_to_read } - return data + return self.fill_optional_keys(data) def read_file_sharded( - self, filename: pathlib.Path, parallel_rank: int, parallel_size: int - ) -> tuple[dict[str, torch.Tensor], dict[str, ShardTensorSpec]]: + self, filename: pathlib.Path, device_mesh: torch.distributed.DeviceMesh + ) -> tuple[dict[str, torch.Tensor], dict[str, dict]]: """ Read a file and return a dictionary of tensors. """ - pass + + # We need the coordinates of this GPU: + this_rank = device_mesh.get_local_rank() + domain_size = dist.get_world_size(group=device_mesh.get_group()) + + # This pulls a list of store objects in tensorstore: + stores = {} + for key in self.keys_to_read: + spec = self.spec_template.copy() + spec["kvstore"]["path"] = str(filename) + "/" + str(key) + + stores[key] = ts.open( + spec, create=False, open=True, context=self.context + ) + + stores = {key: stores[key].result() for key in stores.keys()} + + data = {} + specs = {} + for key in self.keys_to_read: + # Open the array in zarr without reading it and get info: + store = stores[key] + array_shape = store.shape + if array_shape == (): + # Read scalars from every rank and use replicate sharding + _slice = np.s_[:] + # raw_data = torch.from_numpy(store[:]) + placement = [ + Replicate(), + ] + chunk_sizes = None + else: + target_dim = 0 + if array_shape[target_dim] < domain_size: + # If the array is smaller than the number of ranks, + # again read and use replicate sharding: + _slice = np.s_[:] + # raw_data = torch.from_numpy(store[:]) + placement = [ + Replicate(), + ] + chunk_sizes = None + else: + # Read partially from the data and use Shard(target_dim) sharding + chunk_start, chunk_stop, chunk_sizes = ( + self._get_slice_boundaries( + store.shape, this_rank, domain_size + ) + ) + _slice = np.s_[chunk_start:chunk_stop] + # raw_data = torch.from_numpy(zarr_array[chunk_start:chunk_stop]) + placement = [ + Shard(target_dim), + ] + + # Turn chunk sizes into a dict over mesh dim 0: + chunk_sizes = {0: chunk_sizes} + + # Trigger the reads as async: + data[key] = store[_slice].read() + specs[key] = (placement, chunk_sizes) + + # Finally, await the full data read: + for key in self.keys_to_read: + data[key] = torch.as_tensor(data[key].result()) + + # Patch in the optional keys: + data = self.fill_optional_keys(data) + for key in data.keys(): + if key not in specs: + specs[key] = ( + [ + Replicate(), + ], + {}, + ) + + return data, specs + else: class TensorStoreZarrReader(BackendReader): @@ -365,7 +593,11 @@ class TensorStoreZarrReader(BackendReader): Null reader for tensorstore zarr files. """ - def __init__(self, keys_to_read: list[str] | None) -> None: + def __init__( + self, + keys_to_read: list[str] | None, + keys_to_read_if_available: dict[str, torch.Tensor] | None, + ) -> None: # Raise an exception on construction if we get here: raise NotImplementedError( "TensorStoreZarrReader is not available without tensorstore. `pip install tensorstore`." @@ -417,6 +649,7 @@ def __init__( self, data_dir: str | pathlib.Path, keys_to_read: list[str] | None, + keys_to_read_if_available: dict[str, torch.Tensor] | None, output_device: torch.device, preload_depth: int = 2, pin_memory: bool = False, @@ -436,6 +669,8 @@ def __init__( raise NotADirectoryError(f"Data directory {data_dir} is not a directory") self._keys_to_read = keys_to_read + self._keys_to_read_if_available = keys_to_read_if_available + self.file_reader, self._filenames = self._infer_file_type_and_filenames( data_dir ) @@ -449,25 +684,46 @@ def __init__( self.output_device = output_device if output_device.type == "cuda": - # self._data_loader_stream = torch.cuda.default_stream() self._data_loader_stream = torch.cuda.Stream() else: self._data_loader_stream = None self.device_mesh = device_mesh self.placements = placements + # This tracks global tensor info + # so we can convert to ShardTensor at the right time. + self.shard_spec = {} + + if self.device_mesh is not None: + if self.device_mesh.ndim != 1: + raise ValueError("Device mesh must be one dimensional") # This is thread storage for data preloading: self._preload_queue = {} self._transfer_events = {} self.preload_depth = preload_depth - self.preload_executor = ThreadPoolExecutor(max_workers=preload_depth) + self.preload_executor = ThreadPoolExecutor(max_workers=max(1, preload_depth)) if consumer_stream is None and self.output_device.type == "cuda": consumer_stream = torch.cuda.current_stream() self.consumer_stream = consumer_stream + def set_indices(self, indices: list[int]): + """ + Set the indices for the dataset for this epoch. + """ + + # TODO - this needs to block while anything is in the preprocess queue. + + self.indices = indices + + def idx_to_index(self, idx): + if hasattr(self, "indices"): + return self.indices[idx] + + return idx + def _infer_file_type_and_filenames( self, data_dir: pathlib.Path ) -> tuple[str, list[str]]: @@ -485,19 +741,29 @@ def _infer_file_type_and_filenames( # But others benefit from having a state, so we use classes: if all(file.suffix == ".npy" for file in files): - file_reader = NpyFileReader(self._keys_to_read) + file_reader = NpyFileReader( + self._keys_to_read, self._keys_to_read_if_available + ) return file_reader, files elif all(file.suffix == ".npz" for file in files): - file_reader = NpzFileReader(self._keys_to_read) + file_reader = NpzFileReader( + self._keys_to_read, self._keys_to_read_if_available + ) return file_reader, files elif all(file.suffix == ".zarr" and file.is_dir() for file in files): if TENSORSTORE_AVAILABLE: - file_reader = TensorStoreZarrReader(self._keys_to_read) + file_reader = TensorStoreZarrReader( + self._keys_to_read, self._keys_to_read_if_available + ) else: - file_reader = ZarrFileReader(self._keys_to_read) + file_reader = ZarrFileReader( + self._keys_to_read, self._keys_to_read_if_available + ) return file_reader, files elif all(is_vtk_directory(file) for file in files): - file_reader = VTKFileReader(self._keys_to_read) + file_reader = VTKFileReader( + self._keys_to_read, self._keys_to_read_if_available + ) return file_reader, files # Each "file" here is a directory of .vtp, stl, etc. else: @@ -541,7 +807,9 @@ def _move_to_gpu( return result def _convert_to_shard_tensors( - self, tensors: dict[str, torch.Tensor] + self, + tensors: dict[str, torch.Tensor], + filename: str, ) -> dict[str, ShardTensor]: """Convert tensors to ShardTensor objects for distributed training. @@ -555,7 +823,19 @@ def _convert_to_shard_tensors( if self.device_mesh is None: return tensors - raise NotImplementedError("Converting to ShardTensor here not implemented yet.") + spec_dict = self.shard_spec.pop(filename) + result = {} + for key in tensors.keys(): + placement, chunk_sizes = spec_dict[key] + + result[key] = ShardTensor.from_local( + local_tensor=tensors[key], + device_mesh=self.device_mesh, + placements=placement, + sharding_shapes=chunk_sizes, + ) + + return result # result = {} @@ -628,18 +908,30 @@ def get_preloaded(self, idx: int) -> dict[str, torch.Tensor] | None: return result def __iter__(self): + # When starting the iterator method, start loading the data + # at idx = 0, idx = 1 + # Start preprocessing at idx = 0, when the load completes + self.i = 0 + + N = len(self.indices) if hasattr(self, "indices") else len(self) + for i in range(self.preload_depth): + # Trigger the dataset to start loading index 0: + if N > i + 1: + self.preload(self.idx_to_index(self.i + i)) + return self def __next__(self): - if self.i >= len(self._filenames): + N = len(self.indices) if hasattr(self, "indices") else len(self._filenames) + + if self.i >= N: self.i = 0 raise StopIteration - if self.preload_depth > 0 and self.i + 1 < len(self._filenames): - self.preload(self.i + 1) - if self.preload_depth > 1 and self.i + 2 < len(self._filenames): - self.preload(self.i + 2) + for i in range(self.preload_depth): + if N > i + 1: + self.preload(self.i + i) data = self.__getitem__(self.i) @@ -654,7 +946,14 @@ def _read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]: """ Read a file and return a dictionary of tensors. """ - return self.file_reader.read_file(filename) + if self.device_mesh is not None: + tensor_dict, spec_dict = self.file_reader.read_file_sharded( + filename, self.device_mesh + ) + self.shard_spec[filename] = spec_dict + return tensor_dict + else: + return self.file_reader.read_file(filename) def __getitem__(self, idx: int) -> dict[str, torch.Tensor | ShardTensor]: """ @@ -693,7 +992,7 @@ def __getitem__(self, idx: int) -> dict[str, torch.Tensor | ShardTensor]: # Convert to ShardTensors if using domain parallelism if self.device_mesh is not None: - data = self._convert_to_shard_tensors(data) + data = self._convert_to_shard_tensors(data, self._filenames[idx]) return data From 7c27a8eb8c99a1f6731ad0f4f401feb0d73d7a6c Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Mon, 15 Sep 2025 20:26:31 +0000 Subject: [PATCH 28/98] Updating and further documenting scripts --- .../domino/src/benchmark_dataloader.py | 139 +++++-------- .../external_aerodynamics/domino/src/train.py | 191 +++++++++++++----- .../domino/src/train_sharded.py | 2 +- .../external_aerodynamics/domino/src/utils.py | 122 +++++++++++ 4 files changed, 307 insertions(+), 147 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py index b1f5184fc6..80c4b9e3cf 100644 --- a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py +++ b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py @@ -35,11 +35,21 @@ from typing import Literal, Any -import apex -import numpy as np + import hydra from hydra.utils import to_absolute_path from omegaconf import DictConfig, OmegaConf + +DISABLE_RMM = os.environ.get("DOMINO_DISABLE_RMM", "False") +if not DISABLE_RMM: + import rmm + from rmm.allocators.torch import rmm_torch_allocator + import torch + + rmm.reinitialize(pool_allocator=True) + torch.cuda.memory.change_current_allocator(rmm_torch_allocator) + + import torch.distributed as dist from torch.cuda.amp import GradScaler, autocast from torch.nn.parallel import DistributedDataParallel @@ -66,6 +76,8 @@ from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo import time +from utils import ScalingFactors, get_keys_to_read, coordinate_distributed_environment + # Initialize NVML nvmlInit() @@ -73,10 +85,8 @@ from physicsnemo.utils.profiling import profile, Profiler -@profile -def train_epoch( +def benchmark_io_epoch( dataloader, - sampler, logger, gpu_handle, epoch_index, @@ -84,8 +94,6 @@ def train_epoch( ): dist = DistributedManager() - indices = list(iter(sampler)) - print(f"indices: {indices}") # If you tell the dataloader the indices in advance, it will preload # and pre-preprocess data # dataloader.set_indices(indices) @@ -93,11 +101,8 @@ def train_epoch( gpu_start_info = nvmlDeviceGetMemoryInfo(gpu_handle) start_time = time.perf_counter() for i_batch, sample_batched in enumerate(dataloader): - # sampled_batched = dict_to_device(sample_batched, device) - # if i_batch == 7: - # break - # for key in sampled_batched.keys(): - # print(f"{key}: {sampled_batched[key].shape}") + # for key in sample_batched.keys(): + # print(f"{key}: {sample_batched[key].shape}") # Gather data and report elapsed_time = time.perf_counter() - start_time @@ -116,80 +121,6 @@ def train_epoch( return -def get_or_compute_scaling_factors( - cfg: DictConfig, -) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: - """Get or compute scaling factors for volume and surface fields normalization. - - This function either loads pre-computed scaling factors from disk or computes them - if they don't exist. The scaling factors are used for normalizing volume and surface - fields data based on the specified normalization method in the config. - - Args: - cfg (DictConfig): Configuration object containing: - - project.name: Project name for saving/loading scaling factors - - model.normalization: Type of normalization ("min_max_scaling" or "mean_std_scaling") - - data.input_dir: Input directory path - - data_processor.use_cache: Whether to use cached data - - Returns: - tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: A tuple containing: - - vol_factors: Scaling factors for volume fields (max/min or mean/std) - - surf_factors: Scaling factors for surface fields (max/min or mean/std) - Each factor is a numpy array containing the respective scaling values. - - Raises: - ValueError: If an invalid normalization type is specified in the config. - """ - # Compute or load the scaling factors: - vol_save_path = os.path.join( - "outputs", cfg.project.name, "volume_scaling_factors.npy" - ) - surf_save_path = os.path.join( - "outputs", cfg.project.name, "surface_scaling_factors.npy" - ) - - if not os.path.exists(vol_save_path) or not os.path.exists(surf_save_path): - # Save the scaling factors if needed: - mean, std, min_val, max_val = compute_scaling_factors( - cfg=cfg, - input_path=cfg.data.input_dir, - use_cache=cfg.data_processor.use_cache, - ) - - v_mean = mean["volume_fields"].cpu().numpy() - v_std = std["volume_fields"].cpu().numpy() - v_min = min_val["volume_fields"].cpu().numpy() - v_max = max_val["volume_fields"].cpu().numpy() - - s_mean = mean["surface_fields"].cpu().numpy() - s_std = std["surface_fields"].cpu().numpy() - s_min = min_val["surface_fields"].cpu().numpy() - s_max = max_val["surface_fields"].cpu().numpy() - - np.save(vol_save_path, [v_mean, v_std, v_min, v_max]) - np.save(surf_save_path, [s_mean, s_std, s_min, s_max]) - else: - v_mean, v_std, v_min, v_max = np.load(vol_save_path) - s_mean, s_std, s_min, s_max = np.load(surf_save_path) - - if cfg.model.normalization == "min_max_scaling": - vol_factors = [v_max, v_min] - elif cfg.model.normalization == "mean_std_scaling": - vol_factors = [v_mean, v_std] - else: - raise ValueError(f"Invalid normalization type: {cfg.model.normalization}") - - if cfg.model.normalization == "min_max_scaling": - surf_factors = [s_max, s_min] - elif cfg.model.normalization == "mean_std_scaling": - surf_factors = [s_mean, s_std] - else: - raise ValueError(f"Invalid normalization type: {cfg.model.normalization}") - - return vol_factors, surf_factors - - @hydra.main(version_base="1.3", config_path="conf", config_name="config") def main(cfg: DictConfig) -> None: # initialize distributed manager @@ -208,15 +139,38 @@ def main(cfg: DictConfig) -> None: logger.info(f"Config summary:\n{OmegaConf.to_yaml(cfg, sort_keys=True)}") - vol_factors, surf_factors = get_or_compute_scaling_factors(cfg) + ################################ + # Get scaling factors + ################################ + pickle_path = os.path.join(cfg.output) + "/scaling_factors/scaling_factors.pkl" + + try: + scaling_factors = ScalingFactors.load(pickle_path) + logger.info(f"Scaling factors loaded from: {pickle_path}") + except FileNotFoundError: + raise FileNotFoundError( + f"Scaling factors not found at: {pickle_path}; please run compute_statistics.py to compute them." + ) + + vol_factors = scaling_factors.mean["volume_fields"] + surf_factors = scaling_factors.mean["surface_fields"] + vol_factors_tensor = torch.from_numpy(vol_factors).to(dist.device) + + keys_to_read, keys_to_read_if_available = get_keys_to_read( + cfg, model_type, get_ground_truth=True + ) + + domain_mesh, data_mesh, placements = coordinate_distributed_environment(cfg) train_dataset = create_domino_dataset( cfg, phase="train", - volume_variable_names="volume_fields", - surface_variable_names="surface_fields", + keys_to_read=keys_to_read, + keys_to_read_if_available=keys_to_read_if_available, vol_factors=vol_factors, surf_factors=surf_factors, + device_mesh=domain_mesh, + placements=placements, ) train_sampler = DistributedSampler( train_dataset, num_replicas=dist.world_size, rank=dist.rank @@ -232,11 +186,14 @@ def main(cfg: DictConfig) -> None: start_time = time.perf_counter() logger.info(f"Device {dist.device}, epoch {epoch}:") + train_sampler.set_epoch(epoch) + print(f"indices: {list(train_sampler)}") + train_dataset.dataset.set_indices(list(train_sampler)) + epoch_start_time = time.perf_counter() with Profiler(): - train_epoch( + benchmark_io_epoch( dataloader=train_dataset, - sampler=train_sampler, logger=logger, gpu_handle=gpu_handle, epoch_index=epoch, diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py index 5a155fc198..1e0ad5d80c 100644 --- a/examples/cfd/external_aerodynamics/domino/src/train.py +++ b/examples/cfd/external_aerodynamics/domino/src/train.py @@ -70,7 +70,7 @@ from physicsnemo.models.domino.model import DoMINO from physicsnemo.utils.domino.utils import * -from utils import ScalingFactors +from utils import ScalingFactors, get_keys_to_read, coordinate_distributed_environment # This is included for GPU memory tracking: from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo @@ -84,9 +84,6 @@ from physicsnemo.utils.profiling import profile, Profiler -# Profiler().enable("torch") -# Profiler().initialize() - from loss import compute_loss_dict from utils import get_num_vars @@ -255,30 +252,34 @@ def train_epoch( @hydra.main(version_base="1.3", config_path="conf", config_name="config") def main(cfg: DictConfig) -> None: - ################################ + ###################################################### # initialize distributed manager - ################################ + ###################################################### DistributedManager.initialize() dist = DistributedManager() + # DoMINO supports domain parallel training. This function helps coordinate + # how to set that up, if needed. + domain_mesh, data_mesh, placements = coordinate_distributed_environment(cfg) + ################################ # Initialize NVML ################################ nvmlInit() gpu_handle = nvmlDeviceGetHandleByIndex(dist.device.index) - ################################ + ###################################################### # Initialize logger - ################################ + ###################################################### logger = PythonLogger("Train") logger = RankZeroLoggingWrapper(logger, dist) logger.info(f"Config summary:\n{OmegaConf.to_yaml(cfg, sort_keys=True)}") - ################################ - # Get scaling factors - ################################ + ###################################################### + # Get scaling factors - precompute them if this fails! + ###################################################### pickle_path = os.path.join(cfg.output) + "/scaling_factors/scaling_factors.pkl" try: @@ -289,18 +290,14 @@ def main(cfg: DictConfig) -> None: f"Scaling factors not found at: {pickle_path}; please run compute_statistics.py to compute them." ) - model_type = cfg.model.model_type - - # Get physics imports conditionally - add_physics_loss = getattr(cfg.train, "add_physics_loss", False) - - if add_physics_loss: - from physicsnemo.sym.eq.pde import PDE - from physicsnemo.sym.eq.ls.grads import FirstDeriv - from physicsnemo.sym.eq.pdes.navier_stokes import IncompressibleNavierStokes - else: - PDE = FirstDeriv = IncompressibleNavierStokes = None + vol_factors = scaling_factors.mean["volume_fields"] + surf_factors = scaling_factors.mean["surface_fields"] + vol_factors_tensor = torch.from_numpy(vol_factors).to(dist.device) + ###################################################### + # Configure the model + ###################################################### + model_type = cfg.model.model_type num_vol_vars, num_surf_vars, num_global_features = get_num_vars(cfg, model_type) if model_type == "combined" or model_type == "surface": @@ -313,10 +310,28 @@ def main(cfg: DictConfig) -> None: else: volume_variable_names = [] - vol_factors = scaling_factors.mean["volume_fields"] - surf_factors = scaling_factors.mean["surface_fields"] - vol_factors_tensor = torch.from_numpy(vol_factors).to(dist.device) + ###################################################### + # Configure physics loss + # Unless enabled, these are null-ops + ###################################################### + add_physics_loss = getattr(cfg.train, "add_physics_loss", False) + + if add_physics_loss: + from physicsnemo.sym.eq.pde import PDE + from physicsnemo.sym.eq.ls.grads import FirstDeriv + from physicsnemo.sym.eq.pdes.navier_stokes import IncompressibleNavierStokes + else: + PDE = FirstDeriv = IncompressibleNavierStokes = None + # Initialize physics components conditionally + first_deriv = None + eqn = None + if add_physics_loss: + first_deriv = FirstDeriv(dim=3, direct_input=True) + eqn = IncompressibleNavierStokes(rho=1.226, nu="nu", dim=3, time=False) + eqn = eqn.make_nodes(return_as_dict=True) + + # The bounding box is used in calculating the physics loss: bounding_box = None if add_physics_loss: bounding_box = cfg.data.bounding_box @@ -328,37 +343,95 @@ def main(cfg: DictConfig) -> None: .to(dist.device) ) - train_dataset = create_domino_dataset( + ###################################################### + # Configure the dataset + ###################################################### + + # This helper function is to determine which keys to read from the data + # (and which to use default values for, if they aren't present - like + # air_density, for example) + keys_to_read, keys_to_read_if_available = get_keys_to_read( + cfg, model_type, get_ground_truth=True + ) + + # The dataset actually works in two pieces + # The core dataset just reads data from disk, and puts it on the GPU if needed. + # The data processesing pipeline will preprocess that data and prepare it for the model. + # Obviously, you need both, so this function will return the datapipeline in + # a way that can be iterated over. + # + # To properly shuffle the data, we use a distributed sampler too. + # It's configured properly for optional domain parallelism, and you have + # to make sure to call set_epoch below. + + train_dataloader = create_domino_dataset( cfg, phase="train", - volume_variable_names=volume_variable_names, - surface_variable_names=surface_variable_names, + keys_to_read=keys_to_read, + keys_to_read_if_available=keys_to_read_if_available, vol_factors=vol_factors, surf_factors=surf_factors, + device_mesh=domain_mesh, + placements=placements, + ) + train_sampler = DistributedSampler( + train_dataloader, + num_replicas=data_mesh.size(), + rank=data_mesh.get_local_rank(), + **cfg.train.sampler, ) - val_dataset = create_domino_dataset( + + val_dataloader = create_domino_dataset( cfg, phase="val", - volume_variable_names=volume_variable_names, - surface_variable_names=surface_variable_names, + keys_to_read=keys_to_read, + keys_to_read_if_available=keys_to_read_if_available, vol_factors=vol_factors, surf_factors=surf_factors, + device_mesh=domain_mesh, + placements=placements, ) - - train_sampler = DistributedSampler( - train_dataset, - num_replicas=dist.world_size, - rank=dist.rank, - **cfg.train.sampler, - ) - val_sampler = DistributedSampler( - val_dataset, - num_replicas=dist.world_size, - rank=dist.rank, + val_dataloader, + num_replicas=data_mesh.size(), + rank=data_mesh.get_local_rank(), **cfg.val.sampler, ) + # train_dataloader = create_domino_dataset( + # cfg, + # phase="train", + # volume_variable_names=volume_variable_names, + # surface_variable_names=surface_variable_names, + # vol_factors=vol_factors, + # surf_factors=surf_factors, + # ) + # val_dataloader = create_domino_dataset( + # cfg, + # phase="val", + # volume_variable_names=volume_variable_names, + # surface_variable_names=surface_variable_names, + # vol_factors=vol_factors, + # surf_factors=surf_factors, + # ) + + # train_sampler = DistributedSampler( + # train_dataloader, + # num_replicas=dist.world_size, + # rank=dist.rank, + # **cfg.train.sampler, + # ) + + # val_sampler = DistributedSampler( + # val_dataloader, + # num_replicas=dist.world_size, + # rank=dist.rank, + # **cfg.val.sampler, + # ) + + ###################################################### + # Configure the model + ###################################################### model = DoMINO( input_features=3, output_features_vol=num_vol_vars, @@ -382,23 +455,23 @@ def main(cfg: DictConfig) -> None: static_graph=True, ) - # optimizer = apex.optimizers.FusedAdam(model.parameters(), lr=0.001) + ###################################################### + # Initialize optimzer and gradient scaler + ###################################################### + optimizer = torch.optim.Adam(model.parameters(), lr=0.001) scheduler = torch.optim.lr_scheduler.MultiStepLR( optimizer, milestones=[50, 100, 200, 250, 300, 350, 400, 450], gamma=0.5 ) - # Initialize physics components conditionally - first_deriv = None - eqn = None - if add_physics_loss: - first_deriv = FirstDeriv(dim=3, direct_input=True) - eqn = IncompressibleNavierStokes(rho=1.226, nu="nu", dim=3, time=False) - eqn = eqn.make_nodes(return_as_dict=True) - # Initialize the scaler for mixed precision scaler = GradScaler() + ###################################################### + # Initialize output tools + ###################################################### + + # Tensorboard Writer to track training. writer = SummaryWriter(os.path.join(cfg.output, "tensorboard")) epoch_number = 0 @@ -414,6 +487,10 @@ def main(cfg: DictConfig) -> None: if dist.world_size > 1: torch.distributed.barrier() + ###################################################### + # Load checkpoint if available + ###################################################### + init_epoch = load_checkpoint( to_absolute_path(cfg.resume_dir), models=model, @@ -439,6 +516,10 @@ def main(cfg: DictConfig) -> None: initial_integral_factor_orig = cfg.model.integral_loss_scaling_factor + ###################################################### + # Begin Training loop over epochs + ###################################################### + for epoch in range(init_epoch, cfg.train.epochs): start_time = time.perf_counter() logger.info(f"Device {dist.device}, epoch {epoch_number}:") @@ -451,8 +532,8 @@ def main(cfg: DictConfig) -> None: # This controls what indices to use for each epoch. train_sampler.set_epoch(epoch) val_sampler.set_epoch(epoch) - train_dataset.set_indices(list(train_sampler)) - val_dataset.set_indices(list(val_sampler)) + train_dataloader.dataset.set_indices(list(train_sampler)) + val_dataloader.dataset.set_indices(list(val_sampler)) initial_integral_factor = initial_integral_factor_orig @@ -464,7 +545,7 @@ def main(cfg: DictConfig) -> None: model.train(True) epoch_start_time = time.perf_counter() avg_loss = train_epoch( - dataloader=train_dataset, + dataloader=train_dataloader, model=model, optimizer=optimizer, scaler=scaler, @@ -491,7 +572,7 @@ def main(cfg: DictConfig) -> None: model.eval() avg_vloss = validation_step( - dataloader=val_dataset, + dataloader=val_dataloader, model=model, device=dist.device, logger=logger, diff --git a/examples/cfd/external_aerodynamics/domino/src/train_sharded.py b/examples/cfd/external_aerodynamics/domino/src/train_sharded.py index f321f50b12..3b1c818cc2 100644 --- a/examples/cfd/external_aerodynamics/domino/src/train_sharded.py +++ b/examples/cfd/external_aerodynamics/domino/src/train_sharded.py @@ -79,7 +79,7 @@ from physicsnemo.launch.utils import load_checkpoint, save_checkpoint from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper -from physicsnemo.datapipes.cae.domino_datapipe import ( +from physicsnemo.datapipes.cae.domino_datapipe2 import ( compute_scaling_factors, create_domino_dataset, ) diff --git a/examples/cfd/external_aerodynamics/domino/src/utils.py b/examples/cfd/external_aerodynamics/domino/src/utils.py index 6befff00bb..6d05c90bfc 100644 --- a/examples/cfd/external_aerodynamics/domino/src/utils.py +++ b/examples/cfd/external_aerodynamics/domino/src/utils.py @@ -21,6 +21,13 @@ import pickle from pathlib import Path from typing import Literal +from omegaconf import DictConfig +from physicsnemo.distributed import DistributedManager + +from torch.distributed.tensor.placement_types import ( + Shard, + Replicate, +) def get_num_vars(cfg: dict, model_type: Literal["volume", "surface", "combined"]): @@ -82,6 +89,121 @@ def get_num_vars(cfg: dict, model_type: Literal["volume", "surface", "combined"] return num_vol_vars, num_surf_vars, num_global_features +def get_keys_to_read( + cfg: dict, + model_type: Literal["volume", "surface", "combined"], + get_ground_truth: bool = True, +): + """ + This function helps configure the keys to read from the dataset. + + And, if some global parameter values are provided in the config, + they are also read here and passed to the dataset. + + """ + + # Always read these keys: + keys_to_read = ["stl_coordinates", "stl_centers", "stl_faces", "stl_areas"] + + # If these keys are in the config, use them, else provide defaults in + # case they aren't in the dataset: + # TODO + keys_to_read_if_available = { + "global_params_values": torch.tensor([[30.0], [1.226]]), + "global_params_reference": torch.tensor([[30.0], [1.226]]), + } + + # Volume keys: + volume_keys = [ + "volume_mesh_centers", + ] + if get_ground_truth: + volume_keys.append("volume_fields") + + # Surface keys: + surface_keys = [ + "surface_mesh_centers", + "surface_normals", + "surface_areas", + ] + if get_ground_truth: + surface_keys.append("surface_fields") + + if model_type == "volume" or model_type == "combined": + keys_to_read.extend(volume_keys) + if model_type == "surface" or model_type == "combined": + keys_to_read.extend(surface_keys) + + return keys_to_read, keys_to_read_if_available + + +def coordinate_distributed_environment(cfg: DictConfig): + """ + Initialize the distributed env for DoMINO. This is actually always a 2D Mesh: + one dimension is the data-parallel dimension (DDP), and the other is the + domain dimension. + + For the training scripts, we need to know the rank, size of each dimension, + and return the domain_mesh and placements for the loader. + + Args: + cfg: Configuration object containing the domain parallelism configuration. + + Returns: + domain_mesh: torch.distributed.DeviceMesh: The domain mesh for the domain parallel dimension. + data_mesh: torch.distributed.DeviceMesh: The data mesh for the data parallel dimension. + placements: dict[str, torch.distributed.tensor.Placement]: The placements for the data set + """ + + DistributedManager.initialize() + dist = DistributedManager() + + # Default to no domain parallelism: + domain_size = cfg.get("domain_parallelism", {}).get("domain_size", 1) + + # Initialize the device mesh: + mesh = dist.initialize_mesh( + mesh_shape=(-1, domain_size), mesh_dim_names=("ddp", "domain") + ) + domain_mesh = mesh["domain"] + data_mesh = mesh["ddp"] + + if domain_size > 1: + # Define the default placements for each tensor that might show up in + # the data. Note that we'll define placements for all keys, even if + # they aren't actually used. + + # Note that placements are defined for pre-batched data, no batch index! + + grid_like_placement = [ + Shard(0), + ] + point_like_placement = [ + Shard(0), + ] + replicate_placement = [ + Replicate(), + ] + placements = { + "stl_coordinates": point_like_placement, + "stl_centers": point_like_placement, + "stl_faces": point_like_placement, + "stl_areas": point_like_placement, + "surface_fields": point_like_placement, + "volume_mesh_centers": point_like_placement, + "volume_fields": point_like_placement, + "surface_mesh_centers": point_like_placement, + "surface_normals": point_like_placement, + "surface_areas": point_like_placement, + "surface_fields": point_like_placement, + } + else: + domain_mesh = None + placements = None + + return domain_mesh, data_mesh, placements + + @dataclass class ScalingFactors: """ From 7f01ddc9f5f3e578641f2595e8888fbbc2c8c42a Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Mon, 15 Sep 2025 21:12:13 +0000 Subject: [PATCH 29/98] Remove bug in sdf fake function --- physicsnemo/utils/sdf.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/physicsnemo/utils/sdf.py b/physicsnemo/utils/sdf.py index 08f9c8c4c2..9abbafa460 100644 --- a/physicsnemo/utils/sdf.py +++ b/physicsnemo/utils/sdf.py @@ -195,11 +195,6 @@ def _( if mesh_vertices.device != mesh_indices.device: raise RuntimeError("mesh_vertices and mesh_indices must be on the same device") - if mesh_vertices.shape[0] != mesh_indices.shape[0]: - raise RuntimeError( - "mesh_vertices and mesh_indices must have the same number of points" - ) - N = input_points.shape[0] sdf_output = torch.empty(N, 1, device=input_points.device, dtype=input_points.dtype) From 1d03ab754309549a4007ecf8d1e3d1c9d2fb5018 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 17 Sep 2025 21:07:15 +0000 Subject: [PATCH 30/98] Restructure datapipe to make the logical flow simpler and clearer. Focus is on readability and maintainabiltiy without performance loss. --- physicsnemo/datapipes/cae/domino_datapipe2.py | 487 +++++++++++------- physicsnemo/models/domino/model.py | 33 +- 2 files changed, 308 insertions(+), 212 deletions(-) diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py index 472c09be24..781402a5b3 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe2.py +++ b/physicsnemo/datapipes/cae/domino_datapipe2.py @@ -26,7 +26,6 @@ variable names, domain resolution, sampling size etc. are configurable in config.yaml. """ -from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass from pathlib import Path from typing import Iterable, Literal, Optional, Protocol, Sequence, Union @@ -231,13 +230,17 @@ def __init__( dist = DistributedManager() + # Set devices for the preprocessing and IO target self.preproc_device = ( dist.device if self.config.gpu_preprocessing else torch.device("cpu") ) + # The drivaer_ml_dataset will automatically target this device + # In an async transfer. self.output_device = ( dist.device if self.config.gpu_output else torch.device("cpu") ) + # Model type determines whether we process surface, volume, or both. self.model_type = model_type # Update the arrays for bounding boxes: @@ -256,12 +259,13 @@ def __init__( dtype=torch.float32, ), ] - self.volume_grid = create_grid( + self.default_volume_grid = create_grid( self.config.bounding_box_dims[0], self.config.bounding_box_dims[1], self.config.grid_resolution, ) + # And, do the surface bounding box if supplied: if hasattr(self.config.bounding_box_dims_surf, "max") and hasattr( self.config.bounding_box_dims_surf, "min" ): @@ -278,7 +282,7 @@ def __init__( ), ] - self.surf_grid = create_grid( + self.default_surface_grid = create_grid( self.config.bounding_box_dims_surf[0], self.config.bounding_box_dims_surf[1], self.config.grid_resolution, @@ -301,56 +305,71 @@ def __init__( self.dataset = None - # This is thread storage for data preprocessing: - self._preprocess_queue = {} - self._preprocess_events = {} - self.preprocess_depth = 2 - self.preprocess_executor = ThreadPoolExecutor(max_workers=1) - - def compute_stl_scaling( - self, stl_vertices: torch.Tensor, bounding_box_dims_surf: torch.Tensor | None - ): + def compute_stl_scaling_and_surface_grids( + self, + stl_vertices: torch.Tensor, + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: """ Compute the min and max for the defining mesh. - """ + If the user supplies a bounding box, we use that. Otherwise, + it's created dynamically from the min/max of the stl vertices. - s_min = torch.amin(stl_vertices, 0) - s_max = torch.amax(stl_vertices, 0) + The returned min/max and grid are used for surface data. + """ - # if dynamic_bbox_scaling: # Check the bounding box is not unit length - if bounding_box_dims_surf is not None: - s_max = bounding_box_dims_surf[0] - s_min = bounding_box_dims_surf[1] - surf_grid = self.surf_grid + if self.config.bounding_box_dims_surf is not None: + s_max = self.config.bounding_box_dims_surf[0] + s_min = self.config.bounding_box_dims_surf[1] + surf_grid = self.default_surface_grid else: - # Create the grid: - surf_grid = create_grid(s_max, s_min, self.grid_resolution) + # Create the grid dynamically + s_min = torch.amin(stl_vertices, 0) + s_max = torch.amax(stl_vertices, 0) + surf_grid = create_grid(s_max, s_min, self.config.grid_resolution) + + return s_min, s_max, surf_grid + + def compute_volume_scaling_and_grids( + self, s_min: torch.Tensor, s_max: torch.Tensor + ): + """ + Compute the min and max and grid for volume data. + + If the user supplies a bounding box, we use that. Otherwise, + it's created dynamically from the surface min/max. - surf_grid_max_min = torch.stack([s_min, s_max]) + This will be 2x longer in x and y and the same in z as the surface bounding box. + """ - return s_min, s_max, surf_grid_max_min, surf_grid + # Determine the volume min / max locations + if self.config.bounding_box_dims is not None: + c_max = self.config.bounding_box_dims[0] + c_min = self.config.bounding_box_dims[1] + volume_grid = self.default_volume_grid + + else: + # Create the grid based on the surface grid + c_max = s_max + (s_max - s_min) / 2 + c_min = s_min - (s_max - s_min) / 2 + c_min[2] = s_min[2] + volume_grid = create_grid(c_max, c_min, self.config.grid_resolution) + + return c_min, c_max, volume_grid @profile - def process_combined( + def downsample_geometry( self, - s_min, - s_max, - surf_grid, stl_vertices, - mesh_indices_flattened, - ): - # SDF calculation on the grid using WARP - nx, ny, nz = self.config.grid_resolution + ) -> torch.Tensor: + """ + Downsample the geometry to the desired number of points. - sdf_surf_grid, _ = signed_distance_field( - stl_vertices, - mesh_indices_flattened, - surf_grid, - use_sign_winding_number=True, - ) + Args: + stl_vertices: The vertices of the surface. + """ if self.config.sampling: geometry_points = self.config.geom_points_sample @@ -365,31 +384,41 @@ def process_combined( else: geom_centers = stl_vertices - return (sdf_surf_grid, geom_centers) + return geom_centers def process_surface( self, s_min: torch.Tensor, s_max: torch.Tensor, + c_min: torch.Tensor, + c_max: torch.Tensor, + *, # Forcing the rest by keyword only since it's a long list ... center_of_mass: torch.Tensor, surf_grid: torch.Tensor, surface_coordinates: torch.Tensor, surface_normals: torch.Tensor, surface_sizes: torch.Tensor, + stl_vertices: torch.Tensor, + stl_indices: torch.Tensor, surface_fields: torch.Tensor | None, ) -> dict[str, torch.Tensor]: nx, ny, nz = self.config.grid_resolution return_dict = {} + ######################################################################## # Remove any sizes <= 0: + ######################################################################## idx = surface_sizes > 0 surface_sizes = surface_sizes[idx] - if surface_fields is not None: - surface_fields = surface_fields[idx] surface_normals = surface_normals[idx] surface_coordinates = surface_coordinates[idx] + if surface_fields is not None: + surface_fields = surface_fields[idx] + ######################################################################## + # Surface resampling ... + ######################################################################## if self.config.resample_surfaces: if self.config.resampling_points > surface_coordinates.shape[0]: resampling_points = surface_coordinates.shape[0] @@ -404,9 +433,10 @@ def process_surface( if surface_fields is not None: surface_fields = surface_fields[idx_s] - c_max = self.config.bounding_box_dims[0] - c_min = self.config.bounding_box_dims[1] - + ######################################################################## + # Reject surface points outside of the Bounding Box + # NOTE - this is using the VOLUME bounding box! + ######################################################################## if self.config.sample_in_bbox: ids_min = surface_coordinates[:] > c_min ids_max = surface_coordinates[:] < c_max @@ -433,12 +463,20 @@ def process_surface( else: pos_normals_com_surface = surface_coordinates - center_of_mass + ######################################################################## + # Perform Down sampling of the surface fields. + # Note that we snapshot the full surface coordinates for + # use in the kNN in the next step. + ######################################################################## + + full_surface_coordinates = surface_coordinates + full_surface_normals = surface_normals + full_surface_sizes = surface_sizes + if self.config.sampling: # Perform the down sampling: - if self.config.surface_sampling_algorithm == "area_weighted": weights = surface_sizes - else: weights = None @@ -458,66 +496,56 @@ def process_surface( # Select out the sampled points for non-neighbor arrays: surface_fields = surface_fields[idx_surface] pos_normals_com_surface = pos_normals_com_surface[idx_surface] - - # Now, perform the kNN on the sampled points: - if self.config.num_surface_neighbors > 1: - neighbor_indices, neighbor_distances = knn( - points=surface_coordinates, - queries=surface_coordinates_sampled, - k=self.config.num_surface_neighbors, - ) - - # Pull out the neighbor elements. Note that ii is the index into the original - # points - but only exists for the sampled points - # In other words, a point from `surface_coordinates_sampled` has neighbors - # from the full `surface_coordinates` array. - surface_neighbors = surface_coordinates[neighbor_indices][:, 1:] - surface_neighbors_normals = surface_normals[neighbor_indices][:, 1:] - surface_neighbors_sizes = surface_sizes[neighbor_indices][:, 1:] - else: - surface_neighbors = surface_coordinates - surface_neighbors_normals = surface_normals - surface_neighbors_sizes = surface_sizes - # Subsample the normals and sizes: surface_normals = surface_normals[idx_surface] surface_sizes = surface_sizes[idx_surface] - # Update the coordinates to the sampled points: surface_coordinates = surface_coordinates_sampled - else: - neighbor_indices, _ = knn( - points=surface_coordinates, + ######################################################################## + # Perform a kNN on the surface to find the neighbor information + ######################################################################## + if self.config.num_surface_neighbors > 1: + # Perform the kNN: + neighbor_indices, neighbor_distances = knn( + points=full_surface_coordinates, queries=surface_coordinates, k=self.config.num_surface_neighbors, ) - # Construct the neighbors arrays: - surface_neighbors = surface_coordinates[neighbor_indices][:, 1:] - surface_neighbors_normals = surface_normals[neighbor_indices][:, 1:] - surface_neighbors_sizes = surface_sizes[neighbor_indices][:, 1:] + # Pull out the neighbor elements. + # Note that `neighbor_indices` is the index into the original, + # full sized tensors (full_surface_coordinates, etc). + surface_neighbors = full_surface_coordinates[neighbor_indices][:, 1:] + surface_neighbors_normals = full_surface_normals[neighbor_indices][:, 1:] + surface_neighbors_sizes = full_surface_sizes[neighbor_indices][:, 1:] - # Have to normalize neighbors after the kNN and sampling + # Better to normalize everything after the kNN and sampling if self.config.normalize_coordinates: surf_grid = normalize(surf_grid, s_max, s_min) surface_coordinates = normalize(surface_coordinates, s_max, s_min) surface_neighbors = normalize(surface_neighbors, s_max, s_min) + # This is for the SDF Later: + normed_vertices = normalize(stl_vertices, s_max, s_min) + else: + normed_vertices = stl_vertices + + ######################################################################## + # Apply scaling to the targets, if desired: + ######################################################################## + if self.config.scaling_type is not None and surface_fields is not None: + surface_fields = self.scale_model_targets( + surface_fields, self.config.surface_factors + ) - if self.config.scaling_type is not None: - if self.config.surface_factors is not None: - if self.config.scaling_type == "mean_std_scaling": - surf_mean = self.config.surface_factors[0] - surf_std = self.config.surface_factors[1] - if surface_fields is not None: - surface_fields = standardize( - surface_fields, surf_mean, surf_std - ) - elif self.config.scaling_type == "min_max_scaling": - surf_min = self.config.surface_factors[1] - surf_max = self.config.surface_factors[0] - if surface_fields is not None: - surface_fields = normalize(surface_fields, surf_max, surf_min) + # Compute signed distance function for the surface grid: + sdf_surf_grid, _ = signed_distance_field( + mesh_vertices=normed_vertices, + mesh_indices=stl_indices, + input_points=surf_grid, + use_sign_winding_number=True, + ) + return_dict["sdf_surf_grid"] = sdf_surf_grid return_dict.update( { @@ -537,27 +565,27 @@ def process_surface( def process_volume( self, - s_min: torch.Tensor, - s_max: torch.Tensor, + c_min: torch.Tensor, + c_max: torch.Tensor, volume_coordinates: torch.Tensor, - volume_fields: torch.Tensor | None, - stl_vertices: torch.Tensor, - mesh_indices_flattened: torch.Tensor, + volume_grid: torch.Tensor, center_of_mass: torch.Tensor, + stl_vertices: torch.Tensor, + stl_indices: torch.Tensor, + volume_fields: torch.Tensor | None, ) -> dict[str, torch.Tensor]: - return_dict = {} + """ + Preprocess the volume data. - nx, ny, nz = self.config.grid_resolution + First, if configured, we reject points not in the volume bounding box. - # Determine the volume min / max locations - if self.config.bounding_box_dims is None: - c_max = s_max + (s_max - s_min) / 2 - c_min = s_min - (s_max - s_min) / 2 - c_min[2] = s_min[2] - else: - c_max = self.config.bounding_box_dims[0] - c_min = self.config.bounding_box_dims[1] + Next, if sampling is enabled, we sample the volume points and apply that + sampling to the ground truth too, if it's present. + """ + ######################################################################## + # Reject points outside the volumetric BBox + ######################################################################## if self.config.sample_in_bbox: # Remove points in the volume that are outside # of the bbox area. @@ -571,27 +599,9 @@ def process_volume( if volume_fields is not None: volume_fields = volume_fields[ids_in_bbox] - dx, dy, dz = ( - (c_max[0] - c_min[0]) / nx, - (c_max[1] - c_min[1]) / ny, - (c_max[2] - c_min[2]) / nz, - ) - - # TODO - we need to make sure if the bbox is dynamic, - # the bounds on the grid are correct - - # # Generate a grid of specified resolution to map the bounding box - # # The grid is used for capturing structured geometry features and SDF representation of geometry - # grid = create_grid(c_max, c_min, [nx, ny, nz]) - # grid_reshaped = grid.reshape(nx * ny * nz, 3) - - # SDF calculation on the volume grid using WARP - sdf_grid, _ = signed_distance_field( - stl_vertices, - mesh_indices_flattened, - self.volume_grid, - use_sign_winding_number=True, - ) + ######################################################################## + # Apply sampling to the volume coordinates and fields + ######################################################################## if self.config.sampling: # Generate a series of idx to sample the volume @@ -602,6 +612,8 @@ def process_volume( ) volume_coordinates_sampled = volume_coordinates[idx_volume] + # In case too few points are in the sampled data (because the + # inputs were too few), pad the outputs: if volume_coordinates_sampled.shape[0] < self.config.volume_points_sample: padding_size = ( self.config.volume_points_sample @@ -613,20 +625,93 @@ def process_volume( mode="constant", value=-10.0, ) + + # Apply the same sampling to the targets, too: if volume_fields is not None: volume_fields = volume_fields[idx_volume] + volume_coordinates = volume_coordinates_sampled + ######################################################################## + # Apply normalization to the coordinates, if desired: + ######################################################################## + if self.config.normalize_coordinates: + volume_coordinates = normalize(volume_coordinates, c_max, c_min) + grid = normalize(volume_grid, c_max, c_min) + # This is used later in the SDF, apply the same scaling to the mesh + # coordinates: + normed_vertices = normalize(stl_vertices, c_max, c_min) + else: + grid = volume_grid + normed_vertices = stl_vertices + + ######################################################################## + # Apply scaling to the targets, if desired: + ######################################################################## + if self.config.scaling_type is not None and volume_fields is not None: + volume_fields = self.scale_model_targets( + volume_fields, self.config.volume_factors + ) + + ######################################################################## + # Compute Signed Distance Function for volumetric quantities + # Note - the SDF happens here, after volume data processing finishes, + # because we need to use the (maybe) normalized volume coordinates and grid + ######################################################################## + + # SDF calculation on the volume grid using WARP + sdf_grid, _ = signed_distance_field( + normed_vertices, + stl_indices, + grid, + use_sign_winding_number=True, + ) + # Get the SDF of all the selected volume coordinates, # And keep the closest point to each one. sdf_nodes, sdf_node_closest_point = signed_distance_field( - stl_vertices, - mesh_indices_flattened, + normed_vertices, + stl_indices, volume_coordinates, use_sign_winding_number=True, ) sdf_nodes = sdf_nodes.reshape((-1, 1)) + # Use the closest point from the mesh to compute the volume encodings: + pos_normals_closest_vol, pos_normals_com_vol = self.calculate_volume_encoding( + c_min, c_max, volume_coordinates, sdf_node_closest_point, center_of_mass + ) + + return_dict = { + "volume_mesh_centers": volume_coordinates, + "sdf_nodes": sdf_nodes, + "grid": grid, + "sdf_grid": sdf_grid, + "pos_volume_closest": pos_normals_closest_vol, + "pos_volume_center_of_mass": pos_normals_com_vol, + } + + if volume_fields is not None: + return_dict["volume_fields"] = volume_fields + + return return_dict + + def calculate_volume_encoding( + self, + c_min: torch.Tensor, + c_max: torch.Tensor, + volume_coordinates: torch.Tensor, + sdf_node_closest_point: torch.Tensor, + center_of_mass: torch.Tensor, + ): + nx, ny, nz = self.config.grid_resolution + + dx, dy, dz = ( + (c_max[0] - c_min[0]) / nx, + (c_max[1] - c_min[1]) / ny, + (c_max[2] - c_min[2]) / nz, + ) + if self.config.positional_encoding: pos_normals_closest_vol = calculate_normal_positional_encoding( volume_coordinates, @@ -640,42 +725,7 @@ def process_volume( pos_normals_closest_vol = volume_coordinates - sdf_node_closest_point pos_normals_com_vol = volume_coordinates - center_of_mass - if self.config.normalize_coordinates: - volume_coordinates = normalize(volume_coordinates, c_max, c_min) - grid = normalize(self.volume_grid, c_max, c_min) - else: - grid = self.volume_grid - - if self.config.scaling_type is not None: - if self.config.volume_factors is not None: - if self.config.scaling_type == "mean_std_scaling": - vol_mean = self.config.volume_factors[0] - vol_std = self.config.volume_factors[1] - if volume_fields is not None: - volume_fields = standardize(volume_fields, vol_mean, vol_std) - elif self.config.scaling_type == "min_max_scaling": - vol_min = self.config.volume_factors[1] - vol_max = self.config.volume_factors[0] - if volume_fields is not None: - volume_fields = normalize(volume_fields, vol_max, vol_min) - - vol_grid_max_min = torch.stack([c_min, c_max]) - - return_dict.update( - { - "pos_volume_closest": pos_normals_closest_vol, - "pos_volume_center_of_mass": pos_normals_com_vol, - "grid": grid, - "sdf_grid": sdf_grid, - "sdf_nodes": sdf_nodes, - "volume_mesh_centers": volume_coordinates, - "volume_min_max": vol_grid_max_min, - } - ) - if volume_fields is not None: - return_dict["volume_fields"] = volume_fields - - return return_dict + return pos_normals_closest_vol, pos_normals_com_vol @torch.no_grad() def process_data(self, data_dict): @@ -685,75 +735,103 @@ def process_data(self, data_dict): "global_params_reference": data_dict["global_params_reference"], } + ######################################################################## + # Process the core STL information + ######################################################################## + # This function gets information about the surface scale, # and decides what the surface grid will be: - (s_min, s_max, surf_grid_max_min, surf_grid) = self.compute_stl_scaling( - data_dict["stl_coordinates"], self.config.bounding_box_dims_surf + s_min, s_max, surf_grid = self.compute_stl_scaling_and_surface_grids( + data_dict["stl_coordinates"] ) + return_dict["surf_grid"] = surf_grid + + # Store this only if normalization is active: + if self.model_type == "surface" or self.model_type == "combined": + if self.config.normalize_coordinates: + return_dict["surface_min_max"] = torch.stack([s_min, s_max]) # This is a center of mass computation for the stl surface, # using the size of each mesh point as weight. - center_of_mass = calculate_center_of_mass( data_dict["stl_centers"], data_dict["stl_areas"] ) - # For SDF calculations, make sure the mesh_indices_flattened is an integer array: - mesh_indices_flattened = data_dict["stl_faces"].to(torch.int32) - - return_dict.update( - { - "surface_min_max": surf_grid_max_min, - } - ) - - # This will compute the sdf on the surface grid and apply downsampling if needed - sdf_surf_grid, geom_centers = self.process_combined( - s_min, - s_max, - surf_grid, + # This will apply downsampling if needed to the geometry coordinates + geom_centers = self.downsample_geometry( stl_vertices=data_dict["stl_coordinates"], - mesh_indices_flattened=mesh_indices_flattened, ) - return_dict["surf_grid"] = surf_grid - - return_dict["sdf_surf_grid"] = sdf_surf_grid return_dict["geometry_coordinates"] = geom_centers - # Up to here works all in torch! + ######################################################################## + # Determine the volumetric bounds of the data: + ######################################################################## + # Compute the min/max for volume an the unnomralized grid: + c_min, c_max, volume_grid = self.compute_volume_scaling_and_grids(s_min, s_max) + + # For volume data, we store this only if normalizing coordinates: + if self.model_type == "volume" or self.model_type == "combined": + if self.config.normalize_coordinates: + return_dict["volume_min_max"] = torch.stack([c_min, c_max]) + + # For SDF calculations, make sure the mesh_indices_flattened is an integer array: + mesh_indices_flattened = data_dict["stl_faces"].to(torch.int32) if self.model_type == "volume" or self.model_type == "combined": + volume_fields_raw = ( + data_dict["volume_fields"] if "volume_fields" in data_dict else None + ) volume_dict = self.process_volume( - s_min, - s_max, + c_min, + c_max, volume_coordinates=data_dict["volume_mesh_centers"], - volume_fields=data_dict["volume_fields"] - if "volume_fields" in data_dict - else None, - stl_vertices=data_dict["stl_coordinates"], - mesh_indices_flattened=mesh_indices_flattened, + volume_grid=volume_grid, center_of_mass=center_of_mass, + stl_vertices=data_dict["stl_coordinates"], + stl_indices=mesh_indices_flattened, + volume_fields=volume_fields_raw, ) return_dict.update(volume_dict) if self.model_type == "surface" or self.model_type == "combined": + surface_fields_raw = ( + data_dict["surface_fields"] if "surface_fields" in data_dict else None + ) surface_dict = self.process_surface( s_min, s_max, - center_of_mass, - surf_grid, + c_min, + c_max, + center_of_mass=center_of_mass, + surf_grid=surf_grid, surface_coordinates=data_dict["surface_mesh_centers"], surface_normals=data_dict["surface_normals"], surface_sizes=data_dict["surface_areas"], - surface_fields=data_dict["surface_fields"] - if "surface_fields" in data_dict - else None, + stl_vertices=data_dict["stl_coordinates"], + stl_indices=mesh_indices_flattened, + surface_fields=surface_fields_raw, ) + return_dict.update(surface_dict) return return_dict + def scale_model_targets( + self, fields: torch.Tensor, factors: torch.Tensor + ) -> torch.Tensor: + """ + Scale the model targets based on the configured scaling factors. + """ + if self.config.scaling_type == "mean_std_scaling": + field_mean = self.config.volume_factors[0] + field_std = self.config.volume_factors[1] + return standardize(fields, field_mean, field_std) + elif self.config.scaling_type == "min_max_scaling": + field_min = self.config.volume_factors[1] + field_max = self.config.volume_factors[0] + return normalize(fields, field_max, field_min) + def unscale_model_outputs( self, volume_fields: torch.Tensor | None, surface_fields: torch.Tensor | None ): @@ -787,6 +865,9 @@ def unscale_model_outputs( return volume_fields, surface_fields def set_dataset(self, dataset: Iterable) -> None: + """ + Pass a dataset to the datapipe to enable iterating over both in one pass. + """ self.dataset = dataset def __len__(self): @@ -801,8 +882,9 @@ def __getitem__(self, idx): Domino, in general, expects one example per file and the files are relatively large due to the mesh size. - """ + Requires the user to have set a dataset via `set_dataset`. + """ if self.dataset is None: raise ValueError("Dataset is not present") @@ -812,7 +894,7 @@ def __getitem__(self, idx): return self.__call__(data_dict) - def __call__(self, data_dict: dict) -> dict: + def __call__(self, data_dict: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]: """ Process the incoming data dictionary. - Processes the data @@ -839,6 +921,11 @@ def __call__(self, data_dict: dict) -> dict: return data_dict def __iter__(self): + if self.dataset is None: + raise ValueError( + "Dataset is not present, can not use the datapipe as an iterator." + ) + for i, batch in enumerate(self.dataset): yield self.__call__(batch) diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py index ff0a5482c8..24588b123c 100644 --- a/physicsnemo/models/domino/model.py +++ b/physicsnemo/models/domino/model.py @@ -1884,9 +1884,6 @@ def forward(self, data_dict, return_volume_neighbors=False): # Bounding box grid s_grid = data_dict["surf_grid"] sdf_surf_grid = data_dict["sdf_surf_grid"] - # Scaling factors - surf_max = data_dict["surface_min_max"][:, 1] - surf_min = data_dict["surface_min_max"][:, 0] # Parameters global_params_values = data_dict["global_params_values"] @@ -1897,12 +1894,17 @@ def forward(self, data_dict, return_volume_neighbors=False): # Computational domain grid p_grid = data_dict["grid"] sdf_grid = data_dict["sdf_grid"] - # Scaling factors - vol_max = data_dict["volume_min_max"][:, 1] - vol_min = data_dict["volume_min_max"][:, 0] - - # Normalize based on computational domain - geo_centers_vol = 2.0 * (geo_centers - vol_min) / (vol_max - vol_min) - 1 + if "volume_min_max" in data_dict.keys(): + # Scaling factors + vol_max = data_dict["volume_min_max"][:, 1] + vol_min = data_dict["volume_min_max"][:, 0] + + # Normalize based on computational domain + geo_centers_vol = ( + 2.0 * (geo_centers - vol_min) / (vol_max - vol_min) - 1 + ) + else: + geo_centers_vol = geo_centers encoding_g_vol = self.geo_rep_volume(geo_centers_vol, p_grid, sdf_grid) @@ -1926,9 +1928,16 @@ def forward(self, data_dict, return_volume_neighbors=False): if self.output_features_surf is not None: # Represent geometry on bounding box - geo_centers_surf = ( - 2.0 * (geo_centers - surf_min) / (surf_max - surf_min) - 1 - ) + if "surface_min_max" in data_dict.keys(): + # Scaling factors + surf_max = data_dict["surface_min_max"][:, 1] + surf_min = data_dict["surface_min_max"][:, 0] + geo_centers_surf = ( + 2.0 * (geo_centers - surf_min) / (surf_max - surf_min) - 1 + ) + else: + geo_centers_surf = geo_centers + encoding_g_surf = self.geo_rep_surface( geo_centers_surf, s_grid, sdf_surf_grid ) From b7b7a65901642fffb9cfd7cf03e28124b7dcfeaf Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 17 Sep 2025 21:09:46 +0000 Subject: [PATCH 31/98] Ensure RMM is actually used... --- .../external_aerodynamics/domino/src/benchmark_dataloader.py | 2 +- .../cfd/external_aerodynamics/domino/src/inference_on_stl2.py | 2 +- examples/cfd/external_aerodynamics/domino/src/train.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py index 80c4b9e3cf..f24e0ffe16 100644 --- a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py +++ b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py @@ -40,7 +40,7 @@ from hydra.utils import to_absolute_path from omegaconf import DictConfig, OmegaConf -DISABLE_RMM = os.environ.get("DOMINO_DISABLE_RMM", "False") +DISABLE_RMM = os.environ.get("DOMINO_DISABLE_RMM", False) if not DISABLE_RMM: import rmm from rmm.allocators.torch import rmm_torch_allocator diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py index 9ff5b62d0d..3c6acc3ccd 100644 --- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py +++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py @@ -42,7 +42,7 @@ from omegaconf import DictConfig, OmegaConf import torch -DISABLE_RMM = os.environ.get("DISABLE_RMM", "False") +DISABLE_RMM = os.environ.get("DISABLE_RMM", False) if not DISABLE_RMM: import rmm from rmm.allocators.torch import rmm_torch_allocator diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py index 1e0ad5d80c..0176084082 100644 --- a/examples/cfd/external_aerodynamics/domino/src/train.py +++ b/examples/cfd/external_aerodynamics/domino/src/train.py @@ -39,7 +39,7 @@ from omegaconf import DictConfig, OmegaConf -DISABLE_RMM = os.environ.get("DOMINO_DISABLE_RMM", "False") +DISABLE_RMM = os.environ.get("DOMINO_DISABLE_RMM", False) if not DISABLE_RMM: import rmm from rmm.allocators.torch import rmm_torch_allocator From c5e1db817e9fc3977489c37b28cc5bd1aa3f0578 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 17 Sep 2025 21:35:17 +0000 Subject: [PATCH 32/98] Add sharded implementations of both kNN and SDF, as well as tests for them. --- .../distributed/shard_utils/__init__.py | 10 +- physicsnemo/distributed/shard_utils/knn.py | 212 ++++++++++++++++++ .../distributed/shard_utils/mesh_ops.py | 146 ++++++++++++ test/distributed/shard_tensor/ops/test_knn.py | 72 ++++++ test/distributed/shard_tensor/ops/test_sdf.py | 119 ++++++++++ test/distributed/shard_tensor/ops/utils.py | 44 ++++ 6 files changed, 600 insertions(+), 3 deletions(-) create mode 100644 physicsnemo/distributed/shard_utils/knn.py create mode 100644 physicsnemo/distributed/shard_utils/mesh_ops.py create mode 100644 test/distributed/shard_tensor/ops/test_knn.py create mode 100644 test/distributed/shard_tensor/ops/test_sdf.py diff --git a/physicsnemo/distributed/shard_utils/__init__.py b/physicsnemo/distributed/shard_utils/__init__.py index 36ec8e63bb..e332307869 100644 --- a/physicsnemo/distributed/shard_utils/__init__.py +++ b/physicsnemo/distributed/shard_utils/__init__.py @@ -21,7 +21,12 @@ # Prevent importing this module if the minimum version of pytorch is not met. try: check_module_requirements("physicsnemo.distributed.shard_tensor") + SHARD_TENSOR_AVAILABLE = True +except ImportError: + pass + +if SHARD_TENSOR_AVAILABLE: from physicsnemo.distributed.shard_tensor import ShardTensor def register_shard_wrappers(): @@ -32,6 +37,8 @@ def register_shard_wrappers(): sharded_select_backward_helper, sharded_select_helper, ) + from .knn import knn_sharded_wrapper + from .mesh_ops import sharded_signed_distance_field_wrapper # Currently disabled until wrapt is removed # from .natten_patches import na2d_wrapper @@ -40,6 +47,3 @@ def register_shard_wrappers(): from .pooling_patches import generic_avg_pool_nd_wrapper from .unary_ops import unsqueeze_wrapper from .unpooling_patches import generic_interpolate_wrapper - -except ImportError: - pass diff --git a/physicsnemo/distributed/shard_utils/knn.py b/physicsnemo/distributed/shard_utils/knn.py new file mode 100644 index 0000000000..689223000c --- /dev/null +++ b/physicsnemo/distributed/shard_utils/knn.py @@ -0,0 +1,212 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any, Callable + +import numpy as np +import torch +import torch.distributed as dist + +from physicsnemo.utils.neighbors.knn._cuml_impl import knn_impl +from physicsnemo.utils.version_check import check_module_requirements + +check_module_requirements("physicsnemo.distributed.shard_tensor") + +from physicsnemo.distributed import ShardTensor # noqa: E402 +from physicsnemo.distributed.shard_utils.patch_core import ( # noqa: E402 + MissingShardPatch, +) +from physicsnemo.distributed.shard_utils.ring import ( # noqa: E402 + RingPassingConfig, + perform_ring_iteration, +) + + +def ring_knn( + points: ShardTensor, queries: ShardTensor, k: int +) -> tuple[torch.Tensor, torch.Tensor]: + """ + Ring based kNN implementation, where the points travel around a ring and the + queries stay local. + """ + # Each tensor has a _spec attribute, which contains information about the tensor's placement + # and the devices it lives on: + points_spec = points._spec + queries_spec = queries._spec + + # ** In general ** you want to do some checking on the placements, since each + # point cloud might be sharded differently. By construction, I know they're both + # sharded along the points axis here (and not, say, replicated). + + if not points_spec.mesh == queries_spec.mesh: + raise NotImplementedError("Tensors must be sharded on the same mesh") + + mesh = points_spec.mesh + local_group = mesh.get_group(0) + local_size = dist.get_world_size(group=local_group) + mesh_rank = mesh.get_local_rank() + + # points and queries are both sharded - and since we're returning the nearest + # neighbors to points, let's make sure the output keeps that sharding too. + + # One memory-efficient way to do this is with with a ring computation. + # We'll compute the knn on the local tensors, get the distances and outputs, + # then shuffle the queries shards along the mesh. + + # we'll need to sort the results and make sure we have just the top-k, + # which is a little extra computation. + + # Physics nemo has a ring passing utility we can use. + ring_config = RingPassingConfig( + mesh_dim=0, + mesh_size=local_size, + ring_direction="forward", + communication_method="p2p", + ) + + local_points, local_queries = points.to_local(), queries.to_local() + current_dists = None + current_topk_idx = None + + points_spec = points._spec + + points_sharding_shapes = points_spec.sharding_shapes()[0] + + sharding_dim = points_spec.placements[0].dim + + # This is to help specify the offset from local to global tensor. + points_strides_along_ring = [s[sharding_dim] for s in points_sharding_shapes] + points_strides_along_ring = np.cumsum(points_strides_along_ring) + points_strides_along_ring = [ + 0, + ] + list(points_strides_along_ring[0:-1]) + + for i in range(local_size): + source_rank = (mesh_rank - i) % local_size + + # For point clouds, we need to pass the size of the incoming shard. + next_source_rank = (source_rank - 1) % local_size + recv_shape = points_sharding_shapes[next_source_rank] + if i != local_size - 1: + # Don't do a ring on the last iteration. + next_local_points = perform_ring_iteration( + local_points, + mesh, + ring_config, + recv_shape=recv_shape, + ) + + # Compute the knn on the local tensors: + local_idx, local_distances = knn_impl(local_points, local_queries, k) + + # The local_idx indexes into the _local_ tensor, but for + # Correctness we need it to index into the _global_ tensor. + # Make sure to index using the rank the points came from! + offset = points_strides_along_ring[source_rank] + local_idx = local_idx + offset + + if current_dists is None: + current_dists = local_distances + current_topk_idx = local_idx + else: + # Combine with the topk so far: + current_dists = torch.cat([current_dists, local_distances], dim=1) + current_topk_idx = torch.cat([current_topk_idx, local_idx], dim=1) + # And take the topk again: + current_dists, running_indexes = torch.topk( + current_dists, k=k, dim=1, sorted=True, largest=False + ) + + # This creates proper indexing to select specific elements along dim 1 + + current_topk_idx = torch.gather(current_topk_idx, 1, running_indexes) + + if i != local_size - 1: + # Don't do a ring on the last iteration. + local_points = next_local_points + + return current_topk_idx, current_dists + + +def extract_knn_args(points, queries, k, *args, **kwargs): + return points, queries, k + + +def knn_sharded_wrapper( + func: Callable, types: Any, args: tuple, kwargs: dict +) -> tuple[ShardTensor, ShardTensor]: + """ + Dispatch the proper kNN tools based on the input sharding. + """ + + points, queries, k = extract_knn_args(*args, **kwargs) + + # kNN will only work with 1D sharding + if points._spec.mesh != queries._spec.mesh: + raise MissingShardPatch( + "sharded knn: All point inputs must be on the same mesh" + ) + + # make sure all meshes are 1D + if points._spec.mesh.ndim != 1: + raise MissingShardPatch( + "point_cloud_ops.radius_search_wrapper: All point inputs must be on 1D meshes" + ) + + # Do we need a ring? + points_placement = points._spec.placements[0] + + if points_placement.is_shard(): + # We need a ring + idx, distances = ring_knn(points, queries, k) + else: + # No ring is needed. Get the local tensors and compute directly: + local_points = points.to_local() # This is replicated, getting all of it + local_queries = queries.to_local() # This sharding doesn't matter! + idx, distances = knn_impl(local_points, local_queries, k) + + # The outputs only depend on the local queries shape + input_queries_spec = queries._spec + # The global output tensor will be (N_q, k) + + output_queries_shard_shapes = { + mesh_dim: tuple( + torch.Size((s[0], k)) + for s in input_queries_spec.sharding_shapes()[mesh_dim] + ) + for mesh_dim in input_queries_spec.sharding_shapes().keys() + } + + # Convert the selected points and indexes to shards: + shard_idx = ShardTensor.from_local( + idx, + queries._spec.mesh, + queries._spec.placements, + sharding_shapes=output_queries_shard_shapes, + ) + shard_distances = ShardTensor.from_local( + distances, + queries._spec.mesh, + queries._spec.placements, + sharding_shapes=output_queries_shard_shapes, + ) + + return shard_idx, shard_distances + + +ShardTensor.register_named_function_handler( + "physicsnemo.knn_cuml.default", knn_sharded_wrapper +) diff --git a/physicsnemo/distributed/shard_utils/mesh_ops.py b/physicsnemo/distributed/shard_utils/mesh_ops.py new file mode 100644 index 0000000000..c04ad66a89 --- /dev/null +++ b/physicsnemo/distributed/shard_utils/mesh_ops.py @@ -0,0 +1,146 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any + +import torch + +from physicsnemo.utils.sdf import signed_distance_field +from physicsnemo.utils.version_check import check_module_requirements + +check_module_requirements("physicsnemo.distributed.shard_tensor") + + +from physicsnemo.distributed import ShardTensor # noqa: E402 + + +def sharded_signed_distance_field( + mesh_vertices: ShardTensor, + mesh_indices: ShardTensor, + input_points: ShardTensor, + max_dist: float = 1e8, + use_sign_winding_number: bool = False, +) -> tuple[ShardTensor, ShardTensor]: + """ + Compute the signed distance field for a (possibly sharded) mesh. + + Args: + mesh_vertices: Sharded tensor of mesh vertices + mesh_indices: Sharded tensor of mesh indices + input_points: Sharded tensor of input points + max_dist: Maximum distance for the signed distance field + use_sign_winding_number: Whether to use sign winding number + """ + + # We can not actually compute the signed distance function on a sharded mesh. + # So, in this case, force the mesh to replicate placement if necessary: + + local_mesh_vertices = mesh_vertices.full_tensor() + local_mesh_indices = mesh_indices.full_tensor() + + # For the input points, though, it doesn't matter - they can be sharded. + # No communication is necessary + + local_input_points = input_points.to_local() + + local_sdf, local_sdf_hit_point = signed_distance_field( + local_mesh_vertices, + local_mesh_indices, + local_input_points, + max_dist, + use_sign_winding_number, + ) + + # Then, construct the output shard tensors: + + if input_points._spec.placements[0].is_shard(): + # Compute the output sharding shapes + + # Output shape is always (N, 1), hit point is (N, 3) + input_shard_shapes = input_points._spec.sharding_shapes() + + output_shard_shapes = { + mesh_dim: tuple(torch.Size((s[0],)) for s in input_shard_shapes[mesh_dim]) + for mesh_dim in input_shard_shapes.keys() + } + + sharded_sdf_output = ShardTensor.from_local( + local_sdf, + input_points._spec.mesh, + input_points._spec.placements, + sharding_shapes=output_shard_shapes, + ).reshape(input_points.shape[:-1]) + + sharded_sdf_hit_point_output = ShardTensor.from_local( + local_sdf_hit_point, + input_points._spec.mesh, + input_points._spec.placements, + sharding_shapes=input_shard_shapes, + ).reshape(input_points.shape) + + else: + # The input points were replicated, use that for output: + sharded_sdf_output = ShardTensor.from_local( + local_sdf, + input_points._spec.mesh, + input_points._spec.placements, + ) + sharded_sdf_hit_point_output = ShardTensor.from_local( + local_sdf_hit_point, + input_points._spec.mesh, + input_points._spec.placements, + ) + + return sharded_sdf_output, sharded_sdf_hit_point_output + + +def repackage_radius_search_wrapper_args( + mesh_vertices: torch.Tensor, + mesh_indices: torch.Tensor, + input_points: torch.Tensor, + max_dist: float = 1e8, + use_sign_winding_number: bool = False, + *args, + **kwargs, +) -> tuple[ShardTensor, ShardTensor, dict]: + """Repackages sdf arguments into a standard format.""" + # Extract any additional parameters that might be in kwargs + # or use defaults if not provided + return_kwargs = { + "max_dist": max_dist, + "use_sign_winding_number": use_sign_winding_number, + } + + # Add any explicitly passed parameters + if kwargs: + return_kwargs.update(kwargs) + + return mesh_vertices, mesh_indices, input_points, return_kwargs + + +def sharded_signed_distance_field_wrapper( + func: Any, type: Any, args: tuple, kwargs: dict +) -> tuple[ShardTensor, ShardTensor]: + """ + Wrapper for sharded_signed_distance_field to support sharded tensors. + """ + + return sharded_signed_distance_field(*args, **kwargs) + + +ShardTensor.register_named_function_handler( + "physicsnemo.signed_distance_field.default", sharded_signed_distance_field_wrapper +) diff --git a/test/distributed/shard_tensor/ops/test_knn.py b/test/distributed/shard_tensor/ops/test_knn.py new file mode 100644 index 0000000000..b41b62748a --- /dev/null +++ b/test/distributed/shard_tensor/ops/test_knn.py @@ -0,0 +1,72 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +import torch +from torch.distributed.tensor.placement_types import Replicate, Shard + +from physicsnemo.distributed import DistributedManager, scatter_tensor +from physicsnemo.utils.neighbors import knn + +from .utils import numerical_shard_tensor_check + + +class kNNModule(torch.nn.Module): + def __init__( + self, + num_neighbors=4, + ): + super().__init__() + + self.num_neighbors = num_neighbors + + def forward(self, points, queries): + return knn(points, queries, self.num_neighbors) + + +@pytest.mark.multigpu_static +@pytest.mark.parametrize("scatter_points", [True, False]) +@pytest.mark.parametrize("scatter_queries", [True, False]) +def test_knn_1dmesh( + distributed_mesh, + scatter_points: bool, + scatter_queries: bool, +): + dm = DistributedManager() + + # Generate random points for the points and queries + points = torch.randn(1043, 3).to(dm.device) + queries = torch.randn(2198, 3).to(dm.device) + + # points = torch.randn(10, 3).to(dm.device) + # queries = torch.randn(8, 3).to(dm.device) + + # Distribute the inputs: + points_placements = (Shard(0),) if scatter_points else (Replicate(),) + queries_placements = (Shard(0),) if scatter_queries else (Replicate(),) + + sharded_points = scatter_tensor(points, 0, distributed_mesh, points_placements) + sharded_queries = scatter_tensor(queries, 0, distributed_mesh, queries_placements) + + module = kNNModule() + + numerical_shard_tensor_check( + distributed_mesh, + module, + [sharded_points, sharded_queries], + {}, + check_grads=False, + ) diff --git a/test/distributed/shard_tensor/ops/test_sdf.py b/test/distributed/shard_tensor/ops/test_sdf.py new file mode 100644 index 0000000000..079127a229 --- /dev/null +++ b/test/distributed/shard_tensor/ops/test_sdf.py @@ -0,0 +1,119 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import pytest +import torch +from scipy.spatial import ConvexHull +from torch.distributed.tensor.placement_types import Replicate, Shard + +from physicsnemo.distributed import DistributedManager, scatter_tensor +from physicsnemo.utils.sdf import signed_distance_field + +from .utils import numerical_shard_tensor_check + + +# This is from the domino datapipe, too: +def random_sample_on_unit_sphere(n_points): + # Random points on the sphere: + phi = np.random.uniform(0, 2 * np.pi, n_points) + cos_theta = np.random.uniform(-1, 1, n_points) + theta = np.arccos(cos_theta) + + # Convert to x/y/z and stack: + x = np.sin(theta) * np.cos(phi) + y = np.sin(theta) * np.sin(phi) + z = np.cos(theta) + points = np.stack([x, y, z], axis=1) + return points + + +def mesh_vertices_and_indices(n_points): + # We are generating a mesh on a random sphere. + stl_points = random_sample_on_unit_sphere(n_points) + + # Generate the triangles with ConvexHull: + hull = ConvexHull(stl_points) + faces = hull.simplices # (M, 3) + + return stl_points, faces + + +class SDFModule(torch.nn.Module): + """ + This is a test module to run the SDF function ... don't use it elsewhere. + """ + + def __init__(self, max_dist=1e8, use_sign_winding_number=False): + super().__init__() + + self.max_dist = max_dist + self.use_sign_winding_number = use_sign_winding_number + + def forward(self, mesh_vertices, mesh_indices, input_points): + return signed_distance_field( + mesh_vertices, + mesh_indices, + input_points, + self.max_dist, + self.use_sign_winding_number, + ) + + +@pytest.mark.multigpu_static +@pytest.mark.parametrize("scatter_mesh", [True, False]) +@pytest.mark.parametrize("scatter_inputs", [True, False]) +def test_sdf_1dmesh( + distributed_mesh, + scatter_mesh: bool, + scatter_inputs: bool, +): + dm = DistributedManager() + + # Generate a mesh on a unit sphere: + mesh_vertices, mesh_indices = mesh_vertices_and_indices(932) + + # Cast the vertices and indices to tensors: + mesh_vertices = torch.tensor(mesh_vertices).to(dm.device) + mesh_indices = torch.tensor(mesh_indices.flatten()).to(dm.device) + + # Distribute the inputs: + mesh_placements = (Shard(0),) if scatter_mesh else (Replicate(),) + input_placements = (Shard(0),) if scatter_inputs else (Replicate(),) + + sharded_mesh_vertices = scatter_tensor( + mesh_vertices, 0, distributed_mesh, mesh_placements + ) + sharded_mesh_indices = scatter_tensor( + mesh_indices, 0, distributed_mesh, mesh_placements + ) + + # Generate random points in the volume: + input_points = torch.randn(1043, 3).to(dm.device) + + sharded_input_points = scatter_tensor( + input_points, 0, distributed_mesh, input_placements + ) + + module = SDFModule() + + numerical_shard_tensor_check( + distributed_mesh, + module, + [sharded_mesh_vertices, sharded_mesh_indices, sharded_input_points], + {}, + check_grads=False, + ) diff --git a/test/distributed/shard_tensor/ops/utils.py b/test/distributed/shard_tensor/ops/utils.py index 19e3de4d73..cdece254e7 100644 --- a/test/distributed/shard_tensor/ops/utils.py +++ b/test/distributed/shard_tensor/ops/utils.py @@ -18,6 +18,7 @@ from collections.abc import Iterable import torch +import torch.distributed as dist from torch.distributed.tensor import DTensor, distribute_module from torch.distributed.tensor.device_mesh import DeviceMesh @@ -83,6 +84,18 @@ def sharded_to_local(container): def default_tensor_comparison(output, d_output, atol, rtol): # We assume a single output! + if not isinstance(output, torch.Tensor): + if isinstance(output, Iterable): + return all( + [ + default_tensor_comparison(item, d_item, atol, rtol) + for item, d_item in zip(output, d_output) + ] + ) + + if isinstance(d_output, ShardTensor): + validate_shard_tensor_spec(d_output) + local_output = sharded_to_local(d_output) # Check forward agreement: @@ -95,6 +108,37 @@ def default_loss_fn(output): return output.mean() +def validate_shard_tensor_spec(shard_tensor): + # Take a shard tensor and cross check on the dimensions. + # Take care about assertions here, since this is a collective + + # Check out shard shapes + # The local shard shape needs to match the local tensor shape: + sharding_shapes = shard_tensor._spec.sharding_shapes() + mesh = shard_tensor._spec.mesh + + for mesh_dim in range(mesh.ndim): + mesh_rank = mesh.get_local_rank(mesh_dim) + mesh_size = dist.get_world_size(mesh.get_group(mesh_dim)) + + # Is this axis sharded? + this_placement = shard_tensor._spec.placements[mesh_dim] + if this_placement.is_shard(): + # This axis is sharded. the mesh dim should be in the shapes + assert mesh_dim in sharding_shapes.keys() + + # The length of the sharding shapes should match the mesh size: + assert len(sharding_shapes[mesh_dim]) == mesh_size + + # The local shape should match the listed shape for this rank: + # this_shape = shard_tensor._spec.sharding_shapes()[mesh_dim] + # print(f"local tensor shape: {shard_tensor._local_tensor.shape}") + # print(f"sharding shapes: {sharding_shapes[mesh_dim][mesh_rank]}") + assert ( + sharding_shapes[mesh_dim][mesh_rank] == shard_tensor._local_tensor.shape + ) + + def numerical_shard_tensor_check( mesh: DeviceMesh, module: torch.nn.Module, From ee0c728becb364127036fece3075344b0e578c1d Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Fri, 19 Sep 2025 13:24:12 +0000 Subject: [PATCH 33/98] First domino refactor: consolidate all MLP implementations, move several classes to their own files for organization. --- physicsnemo/models/domino/ball_query.py | 106 ++++++ physicsnemo/models/domino/encodings.py | 106 ++++++ physicsnemo/models/domino/mlps.py | 113 ++++++ physicsnemo/models/domino/model.py | 437 +++--------------------- 4 files changed, 381 insertions(+), 381 deletions(-) create mode 100644 physicsnemo/models/domino/ball_query.py create mode 100644 physicsnemo/models/domino/encodings.py create mode 100644 physicsnemo/models/domino/mlps.py diff --git a/physicsnemo/models/domino/ball_query.py b/physicsnemo/models/domino/ball_query.py new file mode 100644 index 0000000000..681fe80733 --- /dev/null +++ b/physicsnemo/models/domino/ball_query.py @@ -0,0 +1,106 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This code contains the DoMINO model architecture. +The DoMINO class contains an architecture to model both surface and +volume quantities together as well as separately (controlled using +the config.yaml file) +""" + +import torch +import torch.nn as nn +from einops import rearrange + +from physicsnemo.utils.neighbors import radius_search + + +class BQWarp(nn.Module): + """ + Warp-based ball-query layer for finding neighboring points within a specified radius. + + This layer uses an accelerated ball query implementation to efficiently find points + within a specified radius of query points. + """ + + def __init__( + self, + radius: float = 0.25, + neighbors_in_radius: int = 10, + ): + """ + Initialize the BQWarp layer. + + Args: + radius: Radius for ball query operation + neighbors_in_radius: Maximum number of neighbors to return within radius + """ + super().__init__() + + self.radius = radius + self.neighbors_in_radius = neighbors_in_radius + + def forward( + self, x: torch.Tensor, p_grid: torch.Tensor, reverse_mapping: bool = True + ) -> tuple[torch.Tensor, torch.Tensor]: + """ + Performs ball query operation to find neighboring points and their features. + + This method uses the Warp-accelerated ball query implementation to find points + within a specified radius. It can operate in two modes: + - Forward mapping: Find points from x that are near p_grid points (reverse_mapping=False) + - Reverse mapping: Find points from p_grid that are near x points (reverse_mapping=True) + + Args: + x: Tensor of shape (batch_size, num_points, 3+features) containing point coordinates + and their features + p_grid: Tensor of shape (batch_size, grid_x, grid_y, grid_z, 3) containing grid point + coordinates + reverse_mapping: Boolean flag to control the direction of the mapping: + - True: Find p_grid points near x points + - False: Find x points near p_grid points + + Returns: + tuple containing: + - mapping: Tensor containing indices of neighboring points + - outputs: Tensor containing coordinates of the neighboring points + """ + + if p_grid.ndim != 3: + p_grid = rearrange(p_grid, "b nx ny nz c -> b (nx ny nz) c") + + if reverse_mapping: + mapping, outputs = radius_search( + x[0], + p_grid[0], + self.radius, + self.neighbors_in_radius, + return_points=True, + ) + mapping = mapping.unsqueeze(0) + outputs = outputs.unsqueeze(0) + else: + mapping, outputs = radius_search( + p_grid[0], + x[0], + self.radius, + self.neighbors_in_radius, + return_points=True, + ) + mapping = mapping.unsqueeze(0) + outputs = outputs.unsqueeze(0) + + return mapping, outputs diff --git a/physicsnemo/models/domino/encodings.py b/physicsnemo/models/domino/encodings.py new file mode 100644 index 0000000000..e4236cc8e6 --- /dev/null +++ b/physicsnemo/models/domino/encodings.py @@ -0,0 +1,106 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This code contains the DoMINO model architecture. +The DoMINO class contains an architecture to model both surface and +volume quantities together as well as separately (controlled using +the config.yaml file) +""" + +import math + +import torch +import torch.nn as nn + +from .mlps import MLP + + +def fourier_encode_vectorized( + coords: torch.Tensor, freqs: torch.Tensor +) -> torch.Tensor: + """Vectorized Fourier feature encoding + + Args: + coords: Tensor containing coordinates, of shape (batch_size, D) + freqs: Tensor containing frequencies, of shape (F,) (num frequencies) + + Returns: + Tensor containing Fourier features, of shape (batch_size, D * 2 * F) + """ + + D = coords.shape[-1] + F = freqs.shape[0] + + freqs = freqs[None, None, :, None] # reshape to [*, F, 1] for broadcasting + + coords = coords.unsqueeze(-2) # [*, 1, D] + scaled = (coords * freqs).reshape(*coords.shape[:-2], D * F) # [*, D, F] + features = torch.cat([torch.sin(scaled), torch.cos(scaled)], dim=-1) # [*, D, 2F] + + return features.reshape(*coords.shape[:-2], D * 2 * F) # [*, D * 2F] + + +class EncodingMLP(nn.Module): + """ + This is an MLP that will, optionally, fourier encode the input features. + + The encoded features are concatenated to the original inputs, and then + processed with an MLP. + + Args: + input_features: The number of input features to the MLP. + base_layer: The number of neurons in the hidden layer of the MLP. + fourier_features: Whether to fourier encode the input features. + num_modes: The number of modes to use for the fourier encoding. + activation: The activation function to use in the MLP. + + """ + + def __init__( + self, + input_features: int, + base_layer: int, + fourier_features: bool, + num_modes: int, + activation: nn.Module, + ): + super().__init__() + self.fourier_features = fourier_features + + # self.num_modes = model_parameters.num_modes + + if self.fourier_features: + input_features_calculated = input_features + input_features * num_modes * 2 + self.register_buffer( + "freqs", torch.exp(torch.linspace(0, math.pi, self.num_modes)) + ) + else: + input_features_calculated = input_features + + self.mlp = MLP( + input_features=input_features_calculated, + base_layer=base_layer, + output_features=base_layer, + activation=activation, + n_layers=3, + ) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if self.fourier_features: + x = torch.cat((x, fourier_encode_vectorized(x, self.freqs)), dim=-1) + + return self.mlp(x) diff --git a/physicsnemo/models/domino/mlps.py b/physicsnemo/models/domino/mlps.py new file mode 100644 index 0000000000..e74583dea0 --- /dev/null +++ b/physicsnemo/models/domino/mlps.py @@ -0,0 +1,113 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This code contains the DoMINO model architecture. +The DoMINO class contains an architecture to model both surface and +volume quantities together as well as separately (controlled using +the config.yaml file) +""" + +import torch +import torch.nn as nn + + +class MLP(nn.Module): + """ + FlexibleMulti-layer perceptron (MLP) module. + + This is reused in various domino layers to simplify and unify + the MLP implementations. + """ + + def __init__( + self, + input_features: int, + output_features: int, + base_layer: int, + activation: nn.Module, + n_layers: int, + ): + super(MLP, self).__init__() + self.input_features = input_features + + modules = [] + + if n_layers == 1: + # Single layer: input_features -> output_features + modules.append(nn.Linear(input_features, output_features)) + else: + # First layer: input_features -> base_layer + modules.append(nn.Linear(input_features, base_layer)) + modules.append(activation) + + # Hidden layers: base_layer -> base_layer + for _ in range(n_layers - 2): + modules.append(nn.Linear(base_layer, base_layer)) + modules.append(activation) + + # Final layer: base_layer -> output_features (no activation) + modules.append(nn.Linear(base_layer, output_features)) + + self.mlp_modules = torch.nn.Sequential(*modules) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.mlp_modules(x) + + +class AggregationModel(MLP): + """ + Neural network module to aggregate local geometry encoding with basis functions. + + This module combines basis function representations with geometry encodings + to predict the final output quantities. It serves as the final prediction layer + that integrates all available information sources. + + """ + + def __init__( + self, + input_features: int, + output_features: int, + base_layer: int, + activation: nn.Module, + ): + super().__init__( + input_features=input_features, + output_features=output_features, + base_layer=base_layer, + activation=activation, + n_layers=5, + ) + + +class LocalPointConv(MLP): + """Layer for local geometry point kernel""" + + def __init__( + self, + input_features: int, + base_layer: int, + output_features: int, + activation: nn.Module, + ): + super().__init__( + input_features=input_features, + base_layer=base_layer, + output_features=output_features, + activation=activation, + n_layers=2, + ) diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py index c95f971e97..0b658ffbd0 100644 --- a/physicsnemo/models/domino/model.py +++ b/physicsnemo/models/domino/model.py @@ -31,57 +31,28 @@ from einops import rearrange from physicsnemo.models.unet import UNet -from physicsnemo.utils.neighbors import radius_search from physicsnemo.utils.profiling import profile +from .ball_query import BQWarp +from .encodings import ( + EncodingMLP, + fourier_encode_vectorized, +) +from .mlps import AggregationModel, LocalPointConv + def get_activation(activation: Literal["relu", "gelu"]) -> Callable: """ Return a PyTorch activation function corresponding to the given name. """ if activation == "relu": - return F.relu + return nn.ReLU() elif activation == "gelu": - return F.gelu + return nn.GELU() else: raise ValueError(f"Activation function {activation} not found") -def fourier_encode(coords, num_freqs): - """Function to caluculate fourier features""" - # Create a range of frequencies - freqs = torch.exp(torch.linspace(0, math.pi, num_freqs, device=coords.device)) - # Generate sine and cosine features - features = [torch.sin(coords * f) for f in freqs] + [ - torch.cos(coords * f) for f in freqs - ] - ret = torch.cat(features, dim=-1) - return ret - - -def fourier_encode_vectorized(coords, freqs): - """Vectorized Fourier feature encoding""" - D = coords.shape[-1] - F = freqs.shape[0] - - freqs = freqs[None, None, :, None] # reshape to [*, F, 1] for broadcasting - - coords = coords.unsqueeze(-2) # [*, 1, D] - scaled = (coords * freqs).reshape(*coords.shape[:-2], D * F) # [*, D, F] - features = torch.cat([torch.sin(scaled), torch.cos(scaled)], dim=-1) # [*, D, 2F] - - return features.reshape(*coords.shape[:-2], D * 2 * F) # [*, D * 2F] - - -def calculate_pos_encoding(nx, d=8): - """Function to caluculate positional encoding""" - vec = [] - for k in range(int(d / 2)): - vec.append(torch.sin(nx / 10000 ** (2 * (k) / d))) - vec.append(torch.cos(nx / 10000 ** (2 * (k) / d))) - return vec - - def scale_sdf(sdf: torch.Tensor) -> torch.Tensor: """ Scale a signed distance function (SDF) to emphasize surface regions. @@ -99,90 +70,6 @@ def scale_sdf(sdf: torch.Tensor) -> torch.Tensor: return sdf / (0.4 + torch.abs(sdf)) -class BQWarp(nn.Module): - """ - Warp-based ball-query layer for finding neighboring points within a specified radius. - - This layer uses an accelerated ball query implementation to efficiently find points - within a specified radius of query points. - """ - - def __init__( - self, - grid_resolution=None, - radius: float = 0.25, - neighbors_in_radius: int = 10, - ): - """ - Initialize the BQWarp layer. - - Args: - grid_resolution: Resolution of the grid in each dimension [nx, ny, nz] - radius: Radius for ball query operation - neighbors_in_radius: Maximum number of neighbors to return within radius - """ - super().__init__() - if grid_resolution is None: - grid_resolution = [256, 96, 64] - - self.radius = radius - self.neighbors_in_radius = neighbors_in_radius - self.grid_resolution = grid_resolution - - def forward( - self, x: torch.Tensor, p_grid: torch.Tensor, reverse_mapping: bool = True - ) -> tuple[torch.Tensor, torch.Tensor]: - """ - Performs ball query operation to find neighboring points and their features. - - This method uses the Warp-accelerated ball query implementation to find points - within a specified radius. It can operate in two modes: - - Forward mapping: Find points from x that are near p_grid points (reverse_mapping=False) - - Reverse mapping: Find points from p_grid that are near x points (reverse_mapping=True) - - Args: - x: Tensor of shape (batch_size, num_points, 3+features) containing point coordinates - and their features - p_grid: Tensor of shape (batch_size, grid_x, grid_y, grid_z, 3) containing grid point - coordinates - reverse_mapping: Boolean flag to control the direction of the mapping: - - True: Find p_grid points near x points - - False: Find x points near p_grid points - - Returns: - tuple containing: - - mapping: Tensor containing indices of neighboring points - - outputs: Tensor containing coordinates of the neighboring points - """ - batch_size = x.shape[0] - nx, ny, nz = self.grid_resolution - - p_grid = torch.reshape(p_grid, (batch_size, nx * ny * nz, 3)) - - if reverse_mapping: - mapping, outputs = radius_search( - x[0], - p_grid[0], - self.radius, - self.neighbors_in_radius, - return_points=True, - ) - mapping = mapping.unsqueeze(0) - outputs = outputs.unsqueeze(0) - else: - mapping, outputs = radius_search( - p_grid[0], - x[0], - self.radius, - self.neighbors_in_radius, - return_points=True, - ) - mapping = mapping.unsqueeze(0) - outputs = outputs.unsqueeze(0) - - return mapping, outputs - - class GeoConvOut(nn.Module): """ Geometry layer to project STL geometry data onto regular grids. @@ -419,7 +306,6 @@ def __init__( for j in range(len(radii)): self.bq_warp.append( BQWarp( - grid_resolution=model_parameters.interp_res, radius=radii[j], neighbors_in_radius=neighbors_in_radius[j], ) @@ -632,247 +518,6 @@ def forward( return encoding_g -class NNBasisFunctions(nn.Module): - """Basis function layer for point clouds""" - - def __init__(self, input_features: int, model_parameters=None): - super(NNBasisFunctions, self).__init__() - base_layer = model_parameters.base_layer - self.fourier_features = model_parameters.fourier_features - self.num_modes = model_parameters.num_modes - - if self.fourier_features: - input_features_calculated = ( - input_features + input_features * self.num_modes * 2 - ) - else: - input_features_calculated = input_features - - self.fc1 = nn.Linear(input_features_calculated, base_layer) - self.fc2 = nn.Linear(base_layer, int(base_layer)) - self.fc3 = nn.Linear(int(base_layer), int(base_layer)) - - self.activation = get_activation(model_parameters.activation) - - if self.fourier_features: - self.register_buffer( - "freqs", torch.exp(torch.linspace(0, math.pi, self.num_modes)) - ) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """ - Transform point features into a basis function representation. - - Args: - x: Input tensor containing point features - - Returns: - Tensor containing basis function coefficients - """ - if self.fourier_features: - facets = torch.cat((x, fourier_encode_vectorized(x, self.freqs)), dim=-1) - else: - facets = x - facets = self.activation(self.fc1(facets)) - facets = self.activation(self.fc2(facets)) - facets = self.fc3(facets) - - return facets - - -class ParameterModel(nn.Module): - """ - Neural network module to encode simulation parameters. - - This module encodes physical global parameters into a learned - latent representation that can be incorporated into the - model'sprediction process. - """ - - def __init__(self, input_features: int, model_parameters=None): - """ - Initialize the parameter encoding network. - - Args: - input_features: Number of input parameters to encode - model_parameters: Configuration parameters for the model - """ - super(ParameterModel, self).__init__() - self.fourier_features = model_parameters.fourier_features - self.num_modes = model_parameters.num_modes - - if self.fourier_features: - input_features_calculated = ( - input_features + input_features * self.num_modes * 2 - ) - self.register_buffer( - "freqs", torch.exp(torch.linspace(0, math.pi, self.num_modes)) - ) - else: - input_features_calculated = input_features - - base_layer = model_parameters.base_layer - self.fc1 = nn.Linear(input_features_calculated, base_layer) - self.fc2 = nn.Linear(base_layer, int(base_layer)) - self.fc3 = nn.Linear(int(base_layer), int(base_layer)) - - self.activation = get_activation(model_parameters.activation) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """ - Encode physical parameters into a latent representation. - - Args: - x: Input tensor containing physical parameters (e.g., inlet velocity, air density) - - Returns: - Tensor containing encoded parameter representation - """ - if self.fourier_features: - params = torch.cat((x, fourier_encode_vectorized(x, self.freqs)), dim=-1) - else: - params = x - params = self.activation(self.fc1(params)) - params = self.activation(self.fc2(params)) - params = self.fc3(params) - - return params - - -class AggregationModel(nn.Module): - """ - Neural network module to aggregate local geometry encoding with basis functions. - - This module combines basis function representations with geometry encodings - to predict the final output quantities. It serves as the final prediction layer - that integrates all available information sources. - """ - - def __init__( - self, - input_features: int, - output_features: int, - model_parameters=None, - new_change: bool = True, - ): - """ - Initialize the aggregation model. - - Args: - input_features: Number of input feature dimensions - output_features: Number of output feature dimensions - model_parameters: Configuration parameters for the model - new_change: Flag to enable newer implementation (default: True) - """ - super(AggregationModel, self).__init__() - self.input_features = input_features - self.output_features = output_features - self.new_change = new_change - base_layer = model_parameters.base_layer - self.fc1 = nn.Linear(self.input_features, base_layer) - self.fc2 = nn.Linear(base_layer, int(base_layer)) - self.fc3 = nn.Linear(int(base_layer), int(base_layer)) - self.fc4 = nn.Linear(int(base_layer), int(base_layer)) - self.fc5 = nn.Linear(int(base_layer), self.output_features) - - self.activation = get_activation(model_parameters.activation) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """ - Process the combined input features to predict output quantities. - - This method applies a series of fully connected layers to the input, - which typically contains a combination of basis functions, geometry - encodings, and potentially parameter encodings. - - Args: - x: Input tensor containing combined features - - Returns: - Tensor containing predicted output quantities - """ - out = self.activation(self.fc1(x)) - out = self.activation(self.fc2(out)) - out = self.activation(self.fc3(out)) - out = self.activation(self.fc4(out)) - - out = self.fc5(out) - - return out - - -class LocalPointConv(nn.Module): - """Layer for local geometry point kernel""" - - def __init__( - self, - input_features, - base_layer, - output_features, - model_parameters=None, - ): - super(LocalPointConv, self).__init__() - self.input_features = input_features - self.output_features = output_features - self.fc1 = nn.Linear(self.input_features, base_layer) - self.fc2 = nn.Linear(base_layer, self.output_features) - self.activation = get_activation(model_parameters.activation) - - def forward(self, x): - out = self.activation(self.fc1(x)) - out = self.fc2(out) - - return out - - -class PositionEncoder(nn.Module): - """Positional encoding of point clouds""" - - def __init__(self, input_features: int, model_parameters=None): - super().__init__() - base_layer = model_parameters.base_neurons - self.fourier_features = model_parameters.fourier_features - self.num_modes = model_parameters.num_modes - - if self.fourier_features: - input_features_calculated = ( - input_features + input_features * self.num_modes * 2 - ) - else: - input_features_calculated = input_features - - self.fc1 = nn.Linear(input_features_calculated, base_layer) - self.fc2 = nn.Linear(base_layer, int(base_layer)) - self.fc3 = nn.Linear(int(base_layer), int(base_layer)) - - self.activation = get_activation(model_parameters.activation) - - if self.fourier_features: - self.register_buffer( - "freqs", torch.exp(torch.linspace(0, math.pi, self.num_modes)) - ) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """ - Transform point features into a basis function representation. - - Args: - x: Input tensor containing point features - - Returns: - Tensor containing position encoder - """ - if self.fourier_features: - facets = torch.cat((x, fourier_encode_vectorized(x, self.freqs)), axis=-1) - else: - facets = x - facets = self.activation(self.fc1(facets)) - facets = self.activation(self.fc2(facets)) - facets = self.fc3(facets) - - return facets - - # @dataclass # class MetaData(ModelMetaData): # name: str = "DoMINO" @@ -1134,9 +779,12 @@ def __init__( if self.encode_parameters: # Defining the parameter model base_layer_p = model_parameters.parameter_model.base_layer - self.parameter_model = ParameterModel( + self.parameter_model = EncodingMLP( input_features=self.global_features, - model_parameters=model_parameters.parameter_model, + fourier_features=model_parameters.parameter_model.fourier_features, + num_modes=model_parameters.parameter_model.num_modes, + base_layer=model_parameters.parameter_model.base_layer, + activation=get_activation(model_parameters.parameter_model.activation), ) else: base_layer_p = 0 @@ -1172,9 +820,15 @@ def __init__( self.num_variables_surf ): # Have the same basis function for each variable self.nn_basis_surf.append( - NNBasisFunctions( + EncodingMLP( input_features=input_features_surface, - model_parameters=model_parameters.nn_basis_functions, + base_layer=model_parameters.nn_basis_functions.base_layer, + fourier_features=model_parameters.nn_basis_functions.fourier_features, + num_modes=model_parameters.nn_basis_functions.num_modes, + activation=get_activation( + model_parameters.nn_basis_functions.activation + ), + # model_parameters=model_parameters.nn_basis_functions, ) ) @@ -1184,9 +838,15 @@ def __init__( self.num_variables_vol ): # Have the same basis function for each variable self.nn_basis_vol.append( - NNBasisFunctions( + EncodingMLP( input_features=input_features, - model_parameters=model_parameters.nn_basis_functions, + base_layer=model_parameters.nn_basis_functions.base_layer, + fourier_features=model_parameters.nn_basis_functions.fourier_features, + num_modes=model_parameters.nn_basis_functions.num_modes, + activation=get_activation( + model_parameters.nn_basis_functions.activation + ), + # model_parameters=model_parameters.nn_basis_functions, ) ) @@ -1200,8 +860,12 @@ def __init__( else: inp_pos_vol = 7 if model_parameters.use_sdf_in_basis_func else 3 - self.fc_p_vol = PositionEncoder( - inp_pos_vol, model_parameters.position_encoder + self.fc_p_vol = EncodingMLP( + input_features=inp_pos_vol, + fourier_features=model_parameters.position_encoder.fourier_features, + num_modes=model_parameters.position_encoder.num_modes, + base_layer=model_parameters.position_encoder.base_neurons, + activation=get_activation(model_parameters.position_encoder.activation), ) if self.output_features_surf is not None: @@ -1210,10 +874,13 @@ def __init__( else: inp_pos_surf = 3 - self.fc_p_surf = PositionEncoder( - inp_pos_surf, model_parameters.position_encoder + self.fc_p_surf = EncodingMLP( + input_features=inp_pos_surf, + fourier_features=model_parameters.position_encoder.fourier_features, + num_modes=model_parameters.position_encoder.num_modes, + base_layer=model_parameters.position_encoder.base_neurons, + activation=get_activation(model_parameters.position_encoder.activation), ) - # BQ for surface self.surface_neighbors_in_radius = ( model_parameters.geometry_local.surface_neighbors_in_radius @@ -1236,7 +903,6 @@ def __init__( self.surface_bq_warp.append( BQWarp( - grid_resolution=model_parameters.interp_res, radius=self.surface_radius[ct], neighbors_in_radius=self.surface_neighbors_in_radius[ct], ) @@ -1246,7 +912,9 @@ def __init__( input_features=total_neighbors_in_radius, base_layer=512, output_features=self.surface_neighbors_in_radius[ct], - model_parameters=model_parameters.local_point_conv, + activation=get_activation( + model_parameters.local_point_conv.activation + ), ) ) @@ -1272,7 +940,6 @@ def __init__( self.volume_bq_warp.append( BQWarp( - grid_resolution=model_parameters.interp_res, radius=self.volume_radius[ct], neighbors_in_radius=self.volume_neighbors_in_radius[ct], ) @@ -1282,7 +949,9 @@ def __init__( input_features=total_neighbors_in_radius, base_layer=512, output_features=self.volume_neighbors_in_radius[ct], - model_parameters=model_parameters.local_point_conv, + activation=get_activation( + model_parameters.local_point_conv.activation + ), ) ) @@ -1316,7 +985,10 @@ def __init__( + base_layer_geo_surf + base_layer_p, output_features=1, - model_parameters=model_parameters.aggregation_model, + base_layer=model_parameters.aggregation_model.base_layer, + activation=get_activation( + model_parameters.aggregation_model.activation + ), ) ) @@ -1335,7 +1007,10 @@ def __init__( + base_layer_geo_vol + base_layer_p, output_features=1, - model_parameters=model_parameters.aggregation_model, + base_layer=model_parameters.aggregation_model.base_layer, + activation=get_activation( + model_parameters.aggregation_model.activation + ), ) ) From 611dce4a8f8afb20557c95caa98a20ba37124f66 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Fri, 19 Sep 2025 14:09:30 +0000 Subject: [PATCH 34/98] Refactor the encodings stage of domino to standalone nn.Modules --- physicsnemo/models/domino/encodings.py | 145 +++++++++++++++++- physicsnemo/models/domino/model.py | 194 ++++--------------------- 2 files changed, 173 insertions(+), 166 deletions(-) diff --git a/physicsnemo/models/domino/encodings.py b/physicsnemo/models/domino/encodings.py index e4236cc8e6..ca2ef34f0f 100644 --- a/physicsnemo/models/domino/encodings.py +++ b/physicsnemo/models/domino/encodings.py @@ -25,8 +25,10 @@ import torch import torch.nn as nn +from einops import rearrange -from .mlps import MLP +from .ball_query import BQWarp +from .mlps import MLP, LocalPointConv def fourier_encode_vectorized( @@ -104,3 +106,144 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: x = torch.cat((x, fourier_encode_vectorized(x, self.freqs)), dim=-1) return self.mlp(x) + + +class LocalGeometryEncoding(nn.Module): + """ + A local geometry encoding module. + + This will apply a ball query to the input features, mapping the point cloud + to the volume mesh, and then apply a local point convolution to the output. + + Args: + radius: The radius of the ball query. + neighbors_in_radius: The number of neighbors in the radius of the ball query. + total_neighbors_in_radius: The total number of neighbors in the radius of the ball query. + base_layer: The number of neurons in the hidden layer of the MLP. + activation: The activation function to use in the MLP. + grid_resolution: The resolution of the grid. + """ + + def __init__( + self, + radius: float, + neighbors_in_radius: int, + total_neighbors_in_radius: int, + base_layer: int, + activation: nn.Module, + grid_resolution: tuple[int, int, int], + ): + super().__init__() + self.bq_warp = BQWarp( + radius=radius, + neighbors_in_radius=neighbors_in_radius, + ) + + self.local_point_conv = LocalPointConv( + input_features=total_neighbors_in_radius, + base_layer=base_layer, + output_features=neighbors_in_radius, + activation=activation, + ) + self.grid_resolution = grid_resolution + + def forward( + self, + encoding_g: torch.Tensor, + volume_mesh_centers: torch.Tensor, + p_grid: torch.Tensor, + ) -> torch.Tensor: + batch_size = volume_mesh_centers.shape[0] + nx, ny, nz = self.grid_resolution + + p_grid = torch.reshape(p_grid, (batch_size, nx * ny * nz, 3)) + mapping, outputs = self.bq_warp( + volume_mesh_centers, p_grid, reverse_mapping=False + ) + mapping = mapping.type(torch.int64) + mask = mapping != 0 + + encoding_g_inner = [] + for j in range(encoding_g.shape[1]): + geo_encoding = rearrange(encoding_g[:, j], "b nx ny nz -> b 1 (nx ny nz)") + + geo_encoding_sampled = torch.index_select( + geo_encoding, 2, mapping.flatten() + ) + geo_encoding_sampled = torch.reshape(geo_encoding_sampled, mask.shape) + geo_encoding_sampled = geo_encoding_sampled * mask + + encoding_g_inner.append(geo_encoding_sampled) + encoding_g_inner = torch.cat(encoding_g_inner, dim=2) + encoding_g_inner = self.local_point_conv(encoding_g_inner) + + return encoding_g_inner + + +class MultiGeometryEncoding(nn.Module): + """ + Module to apply multiple local geometry encodings + + This will stack several local geometry encodings together, and concatenate the results. + + Args: + radii: The list of radii of the local geometry encodings. + neighbors_in_radius: The list of number of neighbors in the radius of the local geometry encodings. + geo_encoding_type: The type of geometry encoding to use. Can be "both", "stl", or "sdf". + base_layer: The number of neurons in the hidden layer of the MLP. + activation: The activation function to use in the MLP. + grid_resolution: The resolution of the grid. + """ + + def __init__( + self, + radii: list[float], + neighbors_in_radius: list[int], + geo_encoding_type: str, + base_layer: int, + activation: nn.Module, + grid_resolution: tuple[int, int, int], + ): + super().__init__() + + self.local_geo_encodings = nn.ModuleList( + [ + LocalGeometryEncoding( + radius=r, + neighbors_in_radius=n, + total_neighbors_in_radius=self.calculate_total_neighbors_in_radius( + geo_encoding_type, n, radii + ), + base_layer=base_layer, + activation=activation, + grid_resolution=grid_resolution, + ) + for r, n in zip(radii, neighbors_in_radius) + ] + ) + + def calculate_total_neighbors_in_radius( + self, geo_encoding_type: str, neighbors_in_radius: int, radii: list[float] + ) -> list[int]: + if geo_encoding_type == "both": + total_neighbors_in_radius = neighbors_in_radius * (len(radii) + 1) + elif geo_encoding_type == "stl": + total_neighbors_in_radius = neighbors_in_radius * (len(radii)) + elif geo_encoding_type == "sdf": + total_neighbors_in_radius = neighbors_in_radius + + return total_neighbors_in_radius + + def forward( + self, + encoding_g: torch.Tensor, + volume_mesh_centers: torch.Tensor, + p_grid: torch.Tensor, + ) -> torch.Tensor: + return torch.cat( + [ + local_geo_encoding(encoding_g, volume_mesh_centers, p_grid) + for local_geo_encoding in self.local_geo_encodings + ], + dim=-1, + ) diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py index 0b658ffbd0..f4da77948e 100644 --- a/physicsnemo/models/domino/model.py +++ b/physicsnemo/models/domino/model.py @@ -36,9 +36,10 @@ from .ball_query import BQWarp from .encodings import ( EncodingMLP, + MultiGeometryEncoding, fourier_encode_vectorized, ) -from .mlps import AggregationModel, LocalPointConv +from .mlps import AggregationModel def get_activation(activation: Literal["relu", "gelu"]) -> Callable: @@ -881,79 +882,26 @@ def __init__( base_layer=model_parameters.position_encoder.base_neurons, activation=get_activation(model_parameters.position_encoder.activation), ) - # BQ for surface - self.surface_neighbors_in_radius = ( - model_parameters.geometry_local.surface_neighbors_in_radius - ) - self.surface_radius = model_parameters.geometry_local.surface_radii - self.surface_bq_warp = nn.ModuleList() - self.surface_local_point_conv = nn.ModuleList() - - for ct in range(len(self.surface_radius)): - if self.geo_encoding_type == "both": - total_neighbors_in_radius = self.surface_neighbors_in_radius[ct] * ( - len(model_parameters.geometry_rep.geo_conv.surface_radii) + 1 - ) - elif self.geo_encoding_type == "stl": - total_neighbors_in_radius = self.surface_neighbors_in_radius[ct] * ( - len(model_parameters.geometry_rep.geo_conv.surface_radii) - ) - elif self.geo_encoding_type == "sdf": - total_neighbors_in_radius = self.surface_neighbors_in_radius[ct] - - self.surface_bq_warp.append( - BQWarp( - radius=self.surface_radius[ct], - neighbors_in_radius=self.surface_neighbors_in_radius[ct], - ) - ) - self.surface_local_point_conv.append( - LocalPointConv( - input_features=total_neighbors_in_radius, - base_layer=512, - output_features=self.surface_neighbors_in_radius[ct], - activation=get_activation( - model_parameters.local_point_conv.activation - ), - ) - ) - # BQ for volume - self.volume_neighbors_in_radius = ( - model_parameters.geometry_local.volume_neighbors_in_radius + # Create a set of local geometry encodings for the surface data: + self.surface_local_geo_encodings = MultiGeometryEncoding( + radii=model_parameters.geometry_local.surface_radii, + neighbors_in_radius=model_parameters.geometry_local.surface_neighbors_in_radius, + geo_encoding_type=self.geo_encoding_type, + base_layer=512, + activation=get_activation(model_parameters.local_point_conv.activation), + grid_resolution=self.grid_resolution, ) - self.volume_radius = model_parameters.geometry_local.volume_radii - self.volume_bq_warp = nn.ModuleList() - self.volume_local_point_conv = nn.ModuleList() - - for ct in range(len(self.volume_radius)): - if self.geo_encoding_type == "both": - total_neighbors_in_radius = self.volume_neighbors_in_radius[ct] * ( - len(model_parameters.geometry_rep.geo_conv.volume_radii) + 1 - ) - elif self.geo_encoding_type == "stl": - total_neighbors_in_radius = self.volume_neighbors_in_radius[ct] * ( - len(model_parameters.geometry_rep.geo_conv.volume_radii) - ) - elif self.geo_encoding_type == "sdf": - total_neighbors_in_radius = self.volume_neighbors_in_radius[ct] - self.volume_bq_warp.append( - BQWarp( - radius=self.volume_radius[ct], - neighbors_in_radius=self.volume_neighbors_in_radius[ct], - ) - ) - self.volume_local_point_conv.append( - LocalPointConv( - input_features=total_neighbors_in_radius, - base_layer=512, - output_features=self.volume_neighbors_in_radius[ct], - activation=get_activation( - model_parameters.local_point_conv.activation - ), - ) - ) + # Create a set of local geometry encodings for the surface data: + self.volume_local_geo_encodings = MultiGeometryEncoding( + radii=model_parameters.geometry_local.volume_radii, + neighbors_in_radius=model_parameters.geometry_local.volume_neighbors_in_radius, + geo_encoding_type=self.geo_encoding_type, + base_layer=512, + activation=get_activation(model_parameters.local_point_conv.activation), + grid_resolution=self.grid_resolution, + ) # Transmitting surface to volume self.surf_to_vol_conv1 = nn.Conv3d( @@ -973,7 +921,7 @@ def __init__( if self.output_features_surf is not None: # Surface base_layer_geo_surf = 0 - for j in self.surface_neighbors_in_radius: + for j in model_parameters.geometry_local.surface_neighbors_in_radius: base_layer_geo_surf += j self.agg_model_surf = nn.ModuleList() @@ -995,7 +943,7 @@ def __init__( if self.output_features_vol is not None: # Volume base_layer_geo_vol = 0 - for j in self.volume_neighbors_in_radius: + for j in model_parameters.geometry_local.volume_neighbors_in_radius: base_layer_geo_vol += j self.agg_model_vol = nn.ModuleList() @@ -1014,83 +962,6 @@ def __init__( ) ) - def position_encoder( - self, - encoding_node: torch.Tensor, - eval_mode: Literal["surface", "volume"] = "volume", - ) -> torch.Tensor: - """ - Compute positional encoding for input points. - - Args: - encoding_node: Tensor containing node position information - eval_mode: Mode of evaluation, either "volume" or "surface" - - Returns: - Tensor containing positional encoding features - """ - if eval_mode == "volume": - x = self.fc_p_vol(encoding_node) - elif eval_mode == "surface": - x = self.fc_p_surf(encoding_node) - else: - raise ValueError( - f"`eval_mode` must be 'surface' or 'volume', got {eval_mode=}" - ) - return x - - def geo_encoding_local( - self, encoding_g, volume_mesh_centers, p_grid, mode="volume" - ): - """Function to calculate local geometry encoding from global encoding""" - - if mode == "volume": - radius = self.volume_radius - bq_warp = self.volume_bq_warp - point_conv = self.volume_local_point_conv - elif mode == "surface": - radius = self.surface_radius - bq_warp = self.surface_bq_warp - point_conv = self.surface_local_point_conv - - batch_size = volume_mesh_centers.shape[0] - nx, ny, nz = ( - self.grid_resolution[0], - self.grid_resolution[1], - self.grid_resolution[2], - ) - - encoding_outer = [] - for p in range(len(radius)): - p_grid = torch.reshape(p_grid, (batch_size, nx * ny * nz, 3)) - mapping, outputs = bq_warp[p]( - volume_mesh_centers, p_grid, reverse_mapping=False - ) - mapping = mapping.type(torch.int64) - mask = mapping != 0 - - encoding_g_inner = [] - for j in range(encoding_g.shape[1]): - geo_encoding = rearrange( - encoding_g[:, j], "b nx ny nz -> b 1 (nx ny nz)" - ) - - geo_encoding_sampled = torch.index_select( - geo_encoding, 2, mapping.flatten() - ) - geo_encoding_sampled = torch.reshape(geo_encoding_sampled, mask.shape) - geo_encoding_sampled = geo_encoding_sampled * mask - - encoding_g_inner.append(geo_encoding_sampled) - encoding_g_inner = torch.cat(encoding_g_inner, dim=2) - encoding_g_inner = point_conv[p](encoding_g_inner) - - encoding_outer.append(encoding_g_inner) - - encoding_g = torch.cat(encoding_outer, dim=-1) - - return encoding_g - def calculate_solution_with_neighbors( self, surface_mesh_centers, @@ -1300,11 +1171,6 @@ def sample_sphere_shell(self, center, r_inner, r_outer, num_points): Tensor of shape (batch_size, num_points, num_samples, 3) containing the sampled points within the spherical shell around each center """ - # directions = torch.randn( - # size=(center.shape[0], center.shape[1], num_points, center.shape[2]), - # device=center.device, - # ) - # directions = directions / torch.norm(directions, dim=-1, keepdim=True) unsqueezed_center = center.unsqueeze(2).expand(-1, -1, num_points, -1) @@ -1587,9 +1453,7 @@ def forward(self, data_dict, return_volume_neighbors=False): encoding_node_vol = pos_volume_center_of_mass # Calculate positional encoding on volume nodes - encoding_node_vol = self.position_encoder( - encoding_node_vol, eval_mode="volume" - ) + encoding_node_vol = self.fc_p_vol(encoding_node_vol) if self.output_features_surf is not None: # Represent geometry on bounding box @@ -1605,9 +1469,7 @@ def forward(self, data_dict, return_volume_neighbors=False): encoding_node_surf = pos_surface_center_of_mass # Calculate positional encoding on surface centers - encoding_node_surf = self.position_encoder( - encoding_node_surf, eval_mode="surface" - ) + encoding_node_surf = self.fc_p_surf(encoding_node_surf) if ( self.output_features_surf is not None @@ -1622,8 +1484,10 @@ def forward(self, data_dict, return_volume_neighbors=False): # Calculate local geometry encoding for volume # Sampled points on volume volume_mesh_centers = data_dict["volume_mesh_centers"] - encoding_g_vol = self.geo_encoding_local( - 0.5 * encoding_g_vol, volume_mesh_centers, p_grid, mode="volume" + encoding_g_vol = self.volume_local_geo_encodings( + 0.5 * encoding_g_vol, + volume_mesh_centers, + p_grid, ) # Approximate solution on volume node @@ -1654,8 +1518,8 @@ def forward(self, data_dict, return_volume_neighbors=False): surface_areas = torch.unsqueeze(surface_areas, -1) surface_neighbors_areas = torch.unsqueeze(surface_neighbors_areas, -1) # Calculate local geometry encoding for surface - encoding_g_surf = self.geo_encoding_local( - 0.5 * encoding_g_surf, surface_mesh_centers, s_grid, mode="surface" + encoding_g_surf = self.surface_local_geo_encodings( + 0.5 * encoding_g_surf, surface_mesh_centers, s_grid ) # Approximate solution on surface cell center From 4038ff3190d05dc0ea9d66d91168a09440b1478e Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Fri, 19 Sep 2025 15:55:02 +0000 Subject: [PATCH 35/98] Further refactor DoMINO to put solution calculations in separate modules. Not only does this clean up the main model code, but this will enable graph capture of the solution functions which is important on hopper. --- physicsnemo/models/domino/model.py | 521 +++---------------------- physicsnemo/models/domino/solutions.py | 405 +++++++++++++++++++ 2 files changed, 455 insertions(+), 471 deletions(-) create mode 100644 physicsnemo/models/domino/solutions.py diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py index f4da77948e..31547c743b 100644 --- a/physicsnemo/models/domino/model.py +++ b/physicsnemo/models/domino/model.py @@ -22,7 +22,6 @@ """ import math -from collections import defaultdict from typing import Callable, Literal, Sequence import torch @@ -40,6 +39,7 @@ fourier_encode_vectorized, ) from .mlps import AggregationModel +from .solutions import SolutionCalculatorSurface, SolutionCalculatorVolume def get_activation(activation: Literal["relu", "gelu"]) -> Callable: @@ -757,18 +757,6 @@ def __init__( else: self.num_volume_neighbors = 50 - if hasattr(model_parameters, "return_volume_neighbors"): - self.return_volume_neighbors = model_parameters.return_volume_neighbors - if ( - self.return_volume_neighbors - and self.solution_calculation_mode == "one-loop" - ): - print( - "'one-loop' solution_calculation mode not supported when return_volume_neighbors is set to true" - ) - print("Overwriting the solution_calculation mode to 'two-loop'") - self.solution_calculation_mode = "two-loop" - if self.use_surface_normals: if not self.use_surface_area: input_features_surface = input_features + 3 @@ -940,6 +928,20 @@ def __init__( ) ) + self.solution_calculator_surf = SolutionCalculatorSurface( + num_variables=self.num_variables_surf, + num_sample_points=self.num_sample_points_surface, + use_surface_normals=self.use_surface_normals, + use_surface_area=self.use_surface_area, + noise_intensity=50, + encode_parameters=self.encode_parameters, + parameter_model=self.parameter_model + if self.encode_parameters + else None, + aggregation_model=self.agg_model_surf, + nn_basis=self.nn_basis_surf, + ) + if self.output_features_vol is not None: # Volume base_layer_geo_vol = 0 @@ -961,452 +963,24 @@ def __init__( ), ) ) - - def calculate_solution_with_neighbors( - self, - surface_mesh_centers, - encoding_g, - encoding_node, - surface_mesh_neighbors, - surface_normals, - surface_neighbors_normals, - surface_areas, - surface_neighbors_areas, - global_params_values, - global_params_reference, - num_sample_points=7, - ): - """Function to approximate solution given the neighborhood information""" - num_variables = self.num_variables_surf - nn_basis = self.nn_basis_surf - agg_model = self.agg_model_surf - - if self.encode_parameters: - processed_parameters = [] - for k in range(global_params_values.shape[1]): - param = torch.unsqueeze(global_params_values[:, k, :], 1) - ref = torch.unsqueeze(global_params_reference[:, k, :], 1) - param = param.expand( - param.shape[0], - surface_mesh_centers.shape[1], - param.shape[2], - ) - param = param / ref - processed_parameters.append(param) - processed_parameters = torch.cat(processed_parameters, axis=-1) - param_encoding = self.parameter_model(processed_parameters) - - if self.use_surface_normals: - if not self.use_surface_area: - surface_mesh_centers = torch.cat( - (surface_mesh_centers, surface_normals), - dim=-1, - ) - if num_sample_points > 1: - surface_mesh_neighbors = torch.cat( - ( - surface_mesh_neighbors, - surface_neighbors_normals, - ), - dim=-1, - ) - + if hasattr(model_parameters, "return_volume_neighbors"): + return_volume_neighbors = model_parameters.return_volume_neighbors else: - surface_mesh_centers = torch.cat( - ( - surface_mesh_centers, - surface_normals, - torch.log(surface_areas) / 10, - ), - dim=-1, - ) - if num_sample_points > 1: - surface_mesh_neighbors = torch.cat( - ( - surface_mesh_neighbors, - surface_neighbors_normals, - torch.log(surface_neighbors_areas) / 10, - ), - dim=-1, - ) - - if self.solution_calculation_mode == "one-loop": - encoding_list = [ - encoding_node.unsqueeze(2).expand(-1, -1, num_sample_points, -1), - encoding_g.unsqueeze(2).expand(-1, -1, num_sample_points, -1), - ] - - for f in range(num_variables): - one_loop_centers_expanded = surface_mesh_centers.unsqueeze(2) - - one_loop_noise = one_loop_centers_expanded - ( - surface_mesh_neighbors + 1e-6 - ) - one_loop_noise = torch.norm(one_loop_noise, dim=-1, keepdim=True) - - # Doing it this way prevents the intermediate one_loop_basis_f from being stored in memory for the rest of the function. - agg_output = agg_model[f]( - torch.cat( - ( - nn_basis[f]( - torch.cat( - ( - one_loop_centers_expanded, - surface_mesh_neighbors + 1e-6, - ), - dim=2, - ) - ), - *encoding_list, - ), - dim=-1, - ) - ) - - one_loop_output_center, one_loop_output_neighbor = torch.split( - agg_output, [1, num_sample_points - 1], dim=2 - ) - one_loop_output_neighbor = one_loop_output_neighbor * ( - 1.0 / one_loop_noise - ) + return_volume_neighbors = False - one_loop_output_center = one_loop_output_center.squeeze(2) - one_loop_output_neighbor = one_loop_output_neighbor.sum(2) - one_loop_dist_sum = torch.sum(1.0 / one_loop_noise, dim=2) - - # Stop here - if num_sample_points > 1: - one_loop_output_res = ( - 0.5 * one_loop_output_center - + 0.5 * one_loop_output_neighbor / one_loop_dist_sum - ) - else: - one_loop_output_res = one_loop_output_center - if f == 0: - one_loop_output_all = one_loop_output_res - else: - one_loop_output_all = torch.cat( - (one_loop_output_all, one_loop_output_res), dim=-1 - ) - - return one_loop_output_all - - if self.solution_calculation_mode == "two-loop": - for f in range(num_variables): - for p in range(num_sample_points): - if p == 0: - volume_m_c = surface_mesh_centers - else: - volume_m_c = surface_mesh_neighbors[:, :, p - 1] + 1e-6 - noise = surface_mesh_centers - volume_m_c - dist = torch.norm(noise, dim=-1, keepdim=True) - - basis_f = nn_basis[f](volume_m_c) - output = torch.cat((basis_f, encoding_node, encoding_g), dim=-1) - if self.encode_parameters: - output = torch.cat((output, param_encoding), dim=-1) - if p == 0: - output_center = agg_model[f](output) - else: - if p == 1: - output_neighbor = agg_model[f](output) * (1.0 / dist) - dist_sum = 1.0 / dist - else: - output_neighbor += agg_model[f](output) * (1.0 / dist) - dist_sum += 1.0 / dist - if num_sample_points > 1: - output_res = 0.5 * output_center + 0.5 * output_neighbor / dist_sum - else: - output_res = output_center - if f == 0: - output_all = output_res - else: - output_all = torch.cat((output_all, output_res), dim=-1) - - return output_all - - def sample_sphere(self, center, r, num_points): - """Uniformly sample points in a 3D sphere around the center. - - This method generates random points within a sphere of radius r centered - at each point in the input tensor. The sampling is uniform in volume, - meaning points are more likely to be sampled in the outer regions of the sphere. - - Args: - center: Tensor of shape (batch_size, num_points, 3) containing center coordinates - r: Radius of the sphere for sampling - num_points: Number of points to sample per center - - Returns: - Tensor of shape (batch_size, num_points, num_samples, 3) containing - the sampled points around each center - """ - # Adjust the center points to the final shape: - unsqueezed_center = center.unsqueeze(2).expand(-1, -1, num_points, -1) - - # Generate directions like the centers: - directions = torch.randn_like(unsqueezed_center) - directions = directions / torch.norm(directions, dim=-1, keepdim=True) - - # Generate radii like the centers: - radii = r * torch.pow(torch.rand_like(unsqueezed_center), 1 / 3) - - output = unsqueezed_center + directions * radii - return output - - def sample_sphere_shell(self, center, r_inner, r_outer, num_points): - """Uniformly sample points in a 3D spherical shell around a center. - - This method generates random points within a spherical shell (annulus) - between inner radius r_inner and outer radius r_outer centered at each - point in the input tensor. The sampling is uniform in volume within the shell. - - Args: - center: Tensor of shape (batch_size, num_points, 3) containing center coordinates - r_inner: Inner radius of the spherical shell - r_outer: Outer radius of the spherical shell - num_points: Number of points to sample per center - - Returns: - Tensor of shape (batch_size, num_points, num_samples, 3) containing - the sampled points within the spherical shell around each center - """ - - unsqueezed_center = center.unsqueeze(2).expand(-1, -1, num_points, -1) - - # Generate directions like the centers: - directions = torch.randn_like(unsqueezed_center) - directions = directions / torch.norm(directions, dim=-1, keepdim=True) - - radii = ( - torch.rand_like(unsqueezed_center) * (r_outer**3 - r_inner**3) + r_inner**3 - ) - radii = torch.pow(radii, 1 / 3) - - output = unsqueezed_center + directions * radii - - return output - - def calculate_solution( - self, - volume_mesh_centers, - encoding_g, - encoding_node, - global_params_values, - global_params_reference, - eval_mode, - num_sample_points=20, - noise_intensity=50, - return_volume_neighbors=False, - ): - """Function to approximate solution sampling the neighborhood information""" - if eval_mode == "volume": - num_variables = self.num_variables_vol - nn_basis = self.nn_basis_vol - agg_model = self.agg_model_vol - elif eval_mode == "surface": - num_variables = self.num_variables_surf - nn_basis = self.nn_basis_surf - agg_model = self.agg_model_surf - - if self.encode_parameters: - processed_parameters = [] - for k in range(global_params_values.shape[1]): - param = torch.unsqueeze(global_params_values[:, k, :], 1) - ref = torch.unsqueeze(global_params_reference[:, k, :], 1) - param = param.expand( - param.shape[0], - volume_mesh_centers.shape[1], - param.shape[2], - ) - param = param / ref - processed_parameters.append(param) - processed_parameters = torch.cat(processed_parameters, axis=-1) - param_encoding = self.parameter_model(processed_parameters) - - if self.solution_calculation_mode == "one-loop": - # Stretch these out to num_sample_points - one_loop_encoding_node = encoding_node.unsqueeze(0).expand( - num_sample_points, -1, -1, -1 - ) - one_loop_encoding_g = encoding_g.unsqueeze(0).expand( - num_sample_points, -1, -1, -1 + self.solution_calculator_vol = SolutionCalculatorVolume( + num_variables=self.num_variables_vol, + num_sample_points=self.num_sample_points_volume, + noise_intensity=50, + return_volume_neighbors=return_volume_neighbors, + encode_parameters=self.encode_parameters, + parameter_model=self.parameter_model + if self.encode_parameters + else None, + aggregation_model=self.agg_model_vol, + nn_basis=self.nn_basis_vol, ) - if self.encode_parameters: - one_loop_other_terms = ( - one_loop_encoding_node, - one_loop_encoding_g, - param_encoding, - ) - else: - one_loop_other_terms = (one_loop_encoding_node, one_loop_encoding_g) - - for f in range(num_variables): - one_loop_volume_mesh_centers_expanded = volume_mesh_centers.unsqueeze( - 0 - ).expand(num_sample_points, -1, -1, -1) - # Bulk_random_noise has shape (num_sample_points, batch_size, num_points, 3) - one_loop_bulk_random_noise = torch.rand_like( - one_loop_volume_mesh_centers_expanded - ) - - one_loop_bulk_random_noise = 2 * (one_loop_bulk_random_noise - 0.5) - one_loop_bulk_random_noise = ( - one_loop_bulk_random_noise / noise_intensity - ) - one_loop_bulk_dist = torch.norm( - one_loop_bulk_random_noise, dim=-1, keepdim=True - ) - - _, one_loop_bulk_dist = torch.split( - one_loop_bulk_dist, [1, num_sample_points - 1], dim=0 - ) - - # Set the first sample point to 0.0: - one_loop_bulk_random_noise[0] = torch.zeros_like( - one_loop_bulk_random_noise[0] - ) - - # Add the noise to the expanded volume_mesh_centers: - one_loop_volume_m_c = volume_mesh_centers + one_loop_bulk_random_noise - # If this looks overly complicated - it is. - # But, this makes sure that the memory used to store the output of both nn_basis[f] - # as well as the output of torch.cat can be deallocated immediately. - # Apply the aggregation model and distance scaling: - one_loop_output = agg_model[f]( - torch.cat( - (nn_basis[f](one_loop_volume_m_c), *one_loop_other_terms), - dim=-1, - ) - ) - - # select off the first, unperturbed term: - one_loop_output_center, one_loop_output_neighbor = torch.split( - one_loop_output, [1, num_sample_points - 1], dim=0 - ) - - # Scale the neighbor terms by the distance: - one_loop_output_neighbor = one_loop_output_neighbor / one_loop_bulk_dist - - one_loop_dist_sum = torch.sum(1.0 / one_loop_bulk_dist, dim=0) - - # Adjust shapes: - one_loop_output_center = one_loop_output_center.squeeze(1) - one_loop_output_neighbor = one_loop_output_neighbor.sum(0) - - # Compare: - if num_sample_points > 1: - one_loop_output_res = ( - 0.5 * one_loop_output_center - + 0.5 * one_loop_output_neighbor / one_loop_dist_sum - ) - else: - one_loop_output_res = one_loop_output_center - if f == 0: - one_loop_output_all = one_loop_output_res - else: - one_loop_output_all = torch.cat( - (one_loop_output_all, one_loop_output_res), dim=-1 - ) - - return one_loop_output_all - - if self.solution_calculation_mode == "two-loop": - volume_m_c_perturbed = [volume_mesh_centers.unsqueeze(2)] - - if return_volume_neighbors: - num_hop1 = num_sample_points - num_hop2 = ( - num_sample_points // 2 if num_sample_points != 1 else 1 - ) # This is per 1 hop node - neighbors = defaultdict(list) - - volume_m_c_hop1 = self.sample_sphere( - volume_mesh_centers, 1 / noise_intensity, num_hop1 - ) - # 1 hop neighbors - for i in range(num_hop1): - idx = len(volume_m_c_perturbed) - volume_m_c_perturbed.append(volume_m_c_hop1[:, :, i : i + 1, :]) - neighbors[0].append(idx) - - # 2 hop neighbors - for i in range(num_hop1): - parent_idx = ( - i + 1 - ) # Skipping the first point, which is the original - parent_point = volume_m_c_perturbed[parent_idx] - - children = self.sample_sphere_shell( - parent_point.squeeze(2), - 1 / noise_intensity, - 2 / noise_intensity, - num_hop2, - ) - - for c in range(num_hop2): - idx = len(volume_m_c_perturbed) - volume_m_c_perturbed.append(children[:, :, c : c + 1, :]) - neighbors[parent_idx].append(idx) - - volume_m_c_perturbed = torch.cat(volume_m_c_perturbed, dim=2) - neighbors = dict(neighbors) - field_neighbors = {i: [] for i in range(num_variables)} - else: - volume_m_c_sample = self.sample_sphere( - volume_mesh_centers, 1 / noise_intensity, num_sample_points - ) - for i in range(num_sample_points): - volume_m_c_perturbed.append(volume_m_c_sample[:, :, i : i + 1, :]) - - volume_m_c_perturbed = torch.cat(volume_m_c_perturbed, dim=2) - - for f in range(num_variables): - for p in range(volume_m_c_perturbed.shape[2]): - volume_m_c = volume_m_c_perturbed[:, :, p, :] - if p != 0: - dist = torch.norm( - volume_m_c - volume_mesh_centers, dim=-1, keepdim=True - ) - basis_f = nn_basis[f](volume_m_c) - output = torch.cat((basis_f, encoding_node, encoding_g), dim=-1) - if self.encode_parameters: - output = torch.cat((output, param_encoding), dim=-1) - if p == 0: - output_center = agg_model[f](output) - else: - if p == 1: - output_neighbor = agg_model[f](output) * (1.0 / dist) - dist_sum = 1.0 / dist - else: - output_neighbor += agg_model[f](output) * (1.0 / dist) - dist_sum += 1.0 / dist - if return_volume_neighbors: - field_neighbors[f].append(agg_model[f](output)) - - if return_volume_neighbors: - field_neighbors[f] = torch.stack(field_neighbors[f], dim=2) - - if num_sample_points > 1: - output_res = ( - 0.5 * output_center + 0.5 * output_neighbor / dist_sum - ) # This only applies to the main point, and not the preturbed points - else: - output_res = output_center - if f == 0: - output_all = output_res - else: - output_all = torch.cat((output_all, output_res), axis=-1) - - if return_volume_neighbors: - field_neighbors = torch.cat( - [field_neighbors[i] for i in range(num_variables)], dim=3 - ) - return output_all, volume_m_c_perturbed, field_neighbors, neighbors - else: - return output_all - @profile def forward(self, data_dict, return_volume_neighbors=False): # Loading STL inputs, bounding box grids, precomputed SDF and scaling factors @@ -1417,9 +991,6 @@ def forward(self, data_dict, return_volume_neighbors=False): # Bounding box grid s_grid = data_dict["surf_grid"] sdf_surf_grid = data_dict["sdf_surf_grid"] - # Scaling factors - surf_max = data_dict["surface_min_max"][:, 1] - surf_min = data_dict["surface_min_max"][:, 0] # Parameters global_params_values = data_dict["global_params_values"] @@ -1431,11 +1002,16 @@ def forward(self, data_dict, return_volume_neighbors=False): p_grid = data_dict["grid"] sdf_grid = data_dict["sdf_grid"] # Scaling factors - vol_max = data_dict["volume_min_max"][:, 1] - vol_min = data_dict["volume_min_max"][:, 0] + if "volume_min_max" in data_dict.keys(): + vol_max = data_dict["volume_min_max"][:, 1] + vol_min = data_dict["volume_min_max"][:, 0] - # Normalize based on computational domain - geo_centers_vol = 2.0 * (geo_centers - vol_min) / (vol_max - vol_min) - 1 + # Normalize based on computational domain + geo_centers_vol = ( + 2.0 * (geo_centers - vol_min) / (vol_max - vol_min) - 1 + ) + else: + geo_centers_vol = geo_centers encoding_g_vol = self.geo_rep_volume(geo_centers_vol, p_grid, sdf_grid) @@ -1457,9 +1033,16 @@ def forward(self, data_dict, return_volume_neighbors=False): if self.output_features_surf is not None: # Represent geometry on bounding box - geo_centers_surf = ( - 2.0 * (geo_centers - surf_min) / (surf_max - surf_min) - 1 - ) + # Scaling factors + if "surface_min_max" in data_dict.keys(): + surf_max = data_dict["surface_min_max"][:, 1] + surf_min = data_dict["surface_min_max"][:, 0] + geo_centers_surf = ( + 2.0 * (geo_centers - surf_min) / (surf_max - surf_min) - 1 + ) + else: + geo_centers_surf = geo_centers + encoding_g_surf = self.geo_rep_surface( geo_centers_surf, s_grid, sdf_surf_grid ) @@ -1491,15 +1074,12 @@ def forward(self, data_dict, return_volume_neighbors=False): ) # Approximate solution on volume node - output_vol = self.calculate_solution( + output_vol = self.solution_calculator_vol( volume_mesh_centers, encoding_g_vol, encoding_node_vol, global_params_values, global_params_reference, - eval_mode="volume", - num_sample_points=self.num_sample_points_volume, - return_volume_neighbors=return_volume_neighbors, ) else: @@ -1523,7 +1103,7 @@ def forward(self, data_dict, return_volume_neighbors=False): ) # Approximate solution on surface cell center - output_surf = self.calculate_solution_with_neighbors( + output_surf = self.solution_calculator_surf( surface_mesh_centers, encoding_g_surf, encoding_node_surf, @@ -1534,7 +1114,6 @@ def forward(self, data_dict, return_volume_neighbors=False): surface_neighbors_areas, global_params_values, global_params_reference, - num_sample_points=self.num_sample_points_surface, ) else: output_surf = None diff --git a/physicsnemo/models/domino/solutions.py b/physicsnemo/models/domino/solutions.py new file mode 100644 index 0000000000..87c2bf0d16 --- /dev/null +++ b/physicsnemo/models/domino/solutions.py @@ -0,0 +1,405 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This code contains the DoMINO model architecture. +The DoMINO class contains an architecture to model both surface and +volume quantities together as well as separately (controlled using +the config.yaml file) +""" + +from collections import defaultdict + +import torch +import torch.nn as nn + + +def sample_sphere(center, r, num_points): + """Uniformly sample points in a 3D sphere around the center. + + This method generates random points within a sphere of radius r centered + at each point in the input tensor. The sampling is uniform in volume, + meaning points are more likely to be sampled in the outer regions of the sphere. + + Args: + center: Tensor of shape (batch_size, num_points, 3) containing center coordinates + r: Radius of the sphere for sampling + num_points: Number of points to sample per center + + Returns: + Tensor of shape (batch_size, num_points, num_samples, 3) containing + the sampled points around each center + """ + # Adjust the center points to the final shape: + unsqueezed_center = center.unsqueeze(2).expand(-1, -1, num_points, -1) + + # Generate directions like the centers: + directions = torch.randn_like(unsqueezed_center) + directions = directions / torch.norm(directions, dim=-1, keepdim=True) + + # Generate radii like the centers: + radii = r * torch.pow(torch.rand_like(unsqueezed_center), 1 / 3) + + output = unsqueezed_center + directions * radii + return output + + +def sample_sphere_shell(center, r_inner, r_outer, num_points): + """Uniformly sample points in a 3D spherical shell around a center. + + This method generates random points within a spherical shell (annulus) + between inner radius r_inner and outer radius r_outer centered at each + point in the input tensor. The sampling is uniform in volume within the shell. + + Args: + center: Tensor of shape (batch_size, num_points, 3) containing center coordinates + r_inner: Inner radius of the spherical shell + r_outer: Outer radius of the spherical shell + num_points: Number of points to sample per center + + Returns: + Tensor of shape (batch_size, num_points, num_samples, 3) containing + the sampled points within the spherical shell around each center + """ + + unsqueezed_center = center.unsqueeze(2).expand(-1, -1, num_points, -1) + + # Generate directions like the centers: + directions = torch.randn_like(unsqueezed_center) + directions = directions / torch.norm(directions, dim=-1, keepdim=True) + + radii = torch.rand_like(unsqueezed_center) * (r_outer**3 - r_inner**3) + r_inner**3 + radii = torch.pow(radii, 1 / 3) + + output = unsqueezed_center + directions * radii + + return output + + +class SolutionCalculatorVolume(nn.Module): + """ + Module to calculate the output solution of the DoMINO Model for volume data. + """ + + def __init__( + self, + num_variables: int, + num_sample_points: int, + noise_intensity: float, + encode_parameters: bool, + return_volume_neighbors: bool, + parameter_model: nn.Module | None, + aggregation_model: nn.ModuleList, + nn_basis: nn.ModuleList, + ): + super().__init__() + + self.num_variables = num_variables + self.num_sample_points = num_sample_points + self.noise_intensity = noise_intensity + self.encode_parameters = encode_parameters + self.return_volume_neighbors = return_volume_neighbors + self.parameter_model = parameter_model + self.aggregation_model = aggregation_model + self.nn_basis = nn_basis + + if self.encode_parameters: + if self.parameter_model is None: + raise ValueError( + "Parameter model is required when encode_parameters is True" + ) + + def encode_parameters( + self, + mesh_centers: torch.Tensor, + global_params_values: torch.Tensor, + global_params_reference: torch.Tensor, + ) -> torch.Tensor: + processed_parameters = [] + for k in range(global_params_values.shape[1]): + param = torch.unsqueeze(global_params_values[:, k, :], 1) + ref = torch.unsqueeze(global_params_reference[:, k, :], 1) + param = param.expand( + param.shape[0], + mesh_centers.shape[1], + param.shape[2], + ) + param = param / ref + processed_parameters.append(param) + processed_parameters = torch.cat(processed_parameters, axis=-1) + param_encoding = self.parameter_model(processed_parameters) + + return param_encoding + + def forward( + self, + volume_mesh_centers: torch.Tensor, + encoding_g: torch.Tensor, + encoding_node: torch.Tensor, + global_params_values: torch.Tensor, + global_params_reference: torch.Tensor, + ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]: + """ + Forward pass of the SolutionCalculator module. + """ + if self.encode_parameters: + param_encoding = self.encode_parameters( + volume_mesh_centers, global_params_values, global_params_reference + ) + + volume_m_c_perturbed = [volume_mesh_centers.unsqueeze(2)] + + if self.return_volume_neighbors: + num_hop1 = self.num_sample_points + num_hop2 = ( + self.num_sample_points // 2 if self.num_sample_points != 1 else 1 + ) # This is per 1 hop node + neighbors = defaultdict(list) + + volume_m_c_hop1 = sample_sphere( + volume_mesh_centers, 1 / self.noise_intensity, num_hop1 + ) + # 1 hop neighbors + for i in range(num_hop1): + idx = len(volume_m_c_perturbed) + volume_m_c_perturbed.append(volume_m_c_hop1[:, :, i : i + 1, :]) + neighbors[0].append(idx) + + # 2 hop neighbors + for i in range(num_hop1): + parent_idx = i + 1 # Skipping the first point, which is the original + parent_point = volume_m_c_perturbed[parent_idx] + + children = sample_sphere_shell( + parent_point.squeeze(2), + 1 / self.noise_intensity, + 2 / self.noise_intensity, + num_hop2, + ) + + for c in range(num_hop2): + idx = len(volume_m_c_perturbed) + volume_m_c_perturbed.append(children[:, :, c : c + 1, :]) + neighbors[parent_idx].append(idx) + + volume_m_c_perturbed = torch.cat(volume_m_c_perturbed, dim=2) + neighbors = dict(neighbors) + field_neighbors = {i: [] for i in range(self.num_variables)} + else: + volume_m_c_sample = sample_sphere( + volume_mesh_centers, 1 / self.noise_intensity, self.num_sample_points + ) + for i in range(self.num_sample_points): + volume_m_c_perturbed.append(volume_m_c_sample[:, :, i : i + 1, :]) + + volume_m_c_perturbed = torch.cat(volume_m_c_perturbed, dim=2) + + for f in range(self.num_variables): + for p in range(volume_m_c_perturbed.shape[2]): + volume_m_c = volume_m_c_perturbed[:, :, p, :] + if p != 0: + dist = torch.norm( + volume_m_c - volume_mesh_centers, dim=-1, keepdim=True + ) + basis_f = self.nn_basis[f](volume_m_c) + output = torch.cat((basis_f, encoding_node, encoding_g), dim=-1) + if self.encode_parameters: + output = torch.cat((output, param_encoding), dim=-1) + if p == 0: + output_center = self.aggregation_model[f](output) + else: + if p == 1: + output_neighbor = self.aggregation_model[f](output) * ( + 1.0 / dist + ) + dist_sum = 1.0 / dist + else: + output_neighbor += self.aggregation_model[f](output) * ( + 1.0 / dist + ) + dist_sum += 1.0 / dist + if self.return_volume_neighbors: + field_neighbors[f].append(self.aggregation_model[f](output)) + + if self.return_volume_neighbors: + field_neighbors[f] = torch.stack(field_neighbors[f], dim=2) + + if self.num_sample_points > 1: + output_res = ( + 0.5 * output_center + 0.5 * output_neighbor / dist_sum + ) # This only applies to the main point, and not the preturbed points + else: + output_res = output_center + if f == 0: + output_all = output_res + else: + output_all = torch.cat((output_all, output_res), axis=-1) + + if self.return_volume_neighbors: + field_neighbors = torch.cat( + [field_neighbors[i] for i in range(self.num_variables)], dim=3 + ) + return output_all, volume_m_c_perturbed, field_neighbors, neighbors + else: + return output_all + + +class SolutionCalculatorSurface(nn.Module): + """ + Module to calculate the output solution of the DoMINO Model for surface data. + """ + + def __init__( + self, + num_variables: int, + num_sample_points: int, + noise_intensity: float, + encode_parameters: bool, + use_surface_normals: bool, + use_surface_area: bool, + parameter_model: nn.Module | None, + aggregation_model: nn.ModuleList, + nn_basis: nn.ModuleList, + ): + super().__init__() + self.num_variables = num_variables + self.num_sample_points = num_sample_points + self.noise_intensity = noise_intensity + self.encode_parameters = encode_parameters + self.use_surface_normals = use_surface_normals + self.use_surface_area = use_surface_area + self.parameter_model = parameter_model + self.aggregation_model = aggregation_model + self.nn_basis = nn_basis + + if self.encode_parameters: + if self.parameter_model is None: + raise ValueError( + "Parameter model is required when encode_parameters is True" + ) + + def encode_parameters( + self, + mesh_centers: torch.Tensor, + global_params_values: torch.Tensor, + global_params_reference: torch.Tensor, + ) -> torch.Tensor: + processed_parameters = [] + for k in range(global_params_values.shape[1]): + param = torch.unsqueeze(global_params_values[:, k, :], 1) + ref = torch.unsqueeze(global_params_reference[:, k, :], 1) + param = param.expand( + param.shape[0], + mesh_centers.shape[1], + param.shape[2], + ) + param = param / ref + processed_parameters.append(param) + processed_parameters = torch.cat(processed_parameters, axis=-1) + param_encoding = self.parameter_model(processed_parameters) + + return param_encoding + + def forward( + self, + surface_mesh_centers: torch.Tensor, + encoding_g: torch.Tensor, + encoding_node: torch.Tensor, + surface_mesh_neighbors: torch.Tensor, + surface_normals: torch.Tensor, + surface_neighbors_normals: torch.Tensor, + surface_areas: torch.Tensor, + surface_neighbors_areas: torch.Tensor, + global_params_values: torch.Tensor, + global_params_reference: torch.Tensor, + ) -> torch.Tensor: + """Function to approximate solution given the neighborhood information""" + + if self.encode_parameters: + param_encoding = self.encode_parameters( + surface_mesh_centers, global_params_values, global_params_reference + ) + + if self.use_surface_normals: + if not self.use_surface_area: + surface_mesh_centers = torch.cat( + (surface_mesh_centers, surface_normals), + dim=-1, + ) + if self.num_sample_points > 1: + surface_mesh_neighbors = torch.cat( + ( + surface_mesh_neighbors, + surface_neighbors_normals, + ), + dim=-1, + ) + + else: + surface_mesh_centers = torch.cat( + ( + surface_mesh_centers, + surface_normals, + torch.log(surface_areas) / 10, + ), + dim=-1, + ) + if self.num_sample_points > 1: + surface_mesh_neighbors = torch.cat( + ( + surface_mesh_neighbors, + surface_neighbors_normals, + torch.log(surface_neighbors_areas) / 10, + ), + dim=-1, + ) + + for f in range(self.num_variables): + for p in range(self.num_sample_points): + if p == 0: + volume_m_c = surface_mesh_centers + else: + volume_m_c = surface_mesh_neighbors[:, :, p - 1] + 1e-6 + noise = surface_mesh_centers - volume_m_c + dist = torch.norm(noise, dim=-1, keepdim=True) + + basis_f = self.nn_basis[f](volume_m_c) + output = torch.cat((basis_f, encoding_node, encoding_g), dim=-1) + if self.encode_parameters: + output = torch.cat((output, param_encoding), dim=-1) + if p == 0: + output_center = self.aggregation_model[f](output) + else: + if p == 1: + output_neighbor = self.aggregation_model[f](output) * ( + 1.0 / dist + ) + dist_sum = 1.0 / dist + else: + output_neighbor += self.aggregation_model[f](output) * ( + 1.0 / dist + ) + dist_sum += 1.0 / dist + if self.num_sample_points > 1: + output_res = 0.5 * output_center + 0.5 * output_neighbor / dist_sum + else: + output_res = output_center + if f == 0: + output_all = output_res + else: + output_all = torch.cat((output_all, output_res), dim=-1) + + return output_all From 5539c544e40140ede71e0d33290081feeb7aae1d Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Mon, 22 Sep 2025 13:49:33 +0000 Subject: [PATCH 36/98] Refactor domino model and add significant test suite expansion. Thie refactor consolidates the MLP implementations into one interface, and also makes the volume_minmax and surface_minmax rescaling contingent on the presence of those values in the input dict. --- physicsnemo/models/domino/encodings.py | 3 +- physicsnemo/models/domino/model.py | 2 +- physicsnemo/models/domino/solutions.py | 56 ++-- test/models/domino/__init__.py | 15 ++ test/models/{ => domino}/test_domino.py | 7 +- test/models/domino/test_domino_encodings.py | 148 +++++++++++ .../models/domino/test_domino_geometry_rep.py | 128 +++++++++ test/models/domino/test_domino_mlps.py | 86 ++++++ test/models/domino/test_domino_solutions.py | 244 ++++++++++++++++++ test/models/domino/utils.py | 154 +++++++++++ 10 files changed, 802 insertions(+), 41 deletions(-) create mode 100644 test/models/domino/__init__.py rename test/models/{ => domino}/test_domino.py (97%) create mode 100644 test/models/domino/test_domino_encodings.py create mode 100644 test/models/domino/test_domino_geometry_rep.py create mode 100644 test/models/domino/test_domino_mlps.py create mode 100644 test/models/domino/test_domino_solutions.py create mode 100644 test/models/domino/utils.py diff --git a/physicsnemo/models/domino/encodings.py b/physicsnemo/models/domino/encodings.py index ca2ef34f0f..068e4b3a1f 100644 --- a/physicsnemo/models/domino/encodings.py +++ b/physicsnemo/models/domino/encodings.py @@ -88,7 +88,7 @@ def __init__( if self.fourier_features: input_features_calculated = input_features + input_features * num_modes * 2 self.register_buffer( - "freqs", torch.exp(torch.linspace(0, math.pi, self.num_modes)) + "freqs", torch.exp(torch.linspace(0, math.pi, num_modes)) ) else: input_features_calculated = input_features @@ -160,6 +160,7 @@ def forward( mapping, outputs = self.bq_warp( volume_mesh_centers, p_grid, reverse_mapping=False ) + mapping = mapping.type(torch.int64) mask = mapping != 0 diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py index 31547c743b..4ad6ae2856 100644 --- a/physicsnemo/models/domino/model.py +++ b/physicsnemo/models/domino/model.py @@ -127,7 +127,7 @@ def forward( Args: x: Input tensor containing coordinates of the neighboring points - (batch_size, nx*ny*nz, 3, n_points) + (batch_size, nx*ny*nz, n_points, 3) grid: Input tensor represented as a grid of shape (batch_size, nx, ny, nz, 3) diff --git a/physicsnemo/models/domino/solutions.py b/physicsnemo/models/domino/solutions.py index 87c2bf0d16..c3968e8dcf 100644 --- a/physicsnemo/models/domino/solutions.py +++ b/physicsnemo/models/domino/solutions.py @@ -122,7 +122,7 @@ def __init__( "Parameter model is required when encode_parameters is True" ) - def encode_parameters( + def apply_parameter_encoding( self, mesh_centers: torch.Tensor, global_params_values: torch.Tensor, @@ -156,7 +156,7 @@ def forward( Forward pass of the SolutionCalculator module. """ if self.encode_parameters: - param_encoding = self.encode_parameters( + param_encoding = self.apply_parameter_encoding( volume_mesh_centers, global_params_values, global_params_reference ) @@ -291,7 +291,7 @@ def __init__( "Parameter model is required when encode_parameters is True" ) - def encode_parameters( + def apply_parameter_encoding( self, mesh_centers: torch.Tensor, global_params_values: torch.Tensor, @@ -329,43 +329,29 @@ def forward( """Function to approximate solution given the neighborhood information""" if self.encode_parameters: - param_encoding = self.encode_parameters( + param_encoding = self.apply_parameter_encoding( surface_mesh_centers, global_params_values, global_params_reference ) + centers_inputs = [ + surface_mesh_centers, + ] + neighbors_inputs = [ + surface_mesh_neighbors, + ] + if self.use_surface_normals: - if not self.use_surface_area: - surface_mesh_centers = torch.cat( - (surface_mesh_centers, surface_normals), - dim=-1, - ) - if self.num_sample_points > 1: - surface_mesh_neighbors = torch.cat( - ( - surface_mesh_neighbors, - surface_neighbors_normals, - ), - dim=-1, - ) + centers_inputs.append(surface_normals) + if self.num_sample_points > 1: + neighbors_inputs.append(surface_neighbors_normals) - else: - surface_mesh_centers = torch.cat( - ( - surface_mesh_centers, - surface_normals, - torch.log(surface_areas) / 10, - ), - dim=-1, - ) - if self.num_sample_points > 1: - surface_mesh_neighbors = torch.cat( - ( - surface_mesh_neighbors, - surface_neighbors_normals, - torch.log(surface_neighbors_areas) / 10, - ), - dim=-1, - ) + if self.use_surface_area: + centers_inputs.append(torch.log(surface_areas) / 10) + if self.num_sample_points > 1: + neighbors_inputs.append(torch.log(surface_neighbors_areas) / 10) + + surface_mesh_centers = torch.cat(centers_inputs, dim=-1) + surface_mesh_neighbors = torch.cat(neighbors_inputs, dim=-1) for f in range(self.num_variables): for p in range(self.num_sample_points): diff --git a/test/models/domino/__init__.py b/test/models/domino/__init__.py new file mode 100644 index 0000000000..b2f171d4ac --- /dev/null +++ b/test/models/domino/__init__.py @@ -0,0 +1,15 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/test/models/test_domino.py b/test/models/domino/test_domino.py similarity index 97% rename from test/models/test_domino.py rename to test/models/domino/test_domino.py index 87110491d0..e606b9ecf8 100644 --- a/test/models/test_domino.py +++ b/test/models/domino/test_domino.py @@ -22,9 +22,8 @@ import torch from pytest_utils import import_or_fail -# from . import common -from .common.fwdaccuracy import save_output -from .common.utils import compare_output +from ..common.fwdaccuracy import save_output +from ..common.utils import compare_output def validate_domino( @@ -44,7 +43,7 @@ def validate_domino( if file_name is None: file_name = model.meta.name + "_output.pth" file_name = ( - Path(__file__).parents[0].resolve() / Path("data") / Path(file_name.lower()) + Path(__file__).parents[1].resolve() / Path("data") / Path(file_name.lower()) ) # If file does not exist, we will create it then error # Model should then reproduce it on next pytest run diff --git a/test/models/domino/test_domino_encodings.py b/test/models/domino/test_domino_encodings.py new file mode 100644 index 0000000000..a27e2dd0a9 --- /dev/null +++ b/test/models/domino/test_domino_encodings.py @@ -0,0 +1,148 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math + +import pytest +import torch + +from .utils import validate_output_shape_and_values + + +@pytest.mark.parametrize("device", ["cuda:0"]) +@pytest.mark.parametrize("fourier_features", [True, False]) +@pytest.mark.parametrize("num_modes", [3, 5, 10]) +def test_encoding_mlp(device, fourier_features, num_modes): + """Test EncodingMLP with various configurations""" + from physicsnemo.models.domino.encodings import EncodingMLP + from physicsnemo.models.domino.model import get_activation + + torch.manual_seed(0) + + model = EncodingMLP( + input_features=3, + base_layer=64, + fourier_features=fourier_features, + num_modes=num_modes, + activation=get_activation("relu"), + ).to(device) + + x = torch.randn(2, 100, 3).to(device) + output = model(x) + + validate_output_shape_and_values(output, (2, 100, 64)) + + +@pytest.mark.parametrize("device", ["cuda:0"]) +def test_fourier_encode_vectorized(device): + """Test fourier encoding function""" + from physicsnemo.models.domino.encodings import fourier_encode_vectorized + + torch.manual_seed(0) + + coords = torch.randn(4, 20, 3).to(device) + freqs = torch.exp(torch.linspace(0, math.pi, 5)).to(device) + + output = fourier_encode_vectorized(coords, freqs) + + # Output should be [batch, points, D * 2 * F] = [4, 20, 3 * 2 * 5] = [4, 20, 30] + validate_output_shape_and_values(output, (4, 20, 30)) + + +@pytest.mark.parametrize("device", ["cuda:0"]) +def test_local_geometry_encoding(device): + """Test LocalGeometryEncoding""" + from physicsnemo.models.domino.encodings import LocalGeometryEncoding + from physicsnemo.models.domino.model import get_activation + + BATCH_SIZE = 1 + + torch.manual_seed(0) + + N_ENCODING_CHANNELS = 3 + N_NEIGHBORS = 32 + N_MESH_POINTS = 50 + GRID_RESOLUTION = (32, 32, 32) + + model = LocalGeometryEncoding( + radius=0.1, + neighbors_in_radius=N_NEIGHBORS, + total_neighbors_in_radius=N_ENCODING_CHANNELS * N_NEIGHBORS, + base_layer=128, + activation=get_activation("relu"), + grid_resolution=GRID_RESOLUTION, + ).to(device) + + encoding_g = torch.randn(BATCH_SIZE, N_ENCODING_CHANNELS, *GRID_RESOLUTION).to( + device + ) + volume_mesh_centers = torch.randn(BATCH_SIZE, N_MESH_POINTS, 3).to(device) + p_grid = torch.randn(BATCH_SIZE, *GRID_RESOLUTION, 3).to(device) + + output = model(encoding_g, volume_mesh_centers, p_grid) + + validate_output_shape_and_values(output, (BATCH_SIZE, N_MESH_POINTS, 32)) + + +@pytest.mark.parametrize("device", ["cuda:0"]) +@pytest.mark.parametrize("geo_encoding_type", ["both", "stl", "sdf"]) +def test_multi_geometry_encoding(device, geo_encoding_type): + """Test MultiGeometryEncoding with different encoding types""" + from physicsnemo.models.domino.encodings import MultiGeometryEncoding + from physicsnemo.models.domino.model import get_activation + + torch.manual_seed(0) + + BATCH_SIZE = 1 + N_MESH_POINTS = 50 + GRID_RESOLUTION = (32, 32, 32) + + radii = [0.05, 0.1] + neighbors_in_radius = [16, 32] + + model = MultiGeometryEncoding( + radii=radii, + neighbors_in_radius=neighbors_in_radius, + geo_encoding_type=geo_encoding_type, + base_layer=64, + activation=get_activation("relu"), + grid_resolution=GRID_RESOLUTION, + ).to(device) + + if geo_encoding_type == "both": + num_channels = len(radii) + 1 + elif geo_encoding_type == "stl": + num_channels = len(radii) + else: # sdf + num_channels = 1 + + encoding_g = torch.randn(BATCH_SIZE, num_channels, *GRID_RESOLUTION).to(device) + volume_mesh_centers = torch.randn(BATCH_SIZE, N_MESH_POINTS, 3).to(device) + p_grid = torch.randn(BATCH_SIZE, *GRID_RESOLUTION, 3).to(device) + + print(f"encoding_g.shape: {encoding_g.shape}") + print(f"volume_mesh_centers.shape: {volume_mesh_centers.shape}") + print(f"p_grid.shape: {p_grid.shape}") + + output = model(encoding_g, volume_mesh_centers, p_grid) + + print(f"output.shape: {output.shape}") + + expected_output_dim = sum(neighbors_in_radius) + + validate_output_shape_and_values( + output, (BATCH_SIZE, N_MESH_POINTS, expected_output_dim) + ) diff --git a/test/models/domino/test_domino_geometry_rep.py b/test/models/domino/test_domino_geometry_rep.py new file mode 100644 index 0000000000..12ede24be5 --- /dev/null +++ b/test/models/domino/test_domino_geometry_rep.py @@ -0,0 +1,128 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass + +import pytest +import torch + +from .utils import validate_output_shape_and_values + + +@pytest.mark.parametrize("device", ["cuda:0"]) +@pytest.mark.parametrize("act", ["relu", "gelu"]) +def test_geo_conv_out(device, act): + """Test GeoConvOut layer""" + from physicsnemo.models.domino.model import GeoConvOut + + torch.manual_seed(0) + + @dataclass + class TestParams: + base_neurons: int = 32 + base_neurons_in: int = 8 + fourier_features: bool = False + num_modes: int = 5 + activation: str = act + + params = TestParams() + grid_resolution = [32, 32, 32] + + layer = GeoConvOut( + input_features=3, model_parameters=params, grid_resolution=grid_resolution + ).to(device) + + x = torch.randn(1, 32 * 32 * 32, 10, 3).to(device) + grid = torch.randn(1, *grid_resolution, 3).to(device) + + output = layer(x, grid) + + validate_output_shape_and_values( + output, (1, params.base_neurons_in, *grid_resolution) + ) + + +@pytest.mark.parametrize("device", ["cuda:0"]) +@pytest.mark.parametrize("act", ["relu", "gelu"]) +def test_geo_processor(device, act): + """Test GeoProcessor CNN""" + from physicsnemo.models.domino.model import GeoProcessor + + torch.manual_seed(0) + + @dataclass + class TestParams: + base_filters: int = 8 + activation: str = act + + params = TestParams() + + processor = GeoProcessor( + input_filters=4, output_filters=2, model_parameters=params + ).to(device) + + x = torch.randn(2, 4, 16, 16, 16).to(device) + output = processor(x) + + validate_output_shape_and_values(output, (2, 2, 16, 16, 16)) + + +@pytest.mark.parametrize("device", ["cuda:0"]) +@pytest.mark.parametrize("geometry_encoding_type", ["both", "stl", "sdf"]) +@pytest.mark.parametrize("processor_type", ["unet", "conv"]) +def test_geometry_rep( + device, geometry_encoding_type, processor_type, base_model_params +): + """Test GeometryRep module with different configurations""" + from physicsnemo.models.domino.model import GeometryRep + + torch.manual_seed(0) + + # Modify params for this test + params = base_model_params() + params.geometry_encoding_type = geometry_encoding_type + params.geometry_rep.geo_processor.processor_type = processor_type + params.geometry_rep.geo_processor.self_attention = False + params.geometry_rep.geo_processor.cross_attention = False + params.interp_res = (16, 16, 16) # Smaller for faster testing + + radii = [0.1, 0.2] + neighbors_in_radius = [8, 16] + + geo_rep = GeometryRep( + input_features=3, + radii=radii, + neighbors_in_radius=neighbors_in_radius, + hops=1, + model_parameters=params, + ).to(device) + + # Test inputs + x = torch.randn(1, 20, 3).to(device) + p_grid = torch.randn(1, 16, 16, 16, 3).to(device) + sdf = torch.randn(1, 16, 16, 16).to(device) + + output = geo_rep(x, p_grid, sdf) + + # Determine expected output channels + if geometry_encoding_type == "both": + expected_channels = len(radii) + 1 # STL channels + SDF channel + elif geometry_encoding_type == "stl": + expected_channels = len(radii) + else: # sdf + expected_channels = 1 + + validate_output_shape_and_values(output, (1, expected_channels, 16, 16, 16)) diff --git a/test/models/domino/test_domino_mlps.py b/test/models/domino/test_domino_mlps.py new file mode 100644 index 0000000000..86d2d9a208 --- /dev/null +++ b/test/models/domino/test_domino_mlps.py @@ -0,0 +1,86 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +import torch + +from .utils import validate_output_shape_and_values + + +@pytest.mark.parametrize("device", ["cuda:0"]) +@pytest.mark.parametrize("activation", ["relu", "gelu"]) +@pytest.mark.parametrize("n_layers", [1, 2, 3, 5]) +def test_mlp(device, activation, n_layers): + """Test basic MLP functionality""" + from physicsnemo.models.domino.mlps import MLP + from physicsnemo.models.domino.model import get_activation + + torch.manual_seed(0) + + mlp = MLP( + input_features=10, + output_features=5, + base_layer=32, + activation=get_activation(activation), + n_layers=n_layers, + ).to(device) + + x = torch.randn(4, 50, 10).to(device) + output = mlp(x) + + validate_output_shape_and_values(output, (4, 50, 5)) + + +@pytest.mark.parametrize("device", ["cuda:0"]) +def test_aggregation_model(device): + """Test AggregationModel""" + from physicsnemo.models.domino.mlps import AggregationModel + from physicsnemo.models.domino.model import get_activation + + torch.manual_seed(0) + + model = AggregationModel( + input_features=100, + output_features=1, + base_layer=64, + activation=get_activation("relu"), + ).to(device) + + x = torch.randn(2, 30, 100).to(device) + output = model(x) + + validate_output_shape_and_values(output, (2, 30, 1)) + + +@pytest.mark.parametrize("device", ["cuda:0"]) +def test_local_point_conv(device): + """Test LocalPointConv""" + from physicsnemo.models.domino.mlps import LocalPointConv + from physicsnemo.models.domino.model import get_activation + + torch.manual_seed(0) + + model = LocalPointConv( + input_features=50, + base_layer=128, + output_features=32, + activation=get_activation("relu"), + ).to(device) + + x = torch.randn(2, 100, 50).to(device) + output = model(x) + + validate_output_shape_and_values(output, (2, 100, 32)) diff --git a/test/models/domino/test_domino_solutions.py b/test/models/domino/test_domino_solutions.py new file mode 100644 index 0000000000..be4797eafc --- /dev/null +++ b/test/models/domino/test_domino_solutions.py @@ -0,0 +1,244 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +import torch +import torch.nn as nn + +from .utils import validate_output_shape_and_values + + +@pytest.mark.parametrize("device", ["cuda:0"]) +@pytest.mark.parametrize("num_variables", [1, 3, 5]) +@pytest.mark.parametrize("num_sample_points", [1, 3, 7]) +@pytest.mark.parametrize("encode_parameters", [True, False]) +def test_solution_calculator_volume( + device, num_variables, num_sample_points, encode_parameters +): + """Test SolutionCalculatorVolume with various configurations""" + from physicsnemo.models.domino.encodings import EncodingMLP + from physicsnemo.models.domino.mlps import AggregationModel + from physicsnemo.models.domino.model import get_activation + from physicsnemo.models.domino.solutions import SolutionCalculatorVolume + + torch.manual_seed(0) + + activation = get_activation("relu") + + # Create parameter model if needed + parameter_model = ( + EncodingMLP( + input_features=2, + base_layer=32, + fourier_features=True, + num_modes=3, + activation=activation, + ).to(device) + if encode_parameters + else None + ) + + # Create aggregation models + aggregation_model = nn.ModuleList( + [ + AggregationModel( + input_features=64 + 32 + 32 + (32 if encode_parameters else 0), + output_features=1, + base_layer=64, + activation=activation, + ).to(device) + for _ in range(num_variables) + ] + ) + + # Create basis functions + nn_basis = nn.ModuleList( + [ + EncodingMLP( + input_features=3, + base_layer=32, + fourier_features=False, + num_modes=5, + activation=activation, + ).to(device) + for _ in range(num_variables) + ] + ) + + model = SolutionCalculatorVolume( + num_variables=num_variables, + num_sample_points=num_sample_points, + noise_intensity=50.0, + encode_parameters=encode_parameters, + return_volume_neighbors=False, + parameter_model=parameter_model, + aggregation_model=aggregation_model, + nn_basis=nn_basis, + ).to(device) + + # Test data + volume_mesh_centers = torch.randn(2, 30, 3).to(device) + encoding_g = torch.randn(2, 30, 32).to(device) + encoding_node = torch.randn(2, 30, 64).to(device) + global_params_values = torch.randn(2, 2, 1).to(device) + global_params_reference = torch.randn(2, 2, 1).to(device) + + output = model( + volume_mesh_centers, + encoding_g, + encoding_node, + global_params_values, + global_params_reference, + ) + + validate_output_shape_and_values(output, (2, 30, num_variables)) + + +@pytest.mark.parametrize("device", ["cuda:0"]) +@pytest.mark.parametrize("num_variables", [1, 3, 5]) +@pytest.mark.parametrize("use_surface_normals", [True, False]) +@pytest.mark.parametrize("use_surface_area", [True, False]) +def test_solution_calculator_surface( + device, num_variables, use_surface_normals, use_surface_area +): + """Test SolutionCalculatorSurface with various configurations""" + from physicsnemo.models.domino.encodings import EncodingMLP + from physicsnemo.models.domino.mlps import AggregationModel + from physicsnemo.models.domino.model import get_activation + from physicsnemo.models.domino.solutions import SolutionCalculatorSurface + + torch.manual_seed(0) + + activation = get_activation("relu") + + # Determine input features based on surface configuration + input_features = 3 + if use_surface_normals: + input_features += 3 + if use_surface_area: + input_features += 1 + + print(f"Input features: {input_features}") + + # Create aggregation models + aggregation_model = nn.ModuleList( + [ + AggregationModel( + input_features=64 + 32 + 32, + output_features=1, + base_layer=64, + activation=activation, + ).to(device) + for _ in range(num_variables) + ] + ) + + # Create basis functions + nn_basis = nn.ModuleList( + [ + EncodingMLP( + input_features=input_features, + base_layer=32, + fourier_features=False, + num_modes=5, + activation=activation, + ).to(device) + for _ in range(num_variables) + ] + ) + + model = SolutionCalculatorSurface( + num_variables=num_variables, + num_sample_points=3, + noise_intensity=50.0, + encode_parameters=False, + use_surface_normals=use_surface_normals, + use_surface_area=use_surface_area, + parameter_model=None, + aggregation_model=aggregation_model, + nn_basis=nn_basis, + ).to(device) + + # Test data + surface_mesh_centers = torch.randn(2, 30, 3).to(device) + encoding_g = torch.randn(2, 30, 32).to(device) + encoding_node = torch.randn(2, 30, 64).to(device) + surface_mesh_neighbors = torch.randn(2, 30, 2, 3).to(device) + surface_normals = torch.randn(2, 30, 3).to(device) + surface_neighbors_normals = torch.randn(2, 30, 2, 3).to(device) + surface_areas = torch.rand(2, 30, 1).to(device) + 1e-6 + surface_neighbors_areas = torch.rand(2, 30, 2, 1).to(device) + 1e-6 + global_params_values = torch.randn(2, 2, 1).to(device) + global_params_reference = torch.randn(2, 2, 1).to(device) + + output = model( + surface_mesh_centers, + encoding_g, + encoding_node, + surface_mesh_neighbors, + surface_normals, + surface_neighbors_normals, + surface_areas, + surface_neighbors_areas, + global_params_values, + global_params_reference, + ) + + validate_output_shape_and_values(output, (2, 30, num_variables)) + + +@pytest.mark.parametrize("device", ["cuda:0"]) +@pytest.mark.parametrize("r", [0.5, 1.0, 2.0]) +@pytest.mark.parametrize("num_points", [10, 50, 100]) +def test_sample_sphere(device, r, num_points): + """Test sphere sampling function""" + from physicsnemo.models.domino.solutions import sample_sphere + + torch.manual_seed(0) + + center = torch.randn(2, 30, 3).to(device) + output = sample_sphere(center, r, num_points) + + validate_output_shape_and_values(output, (2, 30, num_points, 3)) + + # Check that points are within the sphere radius + distances = torch.norm(output - center.unsqueeze(2), dim=-1) + assert (distances <= r + 1e-6).all(), "Some sampled points are outside the sphere" + + +@pytest.mark.parametrize("device", ["cuda:0"]) +def test_sample_sphere_shell(device): + """Test spherical shell sampling function""" + from physicsnemo.models.domino.solutions import sample_sphere_shell + + torch.manual_seed(0) + + center = torch.randn(2, 30, 3).to(device) + r_inner, r_outer = 0.5, 1.5 + num_points = 50 + + output = sample_sphere_shell(center, r_inner, r_outer, num_points) + + validate_output_shape_and_values(output, (2, 30, num_points, 3)) + + # Check that points are within the shell + distances = torch.norm(output - center.unsqueeze(2), dim=-1) + assert (distances >= r_inner - 1e-6).all(), ( + "Some sampled points are inside inner radius" + ) + assert (distances <= r_outer + 1e-6).all(), ( + "Some sampled points are outside outer radius" + ) diff --git a/test/models/domino/utils.py b/test/models/domino/utils.py new file mode 100644 index 0000000000..8c5fb971f8 --- /dev/null +++ b/test/models/domino/utils.py @@ -0,0 +1,154 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass +from typing import Sequence + +import pytest +import torch + + +def generate_test_data(bsize, nx, ny, nz, num_neigh, device): + """Generate test data for DoMINO""" + return { + "pos_volume_closest": torch.randn(bsize, 50, 3).to(device), + "pos_volume_center_of_mass": torch.randn(bsize, 50, 3).to(device), + "pos_surface_center_of_mass": torch.randn(bsize, 50, 3).to(device), + "geometry_coordinates": torch.randn(bsize, 50, 3).to(device), + "grid": torch.randn(bsize, nx, ny, nz, 3).to(device), + "surf_grid": torch.randn(bsize, nx, ny, nz, 3).to(device), + "sdf_grid": torch.randn(bsize, nx, ny, nz).to(device), + "sdf_surf_grid": torch.randn(bsize, nx, ny, nz).to(device), + "sdf_nodes": torch.randn(bsize, 50, 1).to(device), + "surface_mesh_centers": torch.randn(bsize, 50, 3).to(device), + "surface_mesh_neighbors": torch.randn(bsize, 50, num_neigh, 3).to(device), + "surface_normals": torch.randn(bsize, 50, 3).to(device), + "surface_neighbors_normals": torch.randn(bsize, 50, num_neigh, 3).to(device), + "surface_areas": torch.rand(bsize, 50).to(device) + 1e-6, + "surface_neighbors_areas": torch.rand(bsize, 50, num_neigh).to(device) + 1e-6, + "volume_mesh_centers": torch.randn(bsize, 50, 3).to(device), + "volume_min_max": torch.randn(bsize, 2, 3).to(device), + "surface_min_max": torch.randn(bsize, 2, 3).to(device), + "global_params_values": torch.randn(bsize, 2, 1).to(device), + "global_params_reference": torch.randn(bsize, 2, 1).to(device), + } + + +@pytest.fixture +def base_model_params(): + """Base model parameters for testing""" + + @dataclass + class model_params: + @dataclass + class geometry_rep: + @dataclass + class geo_conv: + base_neurons: int = 32 + base_neurons_in: int = 8 + base_neurons_out: int = 8 + surface_hops: int = 1 + volume_hops: int = 1 + volume_radii: Sequence = (0.1, 0.5) + volume_neighbors_in_radius: Sequence = (10, 10) + surface_radii: Sequence = (0.05,) + surface_neighbors_in_radius: Sequence = (10,) + activation: str = "relu" + fourier_features: bool = False + num_modes: int = 5 + + @dataclass + class geo_processor: + base_filters: int = 8 + activation: str = "relu" + processor_type: str = "unet" + self_attention: bool = True + cross_attention: bool = False + + base_filters: int = 8 + geo_conv = geo_conv + geo_processor = geo_processor + + @dataclass + class geometry_local: + base_layer: int = 512 + volume_neighbors_in_radius: Sequence = (128, 128) + surface_neighbors_in_radius: Sequence = (128,) + volume_radii: Sequence = (0.05, 0.1) + surface_radii: Sequence = (0.05,) + + @dataclass + class nn_basis_functions: + base_layer: int = 512 + fourier_features: bool = False + num_modes: int = 5 + activation: str = "relu" + + @dataclass + class local_point_conv: + activation: str = "relu" + + @dataclass + class aggregation_model: + base_layer: int = 512 + activation: str = "relu" + + @dataclass + class position_encoder: + base_neurons: int = 512 + activation: str = "relu" + fourier_features: bool = False + num_modes: int = 5 + + @dataclass + class parameter_model: + base_layer: int = 512 + fourier_features: bool = True + num_modes: int = 5 + activation: str = "relu" + + model_type: str = "combined" + activation: str = "relu" + interp_res: Sequence = (64, 64, 64) # Smaller for testing + use_sdf_in_basis_func: bool = True + positional_encoding: bool = False + surface_neighbors: bool = True + num_neighbors_surface: int = 7 + num_neighbors_volume: int = 7 + use_surface_normals: bool = True + use_surface_area: bool = True + encode_parameters: bool = False + combine_volume_surface: bool = False + geometry_encoding_type: str = "both" + solution_calculation_mode: str = "two-loop" + geometry_rep = geometry_rep + nn_basis_functions = nn_basis_functions + aggregation_model = aggregation_model + position_encoder = position_encoder + geometry_local = geometry_local + + return model_params + + +def validate_output_shape_and_values(output, expected_shape, check_finite=True): + """Validate output tensor shape and values""" + if output is not None: + assert output.shape == expected_shape, ( + f"Expected shape {expected_shape}, got {output.shape}" + ) + if check_finite: + assert torch.isfinite(output).all(), "Output contains non-finite values" + assert not torch.isnan(output).any(), "Output contains NaN values" From 260c2405e7727ff7aaeccd5a68a572b8a3798a3b Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Mon, 22 Sep 2025 14:18:45 +0000 Subject: [PATCH 37/98] Move geometry rep codes to a separate file for model simplicity too. --- physicsnemo/models/domino/geometry_rep.py | 505 ++++++++++++++++++ physicsnemo/models/domino/model.py | 488 +---------------- test/models/domino/conftest.py | 116 ++++ .../models/domino/test_domino_geometry_rep.py | 6 +- test/models/domino/utils.py | 100 ---- 5 files changed, 625 insertions(+), 590 deletions(-) create mode 100644 physicsnemo/models/domino/geometry_rep.py create mode 100644 test/models/domino/conftest.py diff --git a/physicsnemo/models/domino/geometry_rep.py b/physicsnemo/models/domino/geometry_rep.py new file mode 100644 index 0000000000..ea77ef5f15 --- /dev/null +++ b/physicsnemo/models/domino/geometry_rep.py @@ -0,0 +1,505 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from typing import Callable, Literal, Sequence + +import torch +import torch.nn as nn +import torch.nn.functional as F +from einops import rearrange + +from physicsnemo.models.unet import UNet + +from .ball_query import BQWarp +from .encodings import fourier_encode_vectorized + + +def get_activation(activation: Literal["relu", "gelu"]) -> Callable: + """ + Return a PyTorch activation function corresponding to the given name. + """ + if activation == "relu": + return nn.ReLU() + elif activation == "gelu": + return nn.GELU() + else: + raise ValueError(f"Activation function {activation} not found") + + +def scale_sdf(sdf: torch.Tensor) -> torch.Tensor: + """ + Scale a signed distance function (SDF) to emphasize surface regions. + + This function applies a non-linear scaling to the SDF values that compresses + the range while preserving the sign, effectively giving more weight to points + near surfaces where abs(SDF) is small. + + Args: + sdf: Tensor containing signed distance function values + + Returns: + Tensor with scaled SDF values in range [-1, 1] + """ + return sdf / (0.4 + torch.abs(sdf)) + + +class GeoConvOut(nn.Module): + """ + Geometry layer to project STL geometry data onto regular grids. + """ + + def __init__( + self, + input_features: int, + model_parameters, + grid_resolution=None, + ): + """ + Initialize the GeoConvOut layer. + + Args: + input_features: Number of input feature dimensions + model_parameters: Configuration parameters for the model + grid_resolution: Resolution of the output grid [nx, ny, nz] + """ + super().__init__() + if grid_resolution is None: + grid_resolution = [256, 96, 64] + base_neurons = model_parameters.base_neurons + self.fourier_features = model_parameters.fourier_features + self.num_modes = model_parameters.num_modes + + if self.fourier_features: + input_features_calculated = input_features * (1 + 2 * self.num_modes) + else: + input_features_calculated = input_features + + self.fc1 = nn.Linear(input_features_calculated, base_neurons) + self.fc2 = nn.Linear(base_neurons, base_neurons // 2) + self.fc3 = nn.Linear(base_neurons // 2, model_parameters.base_neurons_in) + + self.grid_resolution = grid_resolution + + self.activation = get_activation(model_parameters.activation) + + if self.fourier_features: + self.register_buffer( + "freqs", torch.exp(torch.linspace(0, math.pi, self.num_modes)) + ) + + def forward( + self, + x: torch.Tensor, + grid: torch.Tensor, + radius: float = 0.025, + neighbors_in_radius: int = 10, + ) -> torch.Tensor: + """ + Process and project geometric features onto a 3D grid. + + Args: + x: Input tensor containing coordinates of the neighboring points + (batch_size, nx*ny*nz, n_points, 3) + grid: Input tensor represented as a grid of shape + (batch_size, nx, ny, nz, 3) + + Returns: + Processed geometry features of shape (batch_size, base_neurons_in, nx, ny, nz) + """ + + nx, ny, nz = ( + self.grid_resolution[0], + self.grid_resolution[1], + self.grid_resolution[2], + ) + grid = grid.reshape(1, nx * ny * nz, 3, 1) + x_transposed = torch.transpose(x, 2, 3) + dist_weights = 1.0 / (1e-6 + (x_transposed - grid) ** 2.0) + dist_weights = torch.transpose(dist_weights, 2, 3) + + # x = torch.sum(x * dist_weights, 2) / torch.sum(dist_weights, 2) + # x = torch.sum(x, 2) + mask = abs(x - 0) > 1e-6 + if self.fourier_features: + facets = torch.cat((x, fourier_encode_vectorized(x, self.freqs)), axis=-1) + else: + facets = x + x = self.activation(self.fc1(facets)) + x = self.activation(self.fc2(x)) + x = F.tanh(self.fc3(x)) + + mask = mask[:, :, :, 0:1].expand( + mask.shape[0], mask.shape[1], mask.shape[2], x.shape[-1] + ) + + x = torch.sum(x * mask, 2) + x = rearrange(x, "b (x y z) c -> b c x y z", x=nx, y=ny, z=nz) + return x + + +class GeoProcessor(nn.Module): + """Geometry processing layer using CNNs""" + + def __init__(self, input_filters: int, output_filters: int, model_parameters): + """ + Initialize the GeoProcessor network. + + Args: + input_filters: Number of input channels + model_parameters: Configuration parameters for the model + """ + super().__init__() + base_filters = model_parameters.base_filters + self.conv1 = nn.Conv3d( + input_filters, base_filters, kernel_size=3, padding="same" + ) + self.conv2 = nn.Conv3d( + base_filters, 2 * base_filters, kernel_size=3, padding="same" + ) + self.conv3 = nn.Conv3d( + 2 * base_filters, 4 * base_filters, kernel_size=3, padding="same" + ) + self.conv3_1 = nn.Conv3d( + 4 * base_filters, 4 * base_filters, kernel_size=3, padding="same" + ) + self.conv4 = nn.Conv3d( + 4 * base_filters, 2 * base_filters, kernel_size=3, padding="same" + ) + self.conv5 = nn.Conv3d( + 4 * base_filters, base_filters, kernel_size=3, padding="same" + ) + self.conv6 = nn.Conv3d( + 2 * base_filters, input_filters, kernel_size=3, padding="same" + ) + self.conv7 = nn.Conv3d( + 2 * input_filters, input_filters, kernel_size=3, padding="same" + ) + self.conv8 = nn.Conv3d( + input_filters, output_filters, kernel_size=3, padding="same" + ) + self.avg_pool = torch.nn.AvgPool3d((2, 2, 2)) + self.max_pool = nn.MaxPool3d(2) + self.upsample = nn.Upsample(scale_factor=2, mode="nearest") + self.activation = get_activation(model_parameters.activation) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Process geometry information through the 3D CNN network. + + The network follows an encoder-decoder architecture with skip connections: + 1. Downsampling path (encoder) with three levels of max pooling + 2. Processing loop in the bottleneck + 3. Upsampling path (decoder) with skip connections from the encoder + + Args: + x: Input tensor containing grid-represented geometry of shape + (batch_size, input_filters, nx, ny, nz) + + Returns: + Processed geometry features of shape (batch_size, 1, nx, ny, nz) + """ + # Encoder + x0 = x + x = self.conv1(x) + x = self.activation(x) + x = self.max_pool(x) + + x1 = x + x = self.conv2(x) + x = self.activation(x) + x = self.max_pool(x) + + x2 = x + x = self.conv3(x) + x = self.activation(x) + x = self.max_pool(x) + + # Processor loop + x = self.activation(self.conv3_1(x)) + + # Decoder + x = self.conv4(x) + x = self.activation(x) + x = self.upsample(x) + x = torch.cat((x, x2), dim=1) + + x = self.conv5(x) + x = self.activation(x) + x = self.upsample(x) + x = torch.cat((x, x1), dim=1) + + x = self.conv6(x) + x = self.activation(x) + x = self.upsample(x) + x = torch.cat((x, x0), dim=1) + + x = self.activation(self.conv7(x)) + x = self.conv8(x) + + return x + + +class GeometryRep(nn.Module): + """ + Geometry representation module that processes STL geometry data. + + This module constructs a multiscale representation of geometry by: + 1. Computing multi-scale geometry encoding for local and global context + 2. Processing signed distance field (SDF) data for surface information + + The combined encoding enables the model to reason about both local and global + geometric properties. + """ + + def __init__( + self, + input_features: int, + radii: Sequence[float], + neighbors_in_radius, + hops=1, + model_parameters=None, + ): + """ + Initialize the GeometryRep module. + + Args: + input_features: Number of input feature dimensions + model_parameters: Configuration parameters for the model + """ + super().__init__() + geometry_rep = model_parameters.geometry_rep + self.geo_encoding_type = model_parameters.geometry_encoding_type + self.cross_attention = geometry_rep.geo_processor.cross_attention + self.self_attention = geometry_rep.geo_processor.self_attention + self.activation_conv = get_activation(geometry_rep.geo_conv.activation) + self.activation_processor = geometry_rep.geo_processor.activation + + self.bq_warp = nn.ModuleList() + self.geo_processors = nn.ModuleList() + for j in range(len(radii)): + self.bq_warp.append( + BQWarp( + radius=radii[j], + neighbors_in_radius=neighbors_in_radius[j], + ) + ) + if geometry_rep.geo_processor.processor_type == "unet": + h = geometry_rep.geo_processor.base_filters + if self.self_attention: + normalization_in_unet = "layernorm" + else: + normalization_in_unet = None + self.geo_processors.append( + UNet( + in_channels=geometry_rep.geo_conv.base_neurons_in, + out_channels=geometry_rep.geo_conv.base_neurons_out, + model_depth=3, + feature_map_channels=[ + h, + 2 * h, + 4 * h, + ], + num_conv_blocks=1, + kernel_size=3, + stride=1, + conv_activation=self.activation_processor, + padding=1, + padding_mode="zeros", + pooling_type="MaxPool3d", + pool_size=2, + normalization=normalization_in_unet, + use_attn_gate=self.self_attention, + attn_decoder_feature_maps=[4 * h, 2 * h], + attn_feature_map_channels=[2 * h, h], + attn_intermediate_channels=4 * h, + gradient_checkpointing=True, + ) + ) + elif geometry_rep.geo_processor.processor_type == "conv": + self.geo_processors.append( + nn.Sequential( + GeoProcessor( + input_filters=geometry_rep.geo_conv.base_neurons_in, + output_filters=geometry_rep.geo_conv.base_neurons_out, + model_parameters=geometry_rep.geo_processor, + ), + GeoProcessor( + input_filters=geometry_rep.geo_conv.base_neurons_in, + output_filters=geometry_rep.geo_conv.base_neurons_out, + model_parameters=geometry_rep.geo_processor, + ), + ) + ) + else: + raise ValueError("Invalid prompt. Specify unet or conv ...") + + self.geo_conv_out = nn.ModuleList() + self.geo_processor_out = nn.ModuleList() + for _ in range(len(radii)): + self.geo_conv_out.append( + GeoConvOut( + input_features=input_features, + model_parameters=geometry_rep.geo_conv, + grid_resolution=model_parameters.interp_res, + ) + ) + self.geo_processor_out.append( + nn.Conv3d( + geometry_rep.geo_conv.base_neurons_out, + 1, + kernel_size=3, + padding="same", + ) + ) + + if geometry_rep.geo_processor.processor_type == "unet": + h = geometry_rep.geo_processor.base_filters + if self.self_attention: + normalization_in_unet = "layernorm" + else: + normalization_in_unet = None + self.geo_processor_sdf = UNet( + in_channels=6, + out_channels=geometry_rep.geo_conv.base_neurons_out, + model_depth=3, + feature_map_channels=[ + h, + 2 * h, + 4 * h, + ], + num_conv_blocks=1, + kernel_size=3, + stride=1, + conv_activation=self.activation_processor, + padding=1, + padding_mode="zeros", + pooling_type="MaxPool3d", + pool_size=2, + normalization=normalization_in_unet, + use_attn_gate=self.self_attention, + attn_decoder_feature_maps=[4 * h, 2 * h], + attn_feature_map_channels=[2 * h, h], + attn_intermediate_channels=4 * h, + gradient_checkpointing=True, + ) + elif geometry_rep.geo_processor.processor_type == "conv": + self.geo_processor_sdf = nn.Sequential( + GeoProcessor( + input_filters=6, + output_filters=geometry_rep.geo_conv.base_neurons_out, + model_parameters=geometry_rep.geo_processor, + ), + GeoProcessor( + input_filters=geometry_rep.geo_conv.base_neurons_out, + output_filters=geometry_rep.geo_conv.base_neurons_out, + model_parameters=geometry_rep.geo_processor, + ), + ) + else: + raise ValueError("Invalid prompt. Specify unet or conv ...") + self.radii = radii + self.hops = hops + + self.geo_processor_sdf_out = nn.Conv3d( + geometry_rep.geo_conv.base_neurons_out, 1, kernel_size=3, padding="same" + ) + + if self.cross_attention: + self.combined_unet = UNet( + in_channels=1 + len(radii), + out_channels=1 + len(radii), + model_depth=3, + feature_map_channels=[ + h, + 2 * h, + 4 * h, + ], + num_conv_blocks=1, + kernel_size=3, + stride=1, + conv_activation=self.activation_processor, + padding=1, + padding_mode="zeros", + pooling_type="MaxPool3d", + pool_size=2, + normalization="layernorm", + use_attn_gate=True, + attn_decoder_feature_maps=[4 * h, 2 * h], + attn_feature_map_channels=[2 * h, h], + attn_intermediate_channels=4 * h, + gradient_checkpointing=True, + ) + + def forward( + self, x: torch.Tensor, p_grid: torch.Tensor, sdf: torch.Tensor + ) -> torch.Tensor: + """ + Process geometry data to create a comprehensive representation. + + This method combines short-range, long-range, and SDF-based geometry + encodings to create a rich representation of the geometry. + + Args: + x: Input tensor containing geometric point data + p_grid: Grid points for sampling + sdf: Signed distance field tensor + + Returns: + Comprehensive geometry encoding that concatenates short-range, + SDF-based, and long-range features + """ + if self.geo_encoding_type == "both" or self.geo_encoding_type == "stl": + # Calculate multi-scale geoemtry dependency + x_encoding = [] + for j in range(len(self.radii)): + mapping, k_short = self.bq_warp[j](x, p_grid) + x_encoding_inter = self.geo_conv_out[j](k_short, p_grid) + # Propagate information in the geometry enclosed BBox + for _ in range(self.hops): + dx = self.geo_processors[j](x_encoding_inter) / self.hops + x_encoding_inter = x_encoding_inter + dx + x_encoding_inter = self.geo_processor_out[j](x_encoding_inter) + x_encoding.append(x_encoding_inter) + x_encoding = torch.cat(x_encoding, dim=1) + + if self.geo_encoding_type == "both" or self.geo_encoding_type == "sdf": + # Expand SDF + sdf = torch.unsqueeze(sdf, 1) + # Scaled sdf to emphasize near surface + scaled_sdf = scale_sdf(sdf) + # Binary sdf + binary_sdf = torch.where(sdf >= 0, 0.0, 1.0) + # Gradients of SDF + sdf_x, sdf_y, sdf_z = torch.gradient(sdf, dim=[2, 3, 4]) + + # Process SDF and its computed features + sdf = torch.cat((sdf, scaled_sdf, binary_sdf, sdf_x, sdf_y, sdf_z), 1) + sdf_encoding = self.geo_processor_sdf(sdf) + sdf_encoding = self.geo_processor_sdf_out(sdf_encoding) + + if self.geo_encoding_type == "both": + # Geometry encoding comprised of short-range, long-range and SDF features + encoding_g = torch.cat((x_encoding, sdf_encoding), 1) + elif self.geo_encoding_type == "sdf": + encoding_g = sdf_encoding + elif self.geo_encoding_type == "stl": + encoding_g = x_encoding + + if self.cross_attention: + encoding_g = self.combined_unet(encoding_g) + + return encoding_g diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py index 4ad6ae2856..bc06289a6b 100644 --- a/physicsnemo/models/domino/model.py +++ b/physicsnemo/models/domino/model.py @@ -21,504 +21,19 @@ the config.yaml file) """ -import math -from typing import Callable, Literal, Sequence - import torch import torch.nn as nn -import torch.nn.functional as F -from einops import rearrange from physicsnemo.models.unet import UNet -from physicsnemo.utils.profiling import profile -from .ball_query import BQWarp from .encodings import ( EncodingMLP, MultiGeometryEncoding, - fourier_encode_vectorized, ) +from .geometry_rep import GeometryRep, get_activation from .mlps import AggregationModel from .solutions import SolutionCalculatorSurface, SolutionCalculatorVolume - -def get_activation(activation: Literal["relu", "gelu"]) -> Callable: - """ - Return a PyTorch activation function corresponding to the given name. - """ - if activation == "relu": - return nn.ReLU() - elif activation == "gelu": - return nn.GELU() - else: - raise ValueError(f"Activation function {activation} not found") - - -def scale_sdf(sdf: torch.Tensor) -> torch.Tensor: - """ - Scale a signed distance function (SDF) to emphasize surface regions. - - This function applies a non-linear scaling to the SDF values that compresses - the range while preserving the sign, effectively giving more weight to points - near surfaces where abs(SDF) is small. - - Args: - sdf: Tensor containing signed distance function values - - Returns: - Tensor with scaled SDF values in range [-1, 1] - """ - return sdf / (0.4 + torch.abs(sdf)) - - -class GeoConvOut(nn.Module): - """ - Geometry layer to project STL geometry data onto regular grids. - """ - - def __init__( - self, - input_features: int, - model_parameters, - grid_resolution=None, - ): - """ - Initialize the GeoConvOut layer. - - Args: - input_features: Number of input feature dimensions - model_parameters: Configuration parameters for the model - grid_resolution: Resolution of the output grid [nx, ny, nz] - """ - super().__init__() - if grid_resolution is None: - grid_resolution = [256, 96, 64] - base_neurons = model_parameters.base_neurons - self.fourier_features = model_parameters.fourier_features - self.num_modes = model_parameters.num_modes - - if self.fourier_features: - input_features_calculated = input_features * (1 + 2 * self.num_modes) - else: - input_features_calculated = input_features - - self.fc1 = nn.Linear(input_features_calculated, base_neurons) - self.fc2 = nn.Linear(base_neurons, base_neurons // 2) - self.fc3 = nn.Linear(base_neurons // 2, model_parameters.base_neurons_in) - - self.grid_resolution = grid_resolution - - self.activation = get_activation(model_parameters.activation) - - if self.fourier_features: - self.register_buffer( - "freqs", torch.exp(torch.linspace(0, math.pi, self.num_modes)) - ) - - def forward( - self, - x: torch.Tensor, - grid: torch.Tensor, - radius: float = 0.025, - neighbors_in_radius: int = 10, - ) -> torch.Tensor: - """ - Process and project geometric features onto a 3D grid. - - Args: - x: Input tensor containing coordinates of the neighboring points - (batch_size, nx*ny*nz, n_points, 3) - grid: Input tensor represented as a grid of shape - (batch_size, nx, ny, nz, 3) - - Returns: - Processed geometry features of shape (batch_size, base_neurons_in, nx, ny, nz) - """ - - nx, ny, nz = ( - self.grid_resolution[0], - self.grid_resolution[1], - self.grid_resolution[2], - ) - grid = grid.reshape(1, nx * ny * nz, 3, 1) - x_transposed = torch.transpose(x, 2, 3) - dist_weights = 1.0 / (1e-6 + (x_transposed - grid) ** 2.0) - dist_weights = torch.transpose(dist_weights, 2, 3) - - # x = torch.sum(x * dist_weights, 2) / torch.sum(dist_weights, 2) - # x = torch.sum(x, 2) - mask = abs(x - 0) > 1e-6 - if self.fourier_features: - facets = torch.cat((x, fourier_encode_vectorized(x, self.freqs)), axis=-1) - else: - facets = x - x = self.activation(self.fc1(facets)) - x = self.activation(self.fc2(x)) - x = F.tanh(self.fc3(x)) - - mask = mask[:, :, :, 0:1].expand( - mask.shape[0], mask.shape[1], mask.shape[2], x.shape[-1] - ) - - x = torch.sum(x * mask, 2) - x = rearrange(x, "b (x y z) c -> b c x y z", x=nx, y=ny, z=nz) - return x - - -class GeoProcessor(nn.Module): - """Geometry processing layer using CNNs""" - - def __init__(self, input_filters: int, output_filters: int, model_parameters): - """ - Initialize the GeoProcessor network. - - Args: - input_filters: Number of input channels - model_parameters: Configuration parameters for the model - """ - super().__init__() - base_filters = model_parameters.base_filters - self.conv1 = nn.Conv3d( - input_filters, base_filters, kernel_size=3, padding="same" - ) - self.conv2 = nn.Conv3d( - base_filters, 2 * base_filters, kernel_size=3, padding="same" - ) - self.conv3 = nn.Conv3d( - 2 * base_filters, 4 * base_filters, kernel_size=3, padding="same" - ) - self.conv3_1 = nn.Conv3d( - 4 * base_filters, 4 * base_filters, kernel_size=3, padding="same" - ) - self.conv4 = nn.Conv3d( - 4 * base_filters, 2 * base_filters, kernel_size=3, padding="same" - ) - self.conv5 = nn.Conv3d( - 4 * base_filters, base_filters, kernel_size=3, padding="same" - ) - self.conv6 = nn.Conv3d( - 2 * base_filters, input_filters, kernel_size=3, padding="same" - ) - self.conv7 = nn.Conv3d( - 2 * input_filters, input_filters, kernel_size=3, padding="same" - ) - self.conv8 = nn.Conv3d( - input_filters, output_filters, kernel_size=3, padding="same" - ) - self.avg_pool = torch.nn.AvgPool3d((2, 2, 2)) - self.max_pool = nn.MaxPool3d(2) - self.upsample = nn.Upsample(scale_factor=2, mode="nearest") - self.activation = get_activation(model_parameters.activation) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - """ - Process geometry information through the 3D CNN network. - - The network follows an encoder-decoder architecture with skip connections: - 1. Downsampling path (encoder) with three levels of max pooling - 2. Processing loop in the bottleneck - 3. Upsampling path (decoder) with skip connections from the encoder - - Args: - x: Input tensor containing grid-represented geometry of shape - (batch_size, input_filters, nx, ny, nz) - - Returns: - Processed geometry features of shape (batch_size, 1, nx, ny, nz) - """ - # Encoder - x0 = x - x = self.conv1(x) - x = self.activation(x) - x = self.max_pool(x) - - x1 = x - x = self.conv2(x) - x = self.activation(x) - x = self.max_pool(x) - - x2 = x - x = self.conv3(x) - x = self.activation(x) - x = self.max_pool(x) - - # Processor loop - x = self.activation(self.conv3_1(x)) - - # Decoder - x = self.conv4(x) - x = self.activation(x) - x = self.upsample(x) - x = torch.cat((x, x2), dim=1) - - x = self.conv5(x) - x = self.activation(x) - x = self.upsample(x) - x = torch.cat((x, x1), dim=1) - - x = self.conv6(x) - x = self.activation(x) - x = self.upsample(x) - x = torch.cat((x, x0), dim=1) - - x = self.activation(self.conv7(x)) - x = self.conv8(x) - - return x - - -class GeometryRep(nn.Module): - """ - Geometry representation module that processes STL geometry data. - - This module constructs a multiscale representation of geometry by: - 1. Computing multi-scale geometry encoding for local and global context - 2. Processing signed distance field (SDF) data for surface information - - The combined encoding enables the model to reason about both local and global - geometric properties. - """ - - def __init__( - self, - input_features: int, - radii: Sequence[float], - neighbors_in_radius, - hops=1, - model_parameters=None, - ): - """ - Initialize the GeometryRep module. - - Args: - input_features: Number of input feature dimensions - model_parameters: Configuration parameters for the model - """ - super().__init__() - geometry_rep = model_parameters.geometry_rep - self.geo_encoding_type = model_parameters.geometry_encoding_type - self.cross_attention = geometry_rep.geo_processor.cross_attention - self.self_attention = geometry_rep.geo_processor.self_attention - self.activation_conv = get_activation(geometry_rep.geo_conv.activation) - self.activation_processor = geometry_rep.geo_processor.activation - - self.bq_warp = nn.ModuleList() - self.geo_processors = nn.ModuleList() - for j in range(len(radii)): - self.bq_warp.append( - BQWarp( - radius=radii[j], - neighbors_in_radius=neighbors_in_radius[j], - ) - ) - if geometry_rep.geo_processor.processor_type == "unet": - h = geometry_rep.geo_processor.base_filters - if self.self_attention: - normalization_in_unet = "layernorm" - else: - normalization_in_unet = None - self.geo_processors.append( - UNet( - in_channels=geometry_rep.geo_conv.base_neurons_in, - out_channels=geometry_rep.geo_conv.base_neurons_out, - model_depth=3, - feature_map_channels=[ - h, - 2 * h, - 4 * h, - ], - num_conv_blocks=1, - kernel_size=3, - stride=1, - conv_activation=self.activation_processor, - padding=1, - padding_mode="zeros", - pooling_type="MaxPool3d", - pool_size=2, - normalization=normalization_in_unet, - use_attn_gate=self.self_attention, - attn_decoder_feature_maps=[4 * h, 2 * h], - attn_feature_map_channels=[2 * h, h], - attn_intermediate_channels=4 * h, - gradient_checkpointing=True, - ) - ) - elif geometry_rep.geo_processor.processor_type == "conv": - self.geo_processors.append( - nn.Sequential( - GeoProcessor( - input_filters=geometry_rep.geo_conv.base_neurons_in, - output_filters=geometry_rep.geo_conv.base_neurons_out, - model_parameters=geometry_rep.geo_processor, - ), - GeoProcessor( - input_filters=geometry_rep.geo_conv.base_neurons_in, - output_filters=geometry_rep.geo_conv.base_neurons_out, - model_parameters=geometry_rep.geo_processor, - ), - ) - ) - else: - raise ValueError("Invalid prompt. Specify unet or conv ...") - - self.geo_conv_out = nn.ModuleList() - self.geo_processor_out = nn.ModuleList() - for _ in range(len(radii)): - self.geo_conv_out.append( - GeoConvOut( - input_features=input_features, - model_parameters=geometry_rep.geo_conv, - grid_resolution=model_parameters.interp_res, - ) - ) - self.geo_processor_out.append( - nn.Conv3d( - geometry_rep.geo_conv.base_neurons_out, - 1, - kernel_size=3, - padding="same", - ) - ) - - if geometry_rep.geo_processor.processor_type == "unet": - h = geometry_rep.geo_processor.base_filters - if self.self_attention: - normalization_in_unet = "layernorm" - else: - normalization_in_unet = None - self.geo_processor_sdf = UNet( - in_channels=6, - out_channels=geometry_rep.geo_conv.base_neurons_out, - model_depth=3, - feature_map_channels=[ - h, - 2 * h, - 4 * h, - ], - num_conv_blocks=1, - kernel_size=3, - stride=1, - conv_activation=self.activation_processor, - padding=1, - padding_mode="zeros", - pooling_type="MaxPool3d", - pool_size=2, - normalization=normalization_in_unet, - use_attn_gate=self.self_attention, - attn_decoder_feature_maps=[4 * h, 2 * h], - attn_feature_map_channels=[2 * h, h], - attn_intermediate_channels=4 * h, - gradient_checkpointing=True, - ) - elif geometry_rep.geo_processor.processor_type == "conv": - self.geo_processor_sdf = nn.Sequential( - GeoProcessor( - input_filters=6, - output_filters=geometry_rep.geo_conv.base_neurons_out, - model_parameters=geometry_rep.geo_processor, - ), - GeoProcessor( - input_filters=geometry_rep.geo_conv.base_neurons_out, - output_filters=geometry_rep.geo_conv.base_neurons_out, - model_parameters=geometry_rep.geo_processor, - ), - ) - else: - raise ValueError("Invalid prompt. Specify unet or conv ...") - self.radii = radii - self.hops = hops - - self.geo_processor_sdf_out = nn.Conv3d( - geometry_rep.geo_conv.base_neurons_out, 1, kernel_size=3, padding="same" - ) - - if self.cross_attention: - self.combined_unet = UNet( - in_channels=1 + len(radii), - out_channels=1 + len(radii), - model_depth=3, - feature_map_channels=[ - h, - 2 * h, - 4 * h, - ], - num_conv_blocks=1, - kernel_size=3, - stride=1, - conv_activation=self.activation_processor, - padding=1, - padding_mode="zeros", - pooling_type="MaxPool3d", - pool_size=2, - normalization="layernorm", - use_attn_gate=True, - attn_decoder_feature_maps=[4 * h, 2 * h], - attn_feature_map_channels=[2 * h, h], - attn_intermediate_channels=4 * h, - gradient_checkpointing=True, - ) - - def forward( - self, x: torch.Tensor, p_grid: torch.Tensor, sdf: torch.Tensor - ) -> torch.Tensor: - """ - Process geometry data to create a comprehensive representation. - - This method combines short-range, long-range, and SDF-based geometry - encodings to create a rich representation of the geometry. - - Args: - x: Input tensor containing geometric point data - p_grid: Grid points for sampling - sdf: Signed distance field tensor - - Returns: - Comprehensive geometry encoding that concatenates short-range, - SDF-based, and long-range features - """ - if self.geo_encoding_type == "both" or self.geo_encoding_type == "stl": - # Calculate multi-scale geoemtry dependency - x_encoding = [] - for j in range(len(self.radii)): - mapping, k_short = self.bq_warp[j](x, p_grid) - x_encoding_inter = self.geo_conv_out[j](k_short, p_grid) - # Propagate information in the geometry enclosed BBox - for _ in range(self.hops): - dx = self.geo_processors[j](x_encoding_inter) / self.hops - x_encoding_inter = x_encoding_inter + dx - x_encoding_inter = self.geo_processor_out[j](x_encoding_inter) - x_encoding.append(x_encoding_inter) - x_encoding = torch.cat(x_encoding, dim=1) - - if self.geo_encoding_type == "both" or self.geo_encoding_type == "sdf": - # Expand SDF - sdf = torch.unsqueeze(sdf, 1) - # Scaled sdf to emphasize near surface - scaled_sdf = scale_sdf(sdf) - # Binary sdf - binary_sdf = torch.where(sdf >= 0, 0.0, 1.0) - # Gradients of SDF - sdf_x, sdf_y, sdf_z = torch.gradient(sdf, dim=[2, 3, 4]) - - # Process SDF and its computed features - sdf = torch.cat((sdf, scaled_sdf, binary_sdf, sdf_x, sdf_y, sdf_z), 1) - sdf_encoding = self.geo_processor_sdf(sdf) - sdf_encoding = self.geo_processor_sdf_out(sdf_encoding) - - if self.geo_encoding_type == "both": - # Geometry encoding comprised of short-range, long-range and SDF features - encoding_g = torch.cat((x_encoding, sdf_encoding), 1) - elif self.geo_encoding_type == "sdf": - encoding_g = sdf_encoding - elif self.geo_encoding_type == "stl": - encoding_g = x_encoding - - if self.cross_attention: - encoding_g = self.combined_unet(encoding_g) - - return encoding_g - - # @dataclass # class MetaData(ModelMetaData): # name: str = "DoMINO" @@ -981,7 +496,6 @@ def __init__( nn_basis=self.nn_basis_vol, ) - @profile def forward(self, data_dict, return_volume_neighbors=False): # Loading STL inputs, bounding box grids, precomputed SDF and scaling factors diff --git a/test/models/domino/conftest.py b/test/models/domino/conftest.py new file mode 100644 index 0000000000..9d8c8a71d5 --- /dev/null +++ b/test/models/domino/conftest.py @@ -0,0 +1,116 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass +from typing import Sequence + +import pytest + + +@pytest.fixture(scope="module") +def base_model_params(): + """Base model parameters for testing""" + + @dataclass + class model_params: + @dataclass + class geometry_rep: + @dataclass + class geo_conv: + base_neurons: int = 32 + base_neurons_in: int = 8 + base_neurons_out: int = 8 + surface_hops: int = 1 + volume_hops: int = 1 + volume_radii: Sequence = (0.1, 0.5) + volume_neighbors_in_radius: Sequence = (10, 10) + surface_radii: Sequence = (0.05,) + surface_neighbors_in_radius: Sequence = (10,) + activation: str = "relu" + fourier_features: bool = False + num_modes: int = 5 + + @dataclass + class geo_processor: + base_filters: int = 8 + activation: str = "relu" + processor_type: str = "unet" + self_attention: bool = True + cross_attention: bool = False + + base_filters: int = 8 + geo_conv = geo_conv + geo_processor = geo_processor + + @dataclass + class geometry_local: + base_layer: int = 512 + volume_neighbors_in_radius: Sequence = (128, 128) + surface_neighbors_in_radius: Sequence = (128,) + volume_radii: Sequence = (0.05, 0.1) + surface_radii: Sequence = (0.05,) + + @dataclass + class nn_basis_functions: + base_layer: int = 512 + fourier_features: bool = False + num_modes: int = 5 + activation: str = "relu" + + @dataclass + class local_point_conv: + activation: str = "relu" + + @dataclass + class aggregation_model: + base_layer: int = 512 + activation: str = "relu" + + @dataclass + class position_encoder: + base_neurons: int = 512 + activation: str = "relu" + fourier_features: bool = False + num_modes: int = 5 + + @dataclass + class parameter_model: + base_layer: int = 512 + fourier_features: bool = True + num_modes: int = 5 + activation: str = "relu" + + model_type: str = "combined" + activation: str = "relu" + interp_res: Sequence = (64, 64, 64) # Smaller for testing + use_sdf_in_basis_func: bool = True + positional_encoding: bool = False + surface_neighbors: bool = True + num_neighbors_surface: int = 7 + num_neighbors_volume: int = 7 + use_surface_normals: bool = True + use_surface_area: bool = True + encode_parameters: bool = False + combine_volume_surface: bool = False + geometry_encoding_type: str = "both" + solution_calculation_mode: str = "two-loop" + geometry_rep = geometry_rep + nn_basis_functions = nn_basis_functions + aggregation_model = aggregation_model + position_encoder = position_encoder + geometry_local = geometry_local + + return model_params diff --git a/test/models/domino/test_domino_geometry_rep.py b/test/models/domino/test_domino_geometry_rep.py index 12ede24be5..813b246bae 100644 --- a/test/models/domino/test_domino_geometry_rep.py +++ b/test/models/domino/test_domino_geometry_rep.py @@ -26,7 +26,7 @@ @pytest.mark.parametrize("act", ["relu", "gelu"]) def test_geo_conv_out(device, act): """Test GeoConvOut layer""" - from physicsnemo.models.domino.model import GeoConvOut + from physicsnemo.models.domino.geometry_rep import GeoConvOut torch.manual_seed(0) @@ -59,7 +59,7 @@ class TestParams: @pytest.mark.parametrize("act", ["relu", "gelu"]) def test_geo_processor(device, act): """Test GeoProcessor CNN""" - from physicsnemo.models.domino.model import GeoProcessor + from physicsnemo.models.domino.geometry_rep import GeoProcessor torch.manual_seed(0) @@ -87,7 +87,7 @@ def test_geometry_rep( device, geometry_encoding_type, processor_type, base_model_params ): """Test GeometryRep module with different configurations""" - from physicsnemo.models.domino.model import GeometryRep + from physicsnemo.models.domino.geometry_rep import GeometryRep torch.manual_seed(0) diff --git a/test/models/domino/utils.py b/test/models/domino/utils.py index 8c5fb971f8..6f16a2c9fb 100644 --- a/test/models/domino/utils.py +++ b/test/models/domino/utils.py @@ -14,10 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from dataclasses import dataclass -from typing import Sequence - -import pytest import torch @@ -47,102 +43,6 @@ def generate_test_data(bsize, nx, ny, nz, num_neigh, device): } -@pytest.fixture -def base_model_params(): - """Base model parameters for testing""" - - @dataclass - class model_params: - @dataclass - class geometry_rep: - @dataclass - class geo_conv: - base_neurons: int = 32 - base_neurons_in: int = 8 - base_neurons_out: int = 8 - surface_hops: int = 1 - volume_hops: int = 1 - volume_radii: Sequence = (0.1, 0.5) - volume_neighbors_in_radius: Sequence = (10, 10) - surface_radii: Sequence = (0.05,) - surface_neighbors_in_radius: Sequence = (10,) - activation: str = "relu" - fourier_features: bool = False - num_modes: int = 5 - - @dataclass - class geo_processor: - base_filters: int = 8 - activation: str = "relu" - processor_type: str = "unet" - self_attention: bool = True - cross_attention: bool = False - - base_filters: int = 8 - geo_conv = geo_conv - geo_processor = geo_processor - - @dataclass - class geometry_local: - base_layer: int = 512 - volume_neighbors_in_radius: Sequence = (128, 128) - surface_neighbors_in_radius: Sequence = (128,) - volume_radii: Sequence = (0.05, 0.1) - surface_radii: Sequence = (0.05,) - - @dataclass - class nn_basis_functions: - base_layer: int = 512 - fourier_features: bool = False - num_modes: int = 5 - activation: str = "relu" - - @dataclass - class local_point_conv: - activation: str = "relu" - - @dataclass - class aggregation_model: - base_layer: int = 512 - activation: str = "relu" - - @dataclass - class position_encoder: - base_neurons: int = 512 - activation: str = "relu" - fourier_features: bool = False - num_modes: int = 5 - - @dataclass - class parameter_model: - base_layer: int = 512 - fourier_features: bool = True - num_modes: int = 5 - activation: str = "relu" - - model_type: str = "combined" - activation: str = "relu" - interp_res: Sequence = (64, 64, 64) # Smaller for testing - use_sdf_in_basis_func: bool = True - positional_encoding: bool = False - surface_neighbors: bool = True - num_neighbors_surface: int = 7 - num_neighbors_volume: int = 7 - use_surface_normals: bool = True - use_surface_area: bool = True - encode_parameters: bool = False - combine_volume_surface: bool = False - geometry_encoding_type: str = "both" - solution_calculation_mode: str = "two-loop" - geometry_rep = geometry_rep - nn_basis_functions = nn_basis_functions - aggregation_model = aggregation_model - position_encoder = position_encoder - geometry_local = geometry_local - - return model_params - - def validate_output_shape_and_values(output, expected_shape, check_finite=True): """Validate output tensor shape and values""" if output is not None: From 5732199eb9783b6065b68c27a6ee4e17cc309f77 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Mon, 22 Sep 2025 16:50:13 +0000 Subject: [PATCH 38/98] This commit purges some code that was moved into another commit. --- .../distributed/shard_utils/__init__.py | 2 - physicsnemo/distributed/shard_utils/knn.py | 212 ------------------ .../distributed/shard_utils/mesh_ops.py | 146 ------------ test/distributed/shard_tensor/ops/test_knn.py | 72 ------ test/distributed/shard_tensor/ops/test_sdf.py | 119 ---------- 5 files changed, 551 deletions(-) delete mode 100644 physicsnemo/distributed/shard_utils/knn.py delete mode 100644 physicsnemo/distributed/shard_utils/mesh_ops.py delete mode 100644 test/distributed/shard_tensor/ops/test_knn.py delete mode 100644 test/distributed/shard_tensor/ops/test_sdf.py diff --git a/physicsnemo/distributed/shard_utils/__init__.py b/physicsnemo/distributed/shard_utils/__init__.py index e332307869..ef7b219aa2 100644 --- a/physicsnemo/distributed/shard_utils/__init__.py +++ b/physicsnemo/distributed/shard_utils/__init__.py @@ -37,8 +37,6 @@ def register_shard_wrappers(): sharded_select_backward_helper, sharded_select_helper, ) - from .knn import knn_sharded_wrapper - from .mesh_ops import sharded_signed_distance_field_wrapper # Currently disabled until wrapt is removed # from .natten_patches import na2d_wrapper diff --git a/physicsnemo/distributed/shard_utils/knn.py b/physicsnemo/distributed/shard_utils/knn.py deleted file mode 100644 index 689223000c..0000000000 --- a/physicsnemo/distributed/shard_utils/knn.py +++ /dev/null @@ -1,212 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. -# SPDX-FileCopyrightText: All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Any, Callable - -import numpy as np -import torch -import torch.distributed as dist - -from physicsnemo.utils.neighbors.knn._cuml_impl import knn_impl -from physicsnemo.utils.version_check import check_module_requirements - -check_module_requirements("physicsnemo.distributed.shard_tensor") - -from physicsnemo.distributed import ShardTensor # noqa: E402 -from physicsnemo.distributed.shard_utils.patch_core import ( # noqa: E402 - MissingShardPatch, -) -from physicsnemo.distributed.shard_utils.ring import ( # noqa: E402 - RingPassingConfig, - perform_ring_iteration, -) - - -def ring_knn( - points: ShardTensor, queries: ShardTensor, k: int -) -> tuple[torch.Tensor, torch.Tensor]: - """ - Ring based kNN implementation, where the points travel around a ring and the - queries stay local. - """ - # Each tensor has a _spec attribute, which contains information about the tensor's placement - # and the devices it lives on: - points_spec = points._spec - queries_spec = queries._spec - - # ** In general ** you want to do some checking on the placements, since each - # point cloud might be sharded differently. By construction, I know they're both - # sharded along the points axis here (and not, say, replicated). - - if not points_spec.mesh == queries_spec.mesh: - raise NotImplementedError("Tensors must be sharded on the same mesh") - - mesh = points_spec.mesh - local_group = mesh.get_group(0) - local_size = dist.get_world_size(group=local_group) - mesh_rank = mesh.get_local_rank() - - # points and queries are both sharded - and since we're returning the nearest - # neighbors to points, let's make sure the output keeps that sharding too. - - # One memory-efficient way to do this is with with a ring computation. - # We'll compute the knn on the local tensors, get the distances and outputs, - # then shuffle the queries shards along the mesh. - - # we'll need to sort the results and make sure we have just the top-k, - # which is a little extra computation. - - # Physics nemo has a ring passing utility we can use. - ring_config = RingPassingConfig( - mesh_dim=0, - mesh_size=local_size, - ring_direction="forward", - communication_method="p2p", - ) - - local_points, local_queries = points.to_local(), queries.to_local() - current_dists = None - current_topk_idx = None - - points_spec = points._spec - - points_sharding_shapes = points_spec.sharding_shapes()[0] - - sharding_dim = points_spec.placements[0].dim - - # This is to help specify the offset from local to global tensor. - points_strides_along_ring = [s[sharding_dim] for s in points_sharding_shapes] - points_strides_along_ring = np.cumsum(points_strides_along_ring) - points_strides_along_ring = [ - 0, - ] + list(points_strides_along_ring[0:-1]) - - for i in range(local_size): - source_rank = (mesh_rank - i) % local_size - - # For point clouds, we need to pass the size of the incoming shard. - next_source_rank = (source_rank - 1) % local_size - recv_shape = points_sharding_shapes[next_source_rank] - if i != local_size - 1: - # Don't do a ring on the last iteration. - next_local_points = perform_ring_iteration( - local_points, - mesh, - ring_config, - recv_shape=recv_shape, - ) - - # Compute the knn on the local tensors: - local_idx, local_distances = knn_impl(local_points, local_queries, k) - - # The local_idx indexes into the _local_ tensor, but for - # Correctness we need it to index into the _global_ tensor. - # Make sure to index using the rank the points came from! - offset = points_strides_along_ring[source_rank] - local_idx = local_idx + offset - - if current_dists is None: - current_dists = local_distances - current_topk_idx = local_idx - else: - # Combine with the topk so far: - current_dists = torch.cat([current_dists, local_distances], dim=1) - current_topk_idx = torch.cat([current_topk_idx, local_idx], dim=1) - # And take the topk again: - current_dists, running_indexes = torch.topk( - current_dists, k=k, dim=1, sorted=True, largest=False - ) - - # This creates proper indexing to select specific elements along dim 1 - - current_topk_idx = torch.gather(current_topk_idx, 1, running_indexes) - - if i != local_size - 1: - # Don't do a ring on the last iteration. - local_points = next_local_points - - return current_topk_idx, current_dists - - -def extract_knn_args(points, queries, k, *args, **kwargs): - return points, queries, k - - -def knn_sharded_wrapper( - func: Callable, types: Any, args: tuple, kwargs: dict -) -> tuple[ShardTensor, ShardTensor]: - """ - Dispatch the proper kNN tools based on the input sharding. - """ - - points, queries, k = extract_knn_args(*args, **kwargs) - - # kNN will only work with 1D sharding - if points._spec.mesh != queries._spec.mesh: - raise MissingShardPatch( - "sharded knn: All point inputs must be on the same mesh" - ) - - # make sure all meshes are 1D - if points._spec.mesh.ndim != 1: - raise MissingShardPatch( - "point_cloud_ops.radius_search_wrapper: All point inputs must be on 1D meshes" - ) - - # Do we need a ring? - points_placement = points._spec.placements[0] - - if points_placement.is_shard(): - # We need a ring - idx, distances = ring_knn(points, queries, k) - else: - # No ring is needed. Get the local tensors and compute directly: - local_points = points.to_local() # This is replicated, getting all of it - local_queries = queries.to_local() # This sharding doesn't matter! - idx, distances = knn_impl(local_points, local_queries, k) - - # The outputs only depend on the local queries shape - input_queries_spec = queries._spec - # The global output tensor will be (N_q, k) - - output_queries_shard_shapes = { - mesh_dim: tuple( - torch.Size((s[0], k)) - for s in input_queries_spec.sharding_shapes()[mesh_dim] - ) - for mesh_dim in input_queries_spec.sharding_shapes().keys() - } - - # Convert the selected points and indexes to shards: - shard_idx = ShardTensor.from_local( - idx, - queries._spec.mesh, - queries._spec.placements, - sharding_shapes=output_queries_shard_shapes, - ) - shard_distances = ShardTensor.from_local( - distances, - queries._spec.mesh, - queries._spec.placements, - sharding_shapes=output_queries_shard_shapes, - ) - - return shard_idx, shard_distances - - -ShardTensor.register_named_function_handler( - "physicsnemo.knn_cuml.default", knn_sharded_wrapper -) diff --git a/physicsnemo/distributed/shard_utils/mesh_ops.py b/physicsnemo/distributed/shard_utils/mesh_ops.py deleted file mode 100644 index c04ad66a89..0000000000 --- a/physicsnemo/distributed/shard_utils/mesh_ops.py +++ /dev/null @@ -1,146 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. -# SPDX-FileCopyrightText: All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Any - -import torch - -from physicsnemo.utils.sdf import signed_distance_field -from physicsnemo.utils.version_check import check_module_requirements - -check_module_requirements("physicsnemo.distributed.shard_tensor") - - -from physicsnemo.distributed import ShardTensor # noqa: E402 - - -def sharded_signed_distance_field( - mesh_vertices: ShardTensor, - mesh_indices: ShardTensor, - input_points: ShardTensor, - max_dist: float = 1e8, - use_sign_winding_number: bool = False, -) -> tuple[ShardTensor, ShardTensor]: - """ - Compute the signed distance field for a (possibly sharded) mesh. - - Args: - mesh_vertices: Sharded tensor of mesh vertices - mesh_indices: Sharded tensor of mesh indices - input_points: Sharded tensor of input points - max_dist: Maximum distance for the signed distance field - use_sign_winding_number: Whether to use sign winding number - """ - - # We can not actually compute the signed distance function on a sharded mesh. - # So, in this case, force the mesh to replicate placement if necessary: - - local_mesh_vertices = mesh_vertices.full_tensor() - local_mesh_indices = mesh_indices.full_tensor() - - # For the input points, though, it doesn't matter - they can be sharded. - # No communication is necessary - - local_input_points = input_points.to_local() - - local_sdf, local_sdf_hit_point = signed_distance_field( - local_mesh_vertices, - local_mesh_indices, - local_input_points, - max_dist, - use_sign_winding_number, - ) - - # Then, construct the output shard tensors: - - if input_points._spec.placements[0].is_shard(): - # Compute the output sharding shapes - - # Output shape is always (N, 1), hit point is (N, 3) - input_shard_shapes = input_points._spec.sharding_shapes() - - output_shard_shapes = { - mesh_dim: tuple(torch.Size((s[0],)) for s in input_shard_shapes[mesh_dim]) - for mesh_dim in input_shard_shapes.keys() - } - - sharded_sdf_output = ShardTensor.from_local( - local_sdf, - input_points._spec.mesh, - input_points._spec.placements, - sharding_shapes=output_shard_shapes, - ).reshape(input_points.shape[:-1]) - - sharded_sdf_hit_point_output = ShardTensor.from_local( - local_sdf_hit_point, - input_points._spec.mesh, - input_points._spec.placements, - sharding_shapes=input_shard_shapes, - ).reshape(input_points.shape) - - else: - # The input points were replicated, use that for output: - sharded_sdf_output = ShardTensor.from_local( - local_sdf, - input_points._spec.mesh, - input_points._spec.placements, - ) - sharded_sdf_hit_point_output = ShardTensor.from_local( - local_sdf_hit_point, - input_points._spec.mesh, - input_points._spec.placements, - ) - - return sharded_sdf_output, sharded_sdf_hit_point_output - - -def repackage_radius_search_wrapper_args( - mesh_vertices: torch.Tensor, - mesh_indices: torch.Tensor, - input_points: torch.Tensor, - max_dist: float = 1e8, - use_sign_winding_number: bool = False, - *args, - **kwargs, -) -> tuple[ShardTensor, ShardTensor, dict]: - """Repackages sdf arguments into a standard format.""" - # Extract any additional parameters that might be in kwargs - # or use defaults if not provided - return_kwargs = { - "max_dist": max_dist, - "use_sign_winding_number": use_sign_winding_number, - } - - # Add any explicitly passed parameters - if kwargs: - return_kwargs.update(kwargs) - - return mesh_vertices, mesh_indices, input_points, return_kwargs - - -def sharded_signed_distance_field_wrapper( - func: Any, type: Any, args: tuple, kwargs: dict -) -> tuple[ShardTensor, ShardTensor]: - """ - Wrapper for sharded_signed_distance_field to support sharded tensors. - """ - - return sharded_signed_distance_field(*args, **kwargs) - - -ShardTensor.register_named_function_handler( - "physicsnemo.signed_distance_field.default", sharded_signed_distance_field_wrapper -) diff --git a/test/distributed/shard_tensor/ops/test_knn.py b/test/distributed/shard_tensor/ops/test_knn.py deleted file mode 100644 index b41b62748a..0000000000 --- a/test/distributed/shard_tensor/ops/test_knn.py +++ /dev/null @@ -1,72 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. -# SPDX-FileCopyrightText: All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest -import torch -from torch.distributed.tensor.placement_types import Replicate, Shard - -from physicsnemo.distributed import DistributedManager, scatter_tensor -from physicsnemo.utils.neighbors import knn - -from .utils import numerical_shard_tensor_check - - -class kNNModule(torch.nn.Module): - def __init__( - self, - num_neighbors=4, - ): - super().__init__() - - self.num_neighbors = num_neighbors - - def forward(self, points, queries): - return knn(points, queries, self.num_neighbors) - - -@pytest.mark.multigpu_static -@pytest.mark.parametrize("scatter_points", [True, False]) -@pytest.mark.parametrize("scatter_queries", [True, False]) -def test_knn_1dmesh( - distributed_mesh, - scatter_points: bool, - scatter_queries: bool, -): - dm = DistributedManager() - - # Generate random points for the points and queries - points = torch.randn(1043, 3).to(dm.device) - queries = torch.randn(2198, 3).to(dm.device) - - # points = torch.randn(10, 3).to(dm.device) - # queries = torch.randn(8, 3).to(dm.device) - - # Distribute the inputs: - points_placements = (Shard(0),) if scatter_points else (Replicate(),) - queries_placements = (Shard(0),) if scatter_queries else (Replicate(),) - - sharded_points = scatter_tensor(points, 0, distributed_mesh, points_placements) - sharded_queries = scatter_tensor(queries, 0, distributed_mesh, queries_placements) - - module = kNNModule() - - numerical_shard_tensor_check( - distributed_mesh, - module, - [sharded_points, sharded_queries], - {}, - check_grads=False, - ) diff --git a/test/distributed/shard_tensor/ops/test_sdf.py b/test/distributed/shard_tensor/ops/test_sdf.py deleted file mode 100644 index 079127a229..0000000000 --- a/test/distributed/shard_tensor/ops/test_sdf.py +++ /dev/null @@ -1,119 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. -# SPDX-FileCopyrightText: All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import pytest -import torch -from scipy.spatial import ConvexHull -from torch.distributed.tensor.placement_types import Replicate, Shard - -from physicsnemo.distributed import DistributedManager, scatter_tensor -from physicsnemo.utils.sdf import signed_distance_field - -from .utils import numerical_shard_tensor_check - - -# This is from the domino datapipe, too: -def random_sample_on_unit_sphere(n_points): - # Random points on the sphere: - phi = np.random.uniform(0, 2 * np.pi, n_points) - cos_theta = np.random.uniform(-1, 1, n_points) - theta = np.arccos(cos_theta) - - # Convert to x/y/z and stack: - x = np.sin(theta) * np.cos(phi) - y = np.sin(theta) * np.sin(phi) - z = np.cos(theta) - points = np.stack([x, y, z], axis=1) - return points - - -def mesh_vertices_and_indices(n_points): - # We are generating a mesh on a random sphere. - stl_points = random_sample_on_unit_sphere(n_points) - - # Generate the triangles with ConvexHull: - hull = ConvexHull(stl_points) - faces = hull.simplices # (M, 3) - - return stl_points, faces - - -class SDFModule(torch.nn.Module): - """ - This is a test module to run the SDF function ... don't use it elsewhere. - """ - - def __init__(self, max_dist=1e8, use_sign_winding_number=False): - super().__init__() - - self.max_dist = max_dist - self.use_sign_winding_number = use_sign_winding_number - - def forward(self, mesh_vertices, mesh_indices, input_points): - return signed_distance_field( - mesh_vertices, - mesh_indices, - input_points, - self.max_dist, - self.use_sign_winding_number, - ) - - -@pytest.mark.multigpu_static -@pytest.mark.parametrize("scatter_mesh", [True, False]) -@pytest.mark.parametrize("scatter_inputs", [True, False]) -def test_sdf_1dmesh( - distributed_mesh, - scatter_mesh: bool, - scatter_inputs: bool, -): - dm = DistributedManager() - - # Generate a mesh on a unit sphere: - mesh_vertices, mesh_indices = mesh_vertices_and_indices(932) - - # Cast the vertices and indices to tensors: - mesh_vertices = torch.tensor(mesh_vertices).to(dm.device) - mesh_indices = torch.tensor(mesh_indices.flatten()).to(dm.device) - - # Distribute the inputs: - mesh_placements = (Shard(0),) if scatter_mesh else (Replicate(),) - input_placements = (Shard(0),) if scatter_inputs else (Replicate(),) - - sharded_mesh_vertices = scatter_tensor( - mesh_vertices, 0, distributed_mesh, mesh_placements - ) - sharded_mesh_indices = scatter_tensor( - mesh_indices, 0, distributed_mesh, mesh_placements - ) - - # Generate random points in the volume: - input_points = torch.randn(1043, 3).to(dm.device) - - sharded_input_points = scatter_tensor( - input_points, 0, distributed_mesh, input_placements - ) - - module = SDFModule() - - numerical_shard_tensor_check( - distributed_mesh, - module, - [sharded_mesh_vertices, sharded_mesh_indices, sharded_input_points], - {}, - check_grads=False, - ) From b2b10ad6a1d2895b84fb65d5ba0f1b450ac07dad Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Mon, 22 Sep 2025 16:51:32 +0000 Subject: [PATCH 39/98] Missed a piece of moved code. --- test/distributed/shard_tensor/ops/utils.py | 44 ---------------------- 1 file changed, 44 deletions(-) diff --git a/test/distributed/shard_tensor/ops/utils.py b/test/distributed/shard_tensor/ops/utils.py index cdece254e7..19e3de4d73 100644 --- a/test/distributed/shard_tensor/ops/utils.py +++ b/test/distributed/shard_tensor/ops/utils.py @@ -18,7 +18,6 @@ from collections.abc import Iterable import torch -import torch.distributed as dist from torch.distributed.tensor import DTensor, distribute_module from torch.distributed.tensor.device_mesh import DeviceMesh @@ -84,18 +83,6 @@ def sharded_to_local(container): def default_tensor_comparison(output, d_output, atol, rtol): # We assume a single output! - if not isinstance(output, torch.Tensor): - if isinstance(output, Iterable): - return all( - [ - default_tensor_comparison(item, d_item, atol, rtol) - for item, d_item in zip(output, d_output) - ] - ) - - if isinstance(d_output, ShardTensor): - validate_shard_tensor_spec(d_output) - local_output = sharded_to_local(d_output) # Check forward agreement: @@ -108,37 +95,6 @@ def default_loss_fn(output): return output.mean() -def validate_shard_tensor_spec(shard_tensor): - # Take a shard tensor and cross check on the dimensions. - # Take care about assertions here, since this is a collective - - # Check out shard shapes - # The local shard shape needs to match the local tensor shape: - sharding_shapes = shard_tensor._spec.sharding_shapes() - mesh = shard_tensor._spec.mesh - - for mesh_dim in range(mesh.ndim): - mesh_rank = mesh.get_local_rank(mesh_dim) - mesh_size = dist.get_world_size(mesh.get_group(mesh_dim)) - - # Is this axis sharded? - this_placement = shard_tensor._spec.placements[mesh_dim] - if this_placement.is_shard(): - # This axis is sharded. the mesh dim should be in the shapes - assert mesh_dim in sharding_shapes.keys() - - # The length of the sharding shapes should match the mesh size: - assert len(sharding_shapes[mesh_dim]) == mesh_size - - # The local shape should match the listed shape for this rank: - # this_shape = shard_tensor._spec.sharding_shapes()[mesh_dim] - # print(f"local tensor shape: {shard_tensor._local_tensor.shape}") - # print(f"sharding shapes: {sharding_shapes[mesh_dim][mesh_rank]}") - assert ( - sharding_shapes[mesh_dim][mesh_rank] == shard_tensor._local_tensor.shape - ) - - def numerical_shard_tensor_check( mesh: DeviceMesh, module: torch.nn.Module, From 8a34242b0f9309da2edd495684ffb4c289fda4fd Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Mon, 22 Sep 2025 17:59:06 +0000 Subject: [PATCH 40/98] move sdf, knn, and radius_search torch interface and stream fixes to a different PR --- physicsnemo/utils/neighbors/knn/_cuml_impl.py | 11 +- .../neighbors/radius_search/_warp_impl.py | 167 +++++++++--------- physicsnemo/utils/sdf.py | 157 ++++++++-------- 3 files changed, 158 insertions(+), 177 deletions(-) diff --git a/physicsnemo/utils/neighbors/knn/_cuml_impl.py b/physicsnemo/utils/neighbors/knn/_cuml_impl.py index 72546cf6a7..10d20ce1f5 100644 --- a/physicsnemo/utils/neighbors/knn/_cuml_impl.py +++ b/physicsnemo/utils/neighbors/knn/_cuml_impl.py @@ -28,21 +28,12 @@ def knn_impl( points: torch.Tensor, queries: torch.Tensor, k: int = 3 ) -> tuple[torch.Tensor, torch.Tensor]: - # Create a cuml handle to ensure we use the right stream: - torch_stream = torch.cuda.current_stream() - - # Get the raw CUDA stream pointer (as an integer) - ptr = torch_stream.cuda_stream - - # Build a cuML handle with that stream - handle = cuml.Handle(stream=ptr) - # Use dlpack to move the data without copying between pytorch and cuml: points = cp.from_dlpack(points) queries = cp.from_dlpack(queries) # Construct the knn: - knn = cuml.neighbors.NearestNeighbors(n_neighbors=k, handle=handle) + knn = cuml.neighbors.NearestNeighbors(n_neighbors=k) # First pass partitions everything in points to make lookups fast knn.fit(points) diff --git a/physicsnemo/utils/neighbors/radius_search/_warp_impl.py b/physicsnemo/utils/neighbors/radius_search/_warp_impl.py index 997f95d3fd..9b15b6816f 100644 --- a/physicsnemo/utils/neighbors/radius_search/_warp_impl.py +++ b/physicsnemo/utils/neighbors/radius_search/_warp_impl.py @@ -278,6 +278,11 @@ def radius_search_impl( if points.device != queries.device: raise ValueError("points and queries must be on the same device") + # We're in the warp-backended regime. So, the first thing to do is to convert these torch tensors to warp + # These are readonly in warp, allocated with pytorch. + wp_points = wp.from_torch(points, dtype=wp.vec3) + wp_queries = wp.from_torch(queries, dtype=wp.vec3, return_ctype=True) + N_queries = len(queries) # Compute follows data. @@ -292,104 +297,92 @@ def radius_search_impl( wp_launch_stream = None wp_launch_device = "cpu" # CPUs have no streams - with wp.ScopedStream(wp_launch_stream): - # We're in the warp-backended regime. So, the first thing to do is to convert these torch tensors to warp - # These are readonly in warp, allocated with pytorch. - wp_points = wp.from_torch(points, dtype=wp.vec3) - wp_queries = wp.from_torch(queries, dtype=wp.vec3, return_ctype=True) + # We need to create a hash grid: + grid = wp.HashGrid(dim_x=128, dim_y=128, dim_z=128, device=wp_points.device) + grid.reserve(N_queries) + grid.build(points=wp_points, radius=0.5 * radius) - # We need to create a hash grid: - grid = wp.HashGrid(dim_x=128, dim_y=128, dim_z=128, device=wp_points.device) - grid.reserve(N_queries) - grid.build(points=wp_points, radius=0.5 * radius) + # Now, the situations diverge based on max_points. - # Now, the situations diverge based on max_points. + if max_points is None: + total_count, wp_offset = count_neighbors( + grid, + wp_points, + wp_queries, + wp_launch_device, + wp_launch_stream, + radius, + N_queries, + ) - if max_points is None: - total_count, wp_offset = count_neighbors( - grid, - wp_points, - wp_queries, - wp_launch_device, - wp_launch_stream, - radius, - N_queries, + if not total_count < 2**31 - 1: + raise RuntimeError( + f"Total found neighbors is too large: {total_count} > 2**31 - 1" ) - if not total_count < 2**31 - 1: - raise RuntimeError( - f"Total found neighbors is too large: {total_count} > 2**31 - 1" - ) + return gather_neighbors( + grid, + points.device, + wp_points, + wp_queries, + wp_offset, + wp_launch_device, + wp_launch_stream, + radius, + N_queries, + return_dists, + return_points, + total_count, + ) - return gather_neighbors( - grid, - points.device, - wp_points, - wp_queries, - wp_offset, - wp_launch_device, - wp_launch_stream, - radius, - N_queries, - return_dists, - return_points, - total_count, + else: + # With a fixed number of output points, we have no need for a second kernel. + indices = torch.full( + (N_queries, max_points), 0, dtype=torch.int32, device=points.device + ) + if return_dists: + distances = torch.zeros( + (N_queries, max_points), dtype=torch.float32, device=points.device ) - else: - # With a fixed number of output points, we have no need for a second kernel. - indices = torch.full( - (N_queries, max_points), 0, dtype=torch.int32, device=points.device + distances = torch.empty(0, dtype=torch.float32, device=points.device) + num_neighbors = torch.zeros( + (N_queries,), dtype=torch.int32, device=points.device + ) + + if return_points: + points = torch.zeros( + (len(queries), max_points, 3), + dtype=torch.float32, + device=points.device, ) - if return_dists: - distances = torch.zeros( - (N_queries, max_points), - dtype=torch.float32, - device=points.device, - ) - else: - distances = torch.empty( - 0, dtype=torch.float32, device=points.device - ) - num_neighbors = torch.zeros( - (N_queries,), dtype=torch.int32, device=points.device + else: + points = torch.empty( + (0, max_points, 3), dtype=torch.float32, device=points.device ) + # This kernel selects up to max_points hits per query. + # It is not necessarily deterministic. + # If the number of matches > max_points, you may get different results. - if return_points: - points = torch.zeros( - (len(queries), max_points, 3), - dtype=torch.float32, - device=points.device, - ) - else: - points = torch.empty( - (0, max_points, 3), dtype=torch.float32, device=points.device - ) - # This kernel selects up to max_points hits per query. - # It is not necessarily deterministic. - # If the number of matches > max_points, you may get different results. - - wp.launch( - kernel=radius_search_limited_select, - dim=N_queries, - inputs=[ - grid.id, - wp_points, - wp_queries, - max_points, - radius, - wp.from_torch(indices, return_ctype=True), - wp.from_torch(num_neighbors, return_ctype=True), - return_dists, - wp.from_torch(distances, return_ctype=True), - return_points, - wp.from_torch(points, return_ctype=True) - if return_points - else None, - ], - stream=wp_launch_stream, - device=wp_launch_device, - ) + wp.launch( + kernel=radius_search_limited_select, + dim=N_queries, + inputs=[ + grid.id, + wp_points, + wp_queries, + max_points, + radius, + wp.from_torch(indices, return_ctype=True), + wp.from_torch(num_neighbors, return_ctype=True), + return_dists, + wp.from_torch(distances, return_ctype=True), + return_points, + wp.from_torch(points, return_ctype=True) if return_points else None, + ], + stream=wp_launch_stream, + device=wp_launch_device, + ) # Handle the matrix of return values: return indices, points, distances, num_neighbors diff --git a/physicsnemo/utils/sdf.py b/physicsnemo/utils/sdf.py index 9abbafa460..a095074a96 100644 --- a/physicsnemo/utils/sdf.py +++ b/physicsnemo/utils/sdf.py @@ -14,7 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import torch +import cupy as cp +import numpy as np import warp as wp wp.config.quiet = True @@ -27,6 +28,7 @@ def _bvh_query_distance( max_dist: wp.float32, sdf: wp.array(dtype=wp.float32), sdf_hit_point: wp.array(dtype=wp.vec3f), + sdf_hit_point_id: wp.array(dtype=wp.int32), use_sign_winding_number: bool = False, ): """ @@ -65,16 +67,22 @@ def _bvh_query_distance( sdf[tid] = res.sign * wp.abs(wp.length(points[tid] - p_closest)) sdf_hit_point[tid] = p_closest + sdf_hit_point_id[tid] = res.face + + +Array = np.ndarray | cp.ndarray -@torch.library.custom_op("physicsnemo::signed_distance_field", mutates_args=()) def signed_distance_field( - mesh_vertices: torch.Tensor, - mesh_indices: torch.Tensor, - input_points: torch.Tensor, + mesh_vertices: Array, + mesh_indices: Array, + input_points: Array, max_dist: float = 1e8, + include_hit_points: bool = False, + include_hit_points_id: bool = False, use_sign_winding_number: bool = False, -) -> tuple[torch.Tensor, torch.Tensor]: + return_cupy: bool | None = None, +) -> Array | tuple[Array, ...]: """ Computes the signed distance field (SDF) for a given mesh and input points. @@ -92,7 +100,11 @@ def signed_distance_field( max_dist (float, optional): Maximum distance within which to search for the closest point on the mesh. Default is 1e8. include_hit_points (bool, optional): Whether to include hit points in - the output. Here, + the output. Here, "hit points" are the points on the mesh that are + closest to the input points, and hence, are defining the SDF. + Default is False. + include_hit_points_id (bool, optional): Whether to include hit point + IDs in the output. Default is False. use_sign_winding_number (bool, optional): Whether to use sign winding number method for SDF. Default is False. If False, your mesh should be watertight to obtain correct results. @@ -103,103 +115,88 @@ def signed_distance_field( Returns: ------- Returns: - tuple[torch.Tensor, torch.Tensor] of: - - signed distance to the mesh, per input point - - hith point, per input point. "hit points" are the points on the - mesh that are closest to the input points, and hence, are - defining the SDF. + np.ndarray | cp.ndarray or tuple: + - If both `include_hit_points` and `include_hit_points_id` are False + (default), returns a 1D array of signed distances for each input + point. + - If `include_hit_points` is True, returns a tuple: (sdf, + hit_points), where `hit_points` contains the closest mesh point + for each input point. + - If `include_hit_points_id` is True, returns a tuple: (sdf, + hit_point_ids), where `hit_point_ids` contains the face index of + the closest mesh face for each input point. + - If both `include_hit_points` and `include_hit_points_id` are True, + returns a tuple: (sdf, hit_points, hit_point_ids). + - The returned array type (NumPy or CuPy) is determined by the + `return_cupy` argument, or inferred from the input arrays. Example: ------- >>> mesh_vertices = [(0, 0, 0), (1, 0, 0), (0, 1, 0)] - >>> mesh_indices = torch.tensor((0, 1, 2)) - >>> input_points = torch.tensor((0.5, 0.5, 0.5)) + >>> mesh_indices = np.array((0, 1, 2)) + >>> input_points = [(0.5, 0.5, 0.5)] >>> signed_distance_field(mesh_vertices, mesh_indices, input_points) - (tensor([0.5]), tensor([0.5, 0.5, 0.5])) + array([0.5], dtype=float32) """ + if return_cupy is None: + return_cupy = any( + isinstance(arr, cp.ndarray) + for arr in (mesh_vertices, mesh_indices, input_points) + ) - if input_points.shape[-1] != 3: - raise ValueError("Input points must be a tensor with last dimension of size 3") - - input_shape = input_points.shape - - # Flatten the input points: - input_points = input_points.reshape(-1, 3) - - N = len(input_points) + wp.init() - # Allocate output tensors with torch: - sdf = torch.zeros(N, dtype=torch.float32, device=input_points.device) - sdf_hit_point = torch.zeros(N, 3, dtype=torch.float32, device=input_points.device) + if isinstance(mesh_vertices, cp.ndarray): + device = mesh_vertices.device + wp_device = f"cuda:{device.id}" + else: + wp_device = wp.get_device() - if input_points.device.type == "cuda": - wp_launch_stream = wp.stream_from_torch( - torch.cuda.current_stream(input_points.device) + with wp.ScopedDevice(wp_device): + mesh = wp.Mesh( + points=wp.array(mesh_vertices, dtype=wp.vec3f, device=wp_device), + indices=wp.array(mesh_indices, dtype=wp.int32, device=wp_device), ) - wp_launch_device = None # We explicitly pass None if using the stream. - else: - wp_launch_stream = None - wp_launch_device = "cpu" # CPUs have no streams - with wp.ScopedStream(wp_launch_stream): - wp.init() + warp_input_points = wp.array(input_points, dtype=wp.vec3f, device=wp_device) - # zero copy the vertices, indices, and input points to warp: - wp_vertices = wp.from_torch(mesh_vertices.to(torch.float32), dtype=wp.vec3) - wp_indices = wp.from_torch(mesh_indices.to(torch.int32), dtype=wp.int32) - wp_input_points = wp.from_torch(input_points.to(torch.float32), dtype=wp.vec3) + N = len(warp_input_points) - # Convert output points: - wp_sdf = wp.from_torch(sdf, dtype=wp.float32) - wp_sdf_hit_point = wp.from_torch(sdf_hit_point, dtype=wp.vec3f) - - mesh = wp.Mesh( - points=wp_vertices, - indices=wp_indices, - support_winding_number=use_sign_winding_number, - ) + sdf = wp.empty(shape=(N,), dtype=wp.float32, device=wp_device) + sdf_hit_point = wp.empty(shape=(N,), dtype=wp.vec3f, device=wp_device) + sdf_hit_point_id = wp.empty(shape=(N,), dtype=wp.int32, device=wp_device) wp.launch( kernel=_bvh_query_distance, dim=N, inputs=[ mesh.id, - wp_input_points, + warp_input_points, max_dist, - wp_sdf, - wp_sdf_hit_point, + sdf, + sdf_hit_point, + sdf_hit_point_id, use_sign_winding_number, ], - device=wp_launch_device, - stream=wp_launch_stream, + device=wp_device, ) - # Unflatten the output to be like the input: - sdf = sdf.reshape(input_shape[:-1]) - sdf_hit_point = sdf_hit_point.reshape(input_shape) - - return sdf.to(input_points.dtype), sdf_hit_point.to(input_points.dtype) + def convert(array: wp.array) -> np.ndarray | cp.ndarray: + """Converts a Warp array to CuPy/NumPy based on the `return_cupy` flag.""" + if return_cupy: + return cp.asarray(array) + else: + return array.numpy() + arrays_to_return: list[np.ndarray | cp.ndarray] = [convert(sdf)] -@signed_distance_field.register_fake -def _( - mesh_vertices: torch.Tensor, - mesh_indices: torch.Tensor, - input_points: torch.Tensor, - max_dist: float = 1e8, - use_sign_winding_number: bool = False, -) -> tuple[torch.Tensor, torch.Tensor]: - if mesh_vertices.device != input_points.device: - raise RuntimeError("mesh_vertices and input_points must be on the same device") + if include_hit_points: + arrays_to_return.append(convert(sdf_hit_point)) + if include_hit_points_id: + arrays_to_return.append(convert(sdf_hit_point_id)) - if mesh_vertices.device != mesh_indices.device: - raise RuntimeError("mesh_vertices and mesh_indices must be on the same device") - - N = input_points.shape[0] - - sdf_output = torch.empty(N, 1, device=input_points.device, dtype=input_points.dtype) - sdf_hit_point_output = torch.empty( - N, 3, device=input_points.device, dtype=input_points.dtype - ) - - return sdf_output, sdf_hit_point_output + return ( + arrays_to_return[0] + if len(arrays_to_return) == 1 + else tuple(arrays_to_return) + ) From 3c0a551fd15473bae039fbae9c4e2441d04032f1 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Mon, 22 Sep 2025 18:02:14 +0000 Subject: [PATCH 41/98] Move sdf test changes to a different PR --- test/utils/test_sdf.py | 51 ++++++++++++++++-------------------------- 1 file changed, 19 insertions(+), 32 deletions(-) diff --git a/test/utils/test_sdf.py b/test/utils/test_sdf.py index f449469b5a..107e5e0316 100644 --- a/test/utils/test_sdf.py +++ b/test/utils/test_sdf.py @@ -16,13 +16,12 @@ # ruff: noqa: E402 -import pytest -import torch +import numpy as np from pytest_utils import import_or_fail def tet_verts(flip_x=1): - tet = torch.tensor( + tet = np.array( [ flip_x * 0, 0, @@ -61,47 +60,35 @@ def tet_verts(flip_x=1): 0, 1, ], - dtype=torch.float64, + dtype=np.float64, ) return tet @import_or_fail("warp") -@pytest.mark.parametrize("dtype", [torch.float32, torch.float64]) -@pytest.mark.parametrize("device", ["cpu", "cuda"]) -def test_sdf(pytestconfig, dtype, device): +def test_sdf(pytestconfig): from physicsnemo.utils.sdf import signed_distance_field - mesh_vertices = tet_verts().reshape(-1, 3) + tet = tet_verts() - if device == "cuda": - device = torch.device("cuda") - else: - device = torch.device("cpu") - - mesh_indices = torch.tensor( - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], dtype=torch.int32 + sdf_tet = signed_distance_field( + tet, + np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]), + np.array([1, 1, 1, 0.1, 0.1, 0.1], dtype=np.float64), ) - input_points = torch.tensor([[1, 1, 1], [0.05, 0.1, 0.1]], dtype=torch.float64) - - mesh_vertices = mesh_vertices.to(dtype) - input_points = input_points.to(dtype) + np.testing.assert_allclose(sdf_tet, [1.15470052, -0.1], atol=1e-7) - sdf_tet, sdf_hit_point = signed_distance_field( - mesh_vertices, - mesh_indices, - input_points, - use_sign_winding_number=False, + sdf_tet, sdf_hit_point, sdf_hit_point_id = signed_distance_field( + tet, + np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], dtype=np.int32), + np.array([1, 1, 1, 0.12, 0.11, 0.1], dtype=np.float64), + include_hit_points=True, + include_hit_points_id=True, ) - - expected_sdf = torch.tensor([[1.1547], [-0.05]], dtype=dtype) - assert torch.allclose(sdf_tet, expected_sdf, atol=1e-7) - - assert torch.allclose( + np.testing.assert_allclose( sdf_hit_point, - torch.tensor( - [[0.33333322, 0.33333334, 0.3333334], [0.0, 0.10, 0.10]], dtype=dtype - ), + [[0.33333322, 0.33333334, 0.3333334], [0.12000002, 0.11, 0.0]], atol=1e-7, ) + np.testing.assert_allclose(sdf_hit_point_id, [3, 0], atol=1e-7) From 737201fef88e259776ee20fbcb7d2935e1abc469 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Mon, 22 Sep 2025 20:15:24 +0000 Subject: [PATCH 42/98] Move minor model changes to the model refactor. --- physicsnemo/models/domino/model.py | 61 +++++++++++------------------- 1 file changed, 22 insertions(+), 39 deletions(-) diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py index 24588b123c..c95f971e97 100644 --- a/physicsnemo/models/domino/model.py +++ b/physicsnemo/models/domino/model.py @@ -155,8 +155,9 @@ def forward( - outputs: Tensor containing coordinates of the neighboring points """ batch_size = x.shape[0] + nx, ny, nz = self.grid_resolution - p_grid = p_grid.reshape(batch_size, -1, 3) + p_grid = torch.reshape(p_grid, (batch_size, nx * ny * nz, 3)) if reverse_mapping: mapping, outputs = radius_search( @@ -591,23 +592,15 @@ def forward( if self.geo_encoding_type == "both" or self.geo_encoding_type == "stl": # Calculate multi-scale geoemtry dependency x_encoding = [] - for j in range(len(self.radii)): - with torch.autograd.profiler.record_function(f"bq_warp_{j}"): - mapping, k_short = self.bq_warp[j](x, p_grid) - x_encoding_inter = self.geo_conv_out[j](k_short, p_grid) - # Propagate information in the geometry enclosed BBox - for _i in range(self.hops): - with torch.autograd.profiler.record_function( - f"geo_processor_{j}_{_i}" - ): - dx = self.geo_processors[j](x_encoding_inter) / self.hops - x_encoding_inter = x_encoding_inter + dx - x_encoding_inter = self.geo_processor_out[j](x_encoding_inter) - - x_encoding.append(x_encoding_inter) - - # current_stream. + mapping, k_short = self.bq_warp[j](x, p_grid) + x_encoding_inter = self.geo_conv_out[j](k_short, p_grid) + # Propagate information in the geometry enclosed BBox + for _ in range(self.hops): + dx = self.geo_processors[j](x_encoding_inter) / self.hops + x_encoding_inter = x_encoding_inter + dx + x_encoding_inter = self.geo_processor_out[j](x_encoding_inter) + x_encoding.append(x_encoding_inter) x_encoding = torch.cat(x_encoding, dim=1) if self.geo_encoding_type == "both" or self.geo_encoding_type == "sdf": @@ -1666,7 +1659,6 @@ def calculate_solution( return_volume_neighbors=False, ): """Function to approximate solution sampling the neighborhood information""" - if eval_mode == "volume": num_variables = self.num_variables_vol nn_basis = self.nn_basis_vol @@ -1884,6 +1876,9 @@ def forward(self, data_dict, return_volume_neighbors=False): # Bounding box grid s_grid = data_dict["surf_grid"] sdf_surf_grid = data_dict["sdf_surf_grid"] + # Scaling factors + surf_max = data_dict["surface_min_max"][:, 1] + surf_min = data_dict["surface_min_max"][:, 0] # Parameters global_params_values = data_dict["global_params_values"] @@ -1894,17 +1889,12 @@ def forward(self, data_dict, return_volume_neighbors=False): # Computational domain grid p_grid = data_dict["grid"] sdf_grid = data_dict["sdf_grid"] - if "volume_min_max" in data_dict.keys(): - # Scaling factors - vol_max = data_dict["volume_min_max"][:, 1] - vol_min = data_dict["volume_min_max"][:, 0] - - # Normalize based on computational domain - geo_centers_vol = ( - 2.0 * (geo_centers - vol_min) / (vol_max - vol_min) - 1 - ) - else: - geo_centers_vol = geo_centers + # Scaling factors + vol_max = data_dict["volume_min_max"][:, 1] + vol_min = data_dict["volume_min_max"][:, 0] + + # Normalize based on computational domain + geo_centers_vol = 2.0 * (geo_centers - vol_min) / (vol_max - vol_min) - 1 encoding_g_vol = self.geo_rep_volume(geo_centers_vol, p_grid, sdf_grid) @@ -1928,16 +1918,9 @@ def forward(self, data_dict, return_volume_neighbors=False): if self.output_features_surf is not None: # Represent geometry on bounding box - if "surface_min_max" in data_dict.keys(): - # Scaling factors - surf_max = data_dict["surface_min_max"][:, 1] - surf_min = data_dict["surface_min_max"][:, 0] - geo_centers_surf = ( - 2.0 * (geo_centers - surf_min) / (surf_max - surf_min) - 1 - ) - else: - geo_centers_surf = geo_centers - + geo_centers_surf = ( + 2.0 * (geo_centers - surf_min) / (surf_max - surf_min) - 1 + ) encoding_g_surf = self.geo_rep_surface( geo_centers_surf, s_grid, sdf_surf_grid ) From 378218f53e7a786fec7e00a4f24ffe5faa57cfac Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Tue, 23 Sep 2025 18:39:01 +0000 Subject: [PATCH 43/98] Fix minor errors in the datapipe --- physicsnemo/datapipes/cae/domino_datapipe2.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py index 781402a5b3..53abe95822 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe2.py +++ b/physicsnemo/datapipes/cae/domino_datapipe2.py @@ -494,7 +494,8 @@ def process_surface( ) # Select out the sampled points for non-neighbor arrays: - surface_fields = surface_fields[idx_surface] + if surface_fields is not None: + surface_fields = surface_fields[idx_surface] pos_normals_com_surface = pos_normals_com_surface[idx_surface] # Subsample the normals and sizes: surface_normals = surface_normals[idx_surface] @@ -606,12 +607,10 @@ def process_volume( if self.config.sampling: # Generate a series of idx to sample the volume # without replacement - volume_coordinates_sampled, idx_volume = shuffle_array( volume_coordinates, self.config.volume_points_sample ) volume_coordinates_sampled = volume_coordinates[idx_volume] - # In case too few points are in the sampled data (because the # inputs were too few), pad the outputs: if volume_coordinates_sampled.shape[0] < self.config.volume_points_sample: @@ -619,6 +618,7 @@ def process_volume( self.config.volume_points_sample - volume_coordinates_sampled.shape[0] ) + volume_coordinates_sampled = torch.nn.functional.pad( volume_coordinates_sampled, (0, 0, 0, 0, 0, padding_size), @@ -1122,17 +1122,15 @@ def create_domino_dataset( device_mesh: torch.distributed.DeviceMesh | None = None, placements: dict[str, torch.distributed.tensor.Placement] | None = None, ): + model_type = cfg.model.model_type if phase == "train": input_path = cfg.data.input_dir - model_type = cfg.model.model_type dataloader_cfg = cfg.train.dataloader elif phase == "val": input_path = cfg.data.input_dir_val - model_type = cfg.model.model_type dataloader_cfg = cfg.val.dataloader elif phase == "test": input_path = cfg.eval.test_path - model_type = "inference" dataloader_cfg = None else: raise ValueError(f"Invalid phase {phase}") From b0bd877d6d338842692728dc22360a9908847e85 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Tue, 23 Sep 2025 19:09:34 +0000 Subject: [PATCH 44/98] Move several components of the recipe to the deprecation bin. --- .../external_aerodynamics/domino/src/deprecated/README.md | 5 +++++ .../domino/src/{ => deprecated}/openfoam_datapipe.py | 0 .../domino/src/{ => deprecated}/retraining.py | 0 3 files changed, 5 insertions(+) create mode 100644 examples/cfd/external_aerodynamics/domino/src/deprecated/README.md rename examples/cfd/external_aerodynamics/domino/src/{ => deprecated}/openfoam_datapipe.py (100%) rename examples/cfd/external_aerodynamics/domino/src/{ => deprecated}/retraining.py (100%) diff --git a/examples/cfd/external_aerodynamics/domino/src/deprecated/README.md b/examples/cfd/external_aerodynamics/domino/src/deprecated/README.md new file mode 100644 index 0000000000..9124353b7e --- /dev/null +++ b/examples/cfd/external_aerodynamics/domino/src/deprecated/README.md @@ -0,0 +1,5 @@ +# DoMINO Deprecation + +The files in this folder have been deprecated as of the physicsnemo 25.11 release - +they are no longer officially supported. They are kept here only as a reference, +and may be removed in a future release. diff --git a/examples/cfd/external_aerodynamics/domino/src/openfoam_datapipe.py b/examples/cfd/external_aerodynamics/domino/src/deprecated/openfoam_datapipe.py similarity index 100% rename from examples/cfd/external_aerodynamics/domino/src/openfoam_datapipe.py rename to examples/cfd/external_aerodynamics/domino/src/deprecated/openfoam_datapipe.py diff --git a/examples/cfd/external_aerodynamics/domino/src/retraining.py b/examples/cfd/external_aerodynamics/domino/src/deprecated/retraining.py similarity index 100% rename from examples/cfd/external_aerodynamics/domino/src/retraining.py rename to examples/cfd/external_aerodynamics/domino/src/deprecated/retraining.py From 614710e4dadecf8004490a85ed6c88354afff53f Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Tue, 23 Sep 2025 19:11:20 +0000 Subject: [PATCH 45/98] Move and rename inference scripts --- .../domino/src/deprecated/inference_on_stl.py | 1586 +++++++++++++ .../domino/src/inference_on_stl.py | 2072 +++++------------ .../domino/src/inference_on_stl2.py | 634 ----- 3 files changed, 2146 insertions(+), 2146 deletions(-) create mode 100644 examples/cfd/external_aerodynamics/domino/src/deprecated/inference_on_stl.py delete mode 100644 examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py diff --git a/examples/cfd/external_aerodynamics/domino/src/deprecated/inference_on_stl.py b/examples/cfd/external_aerodynamics/domino/src/deprecated/inference_on_stl.py new file mode 100644 index 0000000000..a85cc7df86 --- /dev/null +++ b/examples/cfd/external_aerodynamics/domino/src/deprecated/inference_on_stl.py @@ -0,0 +1,1586 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This code defines a standalone distributed inference pipeline the DoMINO model. +This inference pipeline can be used to evaluate the model given an STL and +an inflow speed. The pre-trained model checkpoint can be specified in this script +or inferred from the config file. The results are calculated on a point cloud +sampled in the volume around the STL and on the surface of the STL. They are stored +in a dictionary, which can be written out for visualization. +""" + +import os +import time + +import hydra, re +from hydra import compose, initialize +from hydra.utils import to_absolute_path +from omegaconf import DictConfig, OmegaConf + +import numpy as np +import torch + +from physicsnemo.models.domino.model import DoMINO +from physicsnemo.utils.domino.utils import ( + unnormalize, + create_directory, + nd_interpolator, + get_filenames, + write_to_vtp, +) +from torch.cuda.amp import autocast +from torch.nn.parallel import DistributedDataParallel +from physicsnemo.distributed import DistributedManager + +from numpy.typing import NDArray +from typing import Any, Iterable, List, Literal, Mapping, Optional, Union, Callable +import warp as wp +from pathlib import Path +import pandas as pd +import matplotlib.pyplot as plt +import pyvista as pv + +try: + from physicsnemo.sym.geometry.tessellation import Tessellation + + SYM_AVAILABLE = True +except ImportError: + SYM_AVAILABLE = False + + +def combine_stls(stl_path, stl_files): + meshes = [] + combined_mesh = pv.PolyData() + for file in stl_files: + if ".stl" in file and "single_solid" not in file: + stl_file_path = os.path.join(stl_path, file) + reader = pv.get_reader(stl_file_path) + mesh_stl = reader.read() + combined_mesh = combined_mesh.merge(mesh_stl) + # meshes.append(mesh_stl) + break + # combined_mesh = pv.merge(meshes) + return combined_mesh + + +def plot(truth, prediction, var, save_path, axes_titles=None, plot_error=True): + if plot_error: + c = 3 + else: + c = 2 + fig, axes = plt.subplots(1, c, figsize=(15, 5)) + error = truth - prediction + # Plot Truth + im = axes[0].imshow( + truth, + cmap="jet", + vmax=np.ma.masked_invalid(truth).max(), + vmin=np.ma.masked_invalid(truth).min(), + ) + axes[0].axis("off") + cbar = fig.colorbar(im, ax=axes[0], orientation="vertical") + cbar.ax.tick_params(labelsize=12) + if axes_titles is None: + axes[0].set_title(f"{var} Truth") + else: + axes[0].set_title(axes_titles[0]) + + # Plot Predicted + im = axes[1].imshow( + prediction, + cmap="jet", + vmax=np.ma.masked_invalid(prediction).max(), + vmin=np.ma.masked_invalid(prediction).min(), + ) + axes[1].axis("off") + cbar = fig.colorbar(im, ax=axes[1], orientation="vertical") + cbar.ax.tick_params(labelsize=12) + if axes_titles is None: + axes[1].set_title(f"{var} Predicted") + else: + axes[1].set_title(axes_titles[1]) + + if plot_error: + # Plot Error + im = axes[2].imshow( + error, + cmap="jet", + vmax=np.ma.masked_invalid(error).max(), + vmin=np.ma.masked_invalid(error).min(), + ) + axes[2].axis("off") + cbar = fig.colorbar(im, ax=axes[2], orientation="vertical") + cbar.ax.tick_params(labelsize=12) + if axes_titles is None: + axes[2].set_title(f"{var} Error") + else: + axes[2].set_title(axes_titles[2]) + + MAE = np.mean(np.ma.masked_invalid((error))) + + if MAE: + fig.suptitle(f"MAE {MAE}", fontsize=18, x=0.5) + + plt.tight_layout() + + path_to_save_path = os.path.join(save_path) + plt.savefig(path_to_save_path, bbox_inches="tight", pad_inches=0.1) + plt.close() + + +@wp.kernel +def _bvh_query_distance( + mesh: wp.uint64, + points: wp.array(dtype=wp.vec3f), + max_dist: wp.float32, + sdf: wp.array(dtype=wp.float32), + sdf_hit_point: wp.array(dtype=wp.vec3f), + sdf_hit_point_id: wp.array(dtype=wp.int32), +): + """ + Computes the signed distance from each point in the given array `points` + to the mesh represented by `mesh`,within the maximum distance `max_dist`, + and stores the result in the array `sdf`. + + Parameters: + mesh (wp.uint64): The identifier of the mesh. + points (wp.array): An array of 3D points for which to compute the + signed distance. + max_dist (wp.float32): The maximum distance within which to search + for the closest point on the mesh. + sdf (wp.array): An array to store the computed signed distances. + sdf_hit_point (wp.array): An array to store the computed hit points. + sdf_hit_point_id (wp.array): An array to store the computed hit point ids. + + Returns: + None + """ + tid = wp.tid() + + res = wp.mesh_query_point_sign_winding_number(mesh, points[tid], max_dist) + + mesh_ = wp.mesh_get(mesh) + + p0 = mesh_.points[mesh_.indices[3 * res.face + 0]] + p1 = mesh_.points[mesh_.indices[3 * res.face + 1]] + p2 = mesh_.points[mesh_.indices[3 * res.face + 2]] + + p_closest = res.u * p0 + res.v * p1 + (1.0 - res.u - res.v) * p2 + + sdf[tid] = res.sign * wp.abs(wp.length(points[tid] - p_closest)) + sdf_hit_point[tid] = p_closest + sdf_hit_point_id[tid] = res.face + + +def signed_distance_field( + mesh_vertices: list[tuple[float, float, float]], + mesh_indices: NDArray[float], + input_points: list[tuple[float, float, float]], + max_dist: float = 1e8, + include_hit_points: bool = False, + include_hit_points_id: bool = False, + device: int = 0, +) -> wp.array: + """ + Computes the signed distance field (SDF) for a given mesh and input points. + + Parameters: + ---------- + mesh_vertices (list[tuple[float, float, float]]): List of vertices defining the mesh. + mesh_indices (list[tuple[int, int, int]]): List of indices defining the triangles of the mesh. + input_points (list[tuple[float, float, float]]): List of input points for which to compute the SDF. + max_dist (float, optional): Maximum distance within which to search for + the closest point on the mesh. Default is 1e8. + include_hit_points (bool, optional): Whether to include hit points in + the output. Default is False. + include_hit_points_id (bool, optional): Whether to include hit point + IDs in the output. Default is False. + + Returns: + ------- + wp.array: An array containing the computed signed distance field. + + Example: + ------- + >>> mesh_vertices = [(0, 0, 0), (1, 0, 0), (0, 1, 0)] + >>> mesh_indices = np.array((0, 1, 2)) + >>> input_points = [(0.5, 0.5, 0.5)] + >>> signed_distance_field(mesh_vertices, mesh_indices, input_points).numpy() + Module ... + array([0.5], dtype=float32) + """ + + wp.init() + # mesh = wp.Mesh( + # wp.array(mesh_vertices.cpu(), dtype=wp.vec3), wp.array(mesh_indices.cpu(), dtype=wp.int32) + # ) + mesh = wp.Mesh( + wp.from_torch(mesh_vertices, dtype=wp.vec3), + wp.from_torch(mesh_indices, dtype=wp.int32), + ) + + sdf_points = wp.from_torch(input_points, dtype=wp.vec3) + sdf = wp.zeros(shape=sdf_points.shape, dtype=wp.float32) + sdf_hit_point = wp.zeros(shape=sdf_points.shape, dtype=wp.vec3f) + sdf_hit_point_id = wp.zeros(shape=sdf_points.shape, dtype=wp.int32) + wp.launch( + kernel=_bvh_query_distance, + dim=len(sdf_points), + inputs=[mesh.id, sdf_points, max_dist, sdf, sdf_hit_point, sdf_hit_point_id], + ) + if include_hit_points and include_hit_points_id: + return ( + wp.to_torch(sdf), + wp.to_torch(sdf_hit_point), + wp.to_torch(sdf_hit_point_id), + ) + elif include_hit_points: + return (wp.to_torch(sdf), wp.to_torch(sdf_hit_point)) + elif include_hit_points_id: + return (wp.to_torch(sdf), wp.to_torch(sdf_hit_point_id)) + else: + return wp.to_torch(sdf) + + +def shuffle_array_torch(surface_vertices, geometry_points, device): + idx = torch.unsqueeze( + torch.randperm(surface_vertices.shape[0])[:geometry_points], -1 + ).to(device) + idx = idx.repeat(1, 3) + surface_sampled = torch.gather(surface_vertices, 0, idx) + return surface_sampled + + +class inferenceDataPipe: + def __init__( + self, + device: int = 0, + grid_resolution: Optional[list] = [256, 96, 64], + normalize_coordinates: bool = False, + geom_points_sample: int = 300000, + positional_encoding: bool = False, + surface_vertices=None, + surface_indices=None, + surface_areas=None, + surface_centers=None, + use_sdf_basis=False, + ): + self.surface_vertices = surface_vertices + self.surface_indices = surface_indices + self.surface_areas = surface_areas + self.surface_centers = surface_centers + self.device = device + self.grid_resolution = grid_resolution + self.normalize_coordinates = normalize_coordinates + self.geom_points_sample = geom_points_sample + self.positional_encoding = positional_encoding + self.use_sdf_basis = use_sdf_basis + torch.manual_seed(int(42 + torch.cuda.current_device())) + self.data_dict = {} + + def clear_dict(self): + del self.data_dict + + def clear_volume_dict(self): + del self.data_dict["volume_mesh_centers"] + del self.data_dict["pos_enc_closest"] + del self.data_dict["pos_normals_com"] + del self.data_dict["sdf_nodes"] + + def create_grid_torch(self, mx, mn, nres): + start_time = time.time() + dx = torch.linspace(mn[0], mx[0], nres[0], device=self.device) + dy = torch.linspace(mn[1], mx[1], nres[1], device=self.device) + dz = torch.linspace(mn[2], mx[2], nres[2], device=self.device) + + xv, yv, zv = torch.meshgrid(dx, dy, dz, indexing="ij") + xv = torch.unsqueeze(xv, -1) + yv = torch.unsqueeze(yv, -1) + zv = torch.unsqueeze(zv, -1) + grid = torch.cat((xv, yv, zv), axis=-1) + return grid + + def process_surface_mesh(self, bounding_box=None, bounding_box_surface=None): + # Use coarse mesh to calculate SDF + surface_vertices = self.surface_vertices + surface_indices = self.surface_indices + surface_areas = self.surface_areas + surface_centers = self.surface_centers + + start_time = time.time() + + if bounding_box is None: + # Create a bounding box + s_max = torch.amax(surface_vertices, 0) + s_min = torch.amin(surface_vertices, 0) + + c_max = s_max + (s_max - s_min) / 2 + c_min = s_min - (s_max - s_min) / 2 + c_min[2] = s_min[2] + else: + c_min = bounding_box[0] + c_max = bounding_box[1] + + if bounding_box_surface is None: + # Create a bounding box + s_max = torch.amax(surface_vertices, 0) + s_min = torch.amin(surface_vertices, 0) + + surf_max = s_max + (s_max - s_min) / 2 + surf_min = s_min - (s_max - s_min) / 2 + surf_min[2] = s_min[2] + else: + surf_min = bounding_box_surface[0] + surf_max = bounding_box_surface[1] + + nx, ny, nz = self.grid_resolution + + grid = self.create_grid_torch(c_max, c_min, self.grid_resolution) + grid_reshaped = torch.reshape(grid, (nx * ny * nz, 3)) + + # SDF on grid + sdf_grid = signed_distance_field( + surface_vertices, surface_indices, grid_reshaped, device=self.device + ) + sdf_grid = torch.reshape(sdf_grid, (nx, ny, nz)) + + surface_areas = torch.unsqueeze(surface_areas, -1) + center_of_mass = torch.sum(surface_centers * surface_areas, 0) / torch.sum( + surface_areas + ) + + s_grid = self.create_grid_torch(surf_max, surf_min, self.grid_resolution) + surf_grid_reshaped = torch.reshape(s_grid, (nx * ny * nz, 3)) + + surf_sdf_grid = signed_distance_field( + surface_vertices, surface_indices, surf_grid_reshaped, device=self.device + ) + surf_sdf_grid = torch.reshape(surf_sdf_grid, (nx, ny, nz)) + + if self.normalize_coordinates: + grid = 2.0 * (grid - c_min) / (c_max - c_min) - 1.0 + s_grid = 2.0 * (s_grid - surf_min) / (surf_max - surf_min) - 1.0 + + surface_vertices = torch.unsqueeze(surface_vertices, 0) + grid = torch.unsqueeze(grid, 0) + s_grid = torch.unsqueeze(s_grid, 0) + sdf_grid = torch.unsqueeze(sdf_grid, 0) + surf_sdf_grid = torch.unsqueeze(surf_sdf_grid, 0) + max_min = [c_min, c_max] + surf_max_min = [surf_min, surf_max] + center_of_mass = center_of_mass + + return ( + surface_vertices, + grid, + sdf_grid, + max_min, + s_grid, + surf_sdf_grid, + surf_max_min, + center_of_mass, + ) + + def sample_stl_points( + self, + num_points, + stl_centers, + stl_area, + stl_normals, + max_min, + center_of_mass, + bounding_box=None, + stencil_size=7, + ): + if bounding_box is not None: + c_max = bounding_box[1] + c_min = bounding_box[0] + else: + c_min = max_min[0] + c_max = max_min[1] + + start_time = time.time() + + nx, ny, nz = self.grid_resolution + + idx = np.arange(stl_centers.shape[0]) + # np.random.shuffle(idx) + if num_points is not None: + idx = idx[:num_points] + + surface_coordinates = stl_centers + surface_normals = stl_normals + surface_area = stl_area + + if stencil_size > 1: + interp_func = KDTree(surface_coordinates) + dd, ii = interp_func.query(surface_coordinates, k=stencil_size) + surface_neighbors = surface_coordinates[ii] + surface_neighbors = surface_neighbors[:, 1:] + 1e-6 + surface_neighbors_normals = surface_normals[ii] + surface_neighbors_normals = surface_neighbors_normals[:, 1:] + surface_neighbors_area = surface_area[ii] + surface_neighbors_area = surface_neighbors_area[:, 1:] + else: + surface_neighbors = np.expand_dims(surface_coordinates, 1) + 1e-6 + surface_neighbors_normals = np.expand_dims(surface_normals, 1) + surface_neighbors_area = np.expand_dims(surface_area, 1) + + surface_coordinates = torch.from_numpy(surface_coordinates).to(self.device) + surface_normals = torch.from_numpy(surface_normals).to(self.device) + surface_area = torch.from_numpy(surface_area).to(self.device) + surface_neighbors = torch.from_numpy(surface_neighbors).to(self.device) + surface_neighbors_normals = torch.from_numpy(surface_neighbors_normals).to( + self.device + ) + surface_neighbors_area = torch.from_numpy(surface_neighbors_area).to( + self.device + ) + + pos_normals_com = surface_coordinates - center_of_mass + + if self.normalize_coordinates: + surface_coordinates = ( + 2.0 * (surface_coordinates - c_min) / (c_max - c_min) - 1.0 + ) + surface_neighbors = ( + 2.0 * (surface_neighbors - c_min) / (c_max - c_min) - 1.0 + ) + + surface_coordinates = surface_coordinates[idx] + surface_area = surface_area[idx] + surface_normals = surface_normals[idx] + pos_normals_com = pos_normals_com[idx] + surface_coordinates = torch.unsqueeze(surface_coordinates, 0) + surface_normals = torch.unsqueeze(surface_normals, 0) + surface_area = torch.unsqueeze(surface_area, 0) + pos_normals_com = torch.unsqueeze(pos_normals_com, 0) + + surface_neighbors = surface_neighbors[idx] + surface_neighbors_normals = surface_neighbors_normals[idx] + surface_neighbors_area = surface_neighbors_area[idx] + surface_neighbors = torch.unsqueeze(surface_neighbors, 0) + surface_neighbors_normals = torch.unsqueeze(surface_neighbors_normals, 0) + surface_neighbors_area = torch.unsqueeze(surface_neighbors_area, 0) + + scaling_factors = [c_max, c_min] + + return ( + surface_coordinates, + surface_neighbors, + surface_normals, + surface_neighbors_normals, + surface_area, + surface_neighbors_area, + pos_normals_com, + scaling_factors, + idx, + ) + + def sample_points_on_surface( + self, + num_points_surf, + max_min, + center_of_mass, + stl_path, + bounding_box=None, + stencil_size=7, + ): + if bounding_box is not None: + c_max = bounding_box[1] + c_min = bounding_box[0] + else: + c_min = max_min[0] + c_max = max_min[1] + + start_time = time.time() + + nx, ny, nz = self.grid_resolution + + obj = Tessellation.from_stl(stl_path, airtight=False) + + boundary = obj.sample_boundary(num_points_surf) + surface_coordinates = np.concatenate( + [ + np.float32(boundary["x"]), + np.float32(boundary["y"]), + np.float32(boundary["z"]), + ], + axis=1, + ) + surface_normals = np.concatenate( + [ + np.float32(boundary["normal_x"]), + np.float32(boundary["normal_y"]), + np.float32(boundary["normal_z"]), + ], + axis=1, + ) + + surface_area = np.float32(boundary["area"]) + + interp_func = KDTree(surface_coordinates) + dd, ii = interp_func.query(surface_coordinates, k=stencil_size) + surface_neighbors = surface_coordinates[ii] + surface_neighbors = surface_neighbors[:, 1:] + surface_neighbors_normals = surface_normals[ii] + surface_neighbors_normals = surface_neighbors_normals[:, 1:] + surface_neighbors_area = surface_area[ii] + surface_neighbors_area = surface_neighbors_area[:, 1:] + + surface_coordinates = torch.from_numpy(surface_coordinates).to(self.device) + surface_normals = torch.from_numpy(surface_normals).to(self.device) + surface_area = torch.from_numpy(surface_area).to(self.device) + surface_neighbors = torch.from_numpy(surface_neighbors).to(self.device) + surface_neighbors_normals = torch.from_numpy(surface_neighbors_normals).to( + self.device + ) + surface_neighbors_area = torch.from_numpy(surface_neighbors_area).to( + self.device + ) + + pos_normals_com = surface_coordinates - center_of_mass + + if self.normalize_coordinates: + surface_coordinates = ( + 2.0 * (surface_coordinates - c_min) / (c_max - c_min) - 1.0 + ) + + surface_coordinates = torch.unsqueeze(surface_coordinates, 0) + surface_normals = torch.unsqueeze(surface_normals, 0) + surface_area = torch.unsqueeze(surface_area, 0) + pos_normals_com = torch.unsqueeze(pos_normals_com, 0) + + surface_neighbors = torch.unsqueeze(surface_neighbors, 0) + surface_neighbors_normals = torch.unsqueeze(surface_neighbors_normals, 0) + surface_neighbors_area = torch.unsqueeze(surface_neighbors_area, 0) + + scaling_factors = [c_max, c_min] + + return ( + surface_coordinates, + surface_neighbors, + surface_normals, + surface_neighbors_normals, + surface_area, + surface_neighbors_area, + pos_normals_com, + scaling_factors, + ) + + def sample_points_in_volume( + self, num_points_vol, max_min, center_of_mass, bounding_box=None + ): + if bounding_box is not None: + c_max = bounding_box[1] + c_min = bounding_box[0] + else: + c_min = max_min[0] + c_max = max_min[1] + + start_time = time.time() + + nx, ny, nz = self.grid_resolution + for k in range(10): + if k > 0: + num_pts_vol = num_points_vol - int(volume_coordinates.shape[0] / 2) + else: + num_pts_vol = int(1.25 * num_points_vol) + + volume_coordinates_sub = (c_max - c_min) * torch.rand( + num_pts_vol, 3, device=self.device, dtype=torch.float32 + ) + c_min + + sdf_nodes, sdf_node_closest_point = signed_distance_field( + self.surface_vertices, + self.surface_indices, + volume_coordinates_sub, + include_hit_points=True, + device=self.device, + ) + sdf_nodes = torch.unsqueeze(sdf_nodes, -1) + + idx = torch.unsqueeze(torch.where((sdf_nodes > 0))[0], -1) + idx = idx.repeat(1, volume_coordinates_sub.shape[1]) + if k == 0: + volume_coordinates = torch.gather(volume_coordinates_sub, 0, idx) + else: + volume_coordinates_1 = torch.gather(volume_coordinates_sub, 0, idx) + volume_coordinates = torch.cat( + (volume_coordinates, volume_coordinates_1), axis=0 + ) + + if volume_coordinates.shape[0] > num_points_vol: + volume_coordinates = volume_coordinates[:num_points_vol] + break + + sdf_nodes, sdf_node_closest_point = signed_distance_field( + self.surface_vertices, + self.surface_indices, + volume_coordinates, + include_hit_points=True, + device=self.device, + ) + sdf_nodes = torch.unsqueeze(sdf_nodes, -1) + + pos_normals_closest = volume_coordinates - sdf_node_closest_point + pos_normals_com = volume_coordinates - center_of_mass + + if self.normalize_coordinates: + volume_coordinates = ( + 2.0 * (volume_coordinates - c_min) / (c_max - c_min) - 1.0 + ) + + volume_coordinates = torch.unsqueeze(volume_coordinates, 0) + pos_normals_com = torch.unsqueeze(pos_normals_com, 0) + + if self.use_sdf_basis: + pos_normals_closest = torch.unsqueeze(pos_normals_closest, 0) + sdf_nodes = torch.unsqueeze(sdf_nodes, 0) + + scaling_factors = [c_max, c_min] + return ( + volume_coordinates, + pos_normals_com, + pos_normals_closest, + sdf_nodes, + scaling_factors, + ) + + +class dominoInference: + def __init__( + self, + cfg: DictConfig, + dist: None, + cached_geo_encoding: bool = False, + ): + self.cfg = cfg + self.dist = dist + self.stream_velocity = None + self.stencil_size = None + self.stl_path = None + self.stl_vertices = None + self.stl_centers = None + self.surface_areas = None + self.mesh_indices_flattened = None + self.length_scale = 1.0 + if self.dist is None: + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + else: + self.device = self.dist.device + + self.air_density = torch.full((1, 1), 1.205, dtype=torch.float32).to( + self.device + ) + ( + self.num_vol_vars, + self.num_surf_vars, + self.num_global_features, + ) = self.get_num_variables() + self.model = None + self.grid_resolution = torch.tensor(self.cfg.model.interp_res).to(self.device) + self.vol_factors = None + self.bounding_box_min_max = None + self.bounding_box_surface_min_max = None + self.center_of_mass = None + self.grid = None + self.geometry_encoding = None + self.geometry_encoding_surface = None + self.cached_geo_encoding = cached_geo_encoding + self.out_dict = {} + + def get_geometry_encoding(self): + return self.geometry_encoding + + def get_geometry_encoding_surface(self): + return self.geometry_encoding_surface + + def get_out_dict(self): + return self.out_dict + + def clear_out_dict(self): + self.out_dict.clear() + + def initialize_data_processor(self): + self.ifp = inferenceDataPipe( + device=self.device, + surface_vertices=self.stl_vertices, + surface_indices=self.mesh_indices_flattened, + surface_areas=self.surface_areas, + surface_centers=self.stl_centers, + grid_resolution=self.grid_resolution, + normalize_coordinates=True, + geom_points_sample=300000, + positional_encoding=False, + use_sdf_basis=self.cfg.model.use_sdf_in_basis_func, + ) + + def load_bounding_box(self): + if ( + self.cfg.data.bounding_box.min is not None + and self.cfg.data.bounding_box.max is not None + ): + c_min = torch.from_numpy( + np.array(self.cfg.data.bounding_box.min, dtype=np.float32) + ).to(self.device) + c_max = torch.from_numpy( + np.array(self.cfg.data.bounding_box.max, dtype=np.float32) + ).to(self.device) + self.bounding_box_min_max = [c_min, c_max] + + if ( + self.cfg.data.bounding_box_surface.min is not None + and self.cfg.data.bounding_box_surface.max is not None + ): + c_min = torch.from_numpy( + np.array(self.cfg.data.bounding_box_surface.min, dtype=np.float32) + ).to(self.device) + c_max = torch.from_numpy( + np.array(self.cfg.data.bounding_box_surface.max, dtype=np.float32) + ).to(self.device) + self.bounding_box_surface_min_max = [c_min, c_max] + + def load_volume_scaling_factors(self): + scaling_param_path = self.cfg.eval.scaling_param_path + vol_factors_path = os.path.join( + scaling_param_path, "volume_scaling_factors.npy" + ) + + vol_factors = np.load(vol_factors_path, allow_pickle=True) + vol_factors = torch.from_numpy(vol_factors).to(self.device) + + return vol_factors + + def load_surface_scaling_factors(self): + scaling_param_path = self.cfg.eval.scaling_param_path + surf_factors_path = os.path.join( + scaling_param_path, "surface_scaling_factors.npy" + ) + + surf_factors = np.load(surf_factors_path, allow_pickle=True) + surf_factors = torch.from_numpy(surf_factors).to(self.device) + + return surf_factors + + def read_stl(self): + stl_files = get_filenames(self.stl_path) + mesh_stl = combine_stls(self.stl_path, stl_files) + if self.cfg.eval.refine_stl: + mesh_stl = mesh_stl.subdivide( + nsub=2, subfilter="linear" + ) # .smooth(n_iter=20) + stl_vertices = mesh_stl.points + length_scale = np.amax(np.amax(stl_vertices, 0) - np.amin(stl_vertices, 0)) + stl_centers = mesh_stl.cell_centers().points + # Assuming triangular elements + stl_faces = np.array(mesh_stl.faces).reshape((-1, 4))[:, 1:] + mesh_indices_flattened = stl_faces.flatten() + + surface_areas = mesh_stl.compute_cell_sizes( + length=False, area=True, volume=False + ) + surface_areas = np.array(surface_areas.cell_data["Area"]) + + surface_normals = np.array(mesh_stl.cell_normals, dtype=np.float32) + + self.stl_vertices = torch.from_numpy(np.float32(stl_vertices)).to(self.device) + self.stl_centers = torch.from_numpy(np.float32(stl_centers)).to(self.device) + self.surface_areas = torch.from_numpy(np.float32(surface_areas)).to(self.device) + self.stl_normals = -1.0 * torch.from_numpy(np.float32(surface_normals)).to( + self.device + ) + self.mesh_indices_flattened = torch.from_numpy( + np.int32(mesh_indices_flattened) + ).to(self.device) + self.length_scale = length_scale + self.mesh_stl = mesh_stl + + def read_stl_trimesh( + self, stl_vertices, stl_faces, stl_centers, surface_normals, surface_areas + ): + mesh_indices_flattened = stl_faces.flatten() + length_scale = np.amax(np.amax(stl_vertices, 0) - np.amin(stl_vertices, 0)) + self.stl_vertices = torch.from_numpy(stl_vertices).to(self.device) + self.stl_centers = torch.from_numpy(stl_centers).to(self.device) + self.stl_normals = -1.0 * torch.from_numpy(surface_normals).to(self.device) + self.surface_areas = torch.from_numpy(surface_areas).to(self.device) + self.mesh_indices_flattened = torch.from_numpy( + np.int32(mesh_indices_flattened) + ).to(self.device) + self.length_scale = length_scale + + def get_num_variables(self): + volume_variable_names = list(self.cfg.variables.volume.solution.keys()) + num_vol_vars = 0 + for j in volume_variable_names: + if self.cfg.variables.volume.solution[j] == "vector": + num_vol_vars += 3 + else: + num_vol_vars += 1 + + surface_variable_names = list(self.cfg.variables.surface.solution.keys()) + num_surf_vars = 0 + for j in surface_variable_names: + if self.cfg.variables.surface.solution[j] == "vector": + num_surf_vars += 3 + else: + num_surf_vars += 1 + + num_global_features = 0 + global_params_names = list(cfg.variables.global_parameters.keys()) + for param in global_params_names: + if cfg.variables.global_parameters[param].type == "vector": + num_global_features += len( + cfg.variables.global_parameters[param].reference + ) + elif cfg.variables.global_parameters[param].type == "scalar": + num_global_features += 1 + else: + raise ValueError(f"Unknown global parameter type") + + return num_vol_vars, num_surf_vars, num_global_features + + def initialize_model(self, model_path): + model = ( + DoMINO( + input_features=3, + output_features_vol=self.num_vol_vars, + output_features_surf=self.num_surf_vars, + global_features=self.num_global_features, + model_parameters=self.cfg.model, + ) + .to(self.device) + .eval() + ) + model = torch.compile(model, disable=True) + + checkpoint_iter = torch.load( + to_absolute_path(model_path), map_location=self.dist.device + ) + + model.load_state_dict(checkpoint_iter) + + if self.dist is not None: + if self.dist.world_size > 1: + model = DistributedDataParallel( + model, + device_ids=[self.dist.local_rank], + output_device=self.dist.device, + broadcast_buffers=self.dist.broadcast_buffers, + find_unused_parameters=self.dist.find_unused_parameters, + gradient_as_bucket_view=True, + static_graph=True, + ) + + self.model = model + self.vol_factors = self.load_volume_scaling_factors() + self.surf_factors = self.load_surface_scaling_factors() + self.load_bounding_box() + + def set_stream_velocity(self, stream_velocity): + self.stream_velocity = torch.full( + (1, 1), stream_velocity, dtype=torch.float32 + ).to(self.device) + + def set_stencil_size(self, stencil_size): + self.stencil_size = stencil_size + + def set_air_density(self, air_density): + self.air_density = torch.full((1, 1), air_density, dtype=torch.float32).to( + self.device + ) + + def set_stl_path(self, filename): + self.stl_path = filename + + @torch.no_grad() + def compute_geo_encoding(self, cached_geom_path=None): + start_time = time.time() + + if not self.cached_geo_encoding: + ( + surface_vertices, + grid, + sdf_grid, + max_min, + s_grid, + surf_sdf_grid, + surf_max_min, + center_of_mass, + ) = self.ifp.process_surface_mesh( + self.bounding_box_min_max, self.bounding_box_surface_min_max + ) + if self.bounding_box_min_max is None: + self.bounding_box_min_max = max_min + if self.bounding_box_surface_min_max is None: + self.bounding_box_surface_min_max = surf_max_min + self.center_of_mass = center_of_mass + self.grid = grid + self.s_grid = s_grid + self.sdf_grid = sdf_grid + self.surf_sdf_grid = surf_sdf_grid + self.out_dict["sdf"] = sdf_grid + + geo_encoding, geo_encoding_surface = self.calculate_geometry_encoding( + surface_vertices, grid, sdf_grid, s_grid, surf_sdf_grid, self.model + ) + else: + out_dict_cached = torch.load(cached_geom_path, map_location=self.device) + self.bounding_box_min_max = out_dict_cached["bounding_box_min_max"] + self.grid = out_dict_cached["grid"] + self.sdf_grid = out_dict_cached["sdf_grid"] + self.center_of_mass = out_dict_cached["com"] + geo_encoding = out_dict_cached["geo_encoding"] + geo_encoding_surface = out_dict_cached["geo_encoding_surface"] + self.out_dict["sdf"] = self.sdf_grid + torch.cuda.synchronize() + print("Time taken for geo encoding = %f" % (time.time() - start_time)) + + self.geometry_encoding = geo_encoding + self.geometry_encoding_surface = geo_encoding_surface + + def compute_forces(self): + pressure = self.out_dict["pressure_surface"] + wall_shear = self.out_dict["wall-shear-stress"] + # sampling_indices = self.out_dict["sampling_indices"] + + surface_normals = self.stl_normals[self.sampling_indices] + surface_areas = self.surface_areas[self.sampling_indices] + + drag_force = torch.sum( + pressure[0, :, 0] * surface_normals[:, 0] * surface_areas + - wall_shear[0, :, 0] * surface_areas + ) + lift_force = torch.sum( + pressure[0, :, 0] * surface_normals[:, 2] * surface_areas + - wall_shear[0, :, 2] * surface_areas + ) + + self.out_dict["drag_force"] = drag_force + self.out_dict["lift_force"] = lift_force + + @torch.inference_mode() + def compute_surface_solutions(self, num_sample_points=None, plot_solutions=False): + total_time = 0.0 + start_event = torch.cuda.Event(enable_timing=True) + end_event = torch.cuda.Event(enable_timing=True) + + geo_encoding = self.geometry_encoding_surface + j = 0 + + with autocast(enabled=True): + start_event.record() + ( + surface_mesh_centers, + surface_neighbors, + surface_normals, + surface_neighbors_normals, + surface_areas, + surface_neighbors_areas, + pos_normals_com, + surf_scaling_factors, + sampling_indices, + ) = self.ifp.sample_stl_points( + num_sample_points, + self.stl_centers.cpu().numpy(), + self.surface_areas.cpu().numpy(), + self.stl_normals.cpu().numpy(), + max_min=self.bounding_box_surface_min_max, + center_of_mass=self.center_of_mass, + stencil_size=self.stencil_size, + ) + end_event.record() + end_event.synchronize() + cur_time = start_event.elapsed_time(end_event) / 1000.0 + print(f"sample_points_in_surface time (s): {cur_time:.4f}") + # vol_coordinates_all.append(volume_mesh_centers) + surface_coordinates_all = surface_mesh_centers + + inner_time = time.time() + start_event.record() + if num_sample_points == None: + point_batch_size = 512_000 + num_points = surface_coordinates_all.shape[1] + subdomain_points = int(np.floor(num_points / point_batch_size)) + surface_solutions = torch.zeros(1, num_points, self.num_surf_vars).to( + self.device + ) + for p in range(subdomain_points + 1): + start_idx = p * point_batch_size + end_idx = (p + 1) * point_batch_size + surface_solutions_batch = self.compute_solution_on_surface( + geo_encoding, + surface_mesh_centers[:, start_idx:end_idx], + surface_neighbors[:, start_idx:end_idx], + surface_normals[:, start_idx:end_idx], + surface_neighbors_normals[:, start_idx:end_idx], + surface_areas[:, start_idx:end_idx], + surface_neighbors_areas[:, start_idx:end_idx], + pos_normals_com[:, start_idx:end_idx], + self.s_grid, + self.model, + inlet_velocity=self.stream_velocity, + air_density=self.air_density, + ) + surface_solutions[:, start_idx:end_idx] = surface_solutions_batch + else: + point_batch_size = 512_000 + num_points = num_sample_points + subdomain_points = int(np.floor(num_points / point_batch_size)) + surface_solutions = torch.zeros(1, num_points, self.num_surf_vars).to( + self.device + ) + for p in range(subdomain_points + 1): + start_idx = p * point_batch_size + end_idx = (p + 1) * point_batch_size + surface_solutions_batch = self.compute_solution_on_surface( + geo_encoding, + surface_mesh_centers[:, start_idx:end_idx], + surface_neighbors[:, start_idx:end_idx], + surface_normals[:, start_idx:end_idx], + surface_neighbors_normals[:, start_idx:end_idx], + surface_areas[:, start_idx:end_idx], + surface_neighbors_areas[:, start_idx:end_idx], + pos_normals_com[:, start_idx:end_idx], + self.s_grid, + self.model, + inlet_velocity=self.stream_velocity, + air_density=self.air_density, + ) + # print(torch.amax(surface_solutions_batch, (0, 1)), torch.amin(surface_solutions_batch, (0, 1))) + surface_solutions[:, start_idx:end_idx] = surface_solutions_batch + + # print(surface_solutions.shape) + end_event.record() + end_event.synchronize() + cur_time = start_event.elapsed_time(end_event) / 1000.0 + print(f"compute_solution time (s): {cur_time:.4f}") + total_time += float(time.time() - inner_time) + surface_solutions_all = surface_solutions + print( + "Time taken for compute solution on surface for=%f, %f" + % (time.time() - inner_time, torch.cuda.utilization(self.device)) + ) + cmax = surf_scaling_factors[0] + cmin = surf_scaling_factors[1] + + surface_coordinates_all = torch.reshape( + surface_coordinates_all, (1, num_points, 3) + ) + surface_solutions_all = torch.reshape(surface_solutions_all, (1, num_points, 4)) + + if self.surf_factors is not None: + surface_solutions_all = unnormalize( + surface_solutions_all, self.surf_factors[0], self.surf_factors[1] + ) + + self.out_dict["surface_coordinates"] = ( + 0.5 * (surface_coordinates_all + 1.0) * (cmax - cmin) + cmin + ) + self.out_dict["pressure_surface"] = ( + surface_solutions_all[:, :, :1] + * self.stream_velocity**2.0 + * self.air_density + ) + self.out_dict["wall-shear-stress"] = ( + surface_solutions_all[:, :, 1:4] + * self.stream_velocity**2.0 + * self.air_density + ) + self.sampling_indices = sampling_indices + + @torch.inference_mode() + def compute_volume_solutions(self, num_sample_points, plot_solutions=False): + total_time = 0.0 + start_event = torch.cuda.Event(enable_timing=True) + end_event = torch.cuda.Event(enable_timing=True) + + geo_encoding = self.geometry_encoding + j = 0 + + # Compute volume + point_batch_size = 512_000 + num_points = num_sample_points + subdomain_points = int(np.floor(num_points / point_batch_size)) + volume_solutions = torch.zeros(1, num_points, self.num_vol_vars).to(self.device) + volume_coordinates = torch.zeros(1, num_points, 3).to(self.device) + + for p in range(subdomain_points + 1): + start_idx = p * point_batch_size + end_idx = (p + 1) * point_batch_size + if end_idx > num_points: + point_batch_size = num_points - start_idx + end_idx = num_points + + with autocast(enabled=True): + inner_time = time.time() + start_event.record() + ( + volume_mesh_centers, + pos_normals_com, + pos_normals_closest, + sdf_nodes, + scaling_factors, + ) = self.ifp.sample_points_in_volume( + num_points_vol=point_batch_size, + max_min=self.bounding_box_min_max, + center_of_mass=self.center_of_mass, + ) + end_event.record() + end_event.synchronize() + cur_time = start_event.elapsed_time(end_event) / 1000.0 + print(f"sample_points_in_volume time (s): {cur_time:.4f}") + + volume_coordinates[:, start_idx:end_idx] = volume_mesh_centers + + start_event.record() + + volume_solutions_batch = self.compute_solution_in_volume( + geo_encoding, + volume_mesh_centers, + sdf_nodes, + pos_normals_closest, + pos_normals_com, + self.grid, + self.model, + use_sdf_basis=self.cfg.model.use_sdf_in_basis_func, + inlet_velocity=self.stream_velocity, + air_density=self.air_density, + ) + volume_solutions[:, start_idx:end_idx] = volume_solutions_batch + end_event.record() + end_event.synchronize() + cur_time = start_event.elapsed_time(end_event) / 1000.0 + print(f"compute_solution time (s): {cur_time:.4f}") + total_time += float(time.time() - inner_time) + # volume_solutions_all = volume_solutions + print( + "Time taken for compute solution in volume for =%f" + % (time.time() - inner_time) + ) + # print("Points processed:", end_idx) + print("Total time measured = %f" % total_time) + print("Points processed:", end_idx) + + cmax = scaling_factors[0] + cmin = scaling_factors[1] + volume_coordinates_all = volume_coordinates + volume_solutions_all = volume_solutions + + cmax = scaling_factors[0] + cmin = scaling_factors[1] + + volume_coordinates_all = torch.reshape( + volume_coordinates_all, (1, num_sample_points, 3) + ) + volume_solutions_all = torch.reshape( + volume_solutions_all, (1, num_sample_points, self.num_vol_vars) + ) + + if self.vol_factors is not None: + volume_solutions_all = unnormalize( + volume_solutions_all, self.vol_factors[0], self.vol_factors[1] + ) + + self.out_dict["coordinates"] = ( + 0.5 * (volume_coordinates_all + 1.0) * (cmax - cmin) + cmin + ) + self.out_dict["velocity"] = ( + volume_solutions_all[:, :, :3] * self.stream_velocity + ) + self.out_dict["pressure"] = ( + volume_solutions_all[:, :, 3:4] + * self.stream_velocity**2.0 + * self.air_density + ) + # self.out_dict["turbulent-kinetic-energy"] = ( + # volume_solutions_all[:, :, 4:5] + # * self.stream_velocity**2.0 + # * self.air_density + # ) + # self.out_dict["turbulent-viscosity"] = ( + # volume_solutions_all[:, :, 5:] * self.stream_velocity * self.length_scale + # ) + self.out_dict["bounding_box_dims"] = torch.vstack(self.bounding_box_min_max) + + if plot_solutions: + print("Plotting solutions") + plot_save_path = os.path.join(self.cfg.output, "plots/contours/") + create_directory(plot_save_path) + + p_grid = 0.5 * (self.grid + 1.0) * (cmax - cmin) + cmin + p_grid = p_grid.cpu().numpy() + sdf_grid = self.sdf_grid.cpu().numpy() + volume_coordinates_all = ( + 0.5 * (volume_coordinates_all + 1.0) * (cmax - cmin) + cmin + ) + volume_solutions_all[:, :, :3] = ( + volume_solutions_all[:, :, :3] * self.stream_velocity + ) + volume_solutions_all[:, :, 3:4] = ( + volume_solutions_all[:, :, 3:4] + * self.stream_velocity**2.0 + * self.air_density + ) + # volume_solutions_all[:, :, 4:5] = ( + # volume_solutions_all[:, :, 4:5] + # * self.stream_velocity**2.0 + # * self.air_density + # ) + # volume_solutions_all[:, :, 5] = ( + # volume_solutions_all[:, :, 5] * self.stream_velocity * self.length_scale + # ) + volume_coordinates_all = volume_coordinates_all.cpu().numpy() + volume_solutions_all = volume_solutions_all.cpu().numpy() + + # ND interpolation on a grid + prediction_grid = nd_interpolator( + volume_coordinates_all, volume_solutions_all[0], p_grid[0] + ) + nx, ny, nz, vars = prediction_grid.shape + idx = np.where(sdf_grid[0] < 0.0) + prediction_grid[idx] = float("inf") + axes_titles = ["y/4 plane", "y/2 plane"] + + plot( + prediction_grid[:, int(ny / 4), :, 0], + prediction_grid[:, int(ny / 2), :, 0], + var="x-vel", + save_path=plot_save_path + f"x-vel-midplane_{self.stream_velocity}.png", + axes_titles=axes_titles, + plot_error=False, + ) + plot( + prediction_grid[:, int(ny / 4), :, 1], + prediction_grid[:, int(ny / 2), :, 1], + var="y-vel", + save_path=plot_save_path + f"y-vel-midplane_{self.stream_velocity}.png", + axes_titles=axes_titles, + plot_error=False, + ) + plot( + prediction_grid[:, int(ny / 4), :, 2], + prediction_grid[:, int(ny / 2), :, 2], + var="z-vel", + save_path=plot_save_path + f"z-vel-midplane_{self.stream_velocity}.png", + axes_titles=axes_titles, + plot_error=False, + ) + plot( + prediction_grid[:, int(ny / 4), :, 3], + prediction_grid[:, int(ny / 2), :, 3], + var="pres", + save_path=plot_save_path + f"pres-midplane_{self.stream_velocity}.png", + axes_titles=axes_titles, + plot_error=False, + ) + # plot( + # prediction_grid[:, int(ny / 4), :, 4], + # prediction_grid[:, int(ny / 2), :, 4], + # var="tke", + # save_path=plot_save_path + f"tke-midplane_{self.stream_velocity}.png", + # axes_titles=axes_titles, + # plot_error=False, + # ) + # plot( + # prediction_grid[:, int(ny / 4), :, 5], + # prediction_grid[:, int(ny / 2), :, 5], + # var="nut", + # save_path=plot_save_path + f"nut-midplane_{self.stream_velocity}.png", + # axes_titles=axes_titles, + # plot_error=False, + # ) + + def cold_start(self, cached_geom_path=None): + print("Cold start") + self.compute_geo_encoding(cached_geom_path) + self.compute_volume_solutions(num_sample_points=10) + self.clear_out_dict() + + @torch.no_grad() + def calculate_geometry_encoding( + self, geo_centers, p_grid, sdf_grid, s_grid, sdf_surf_grid, model + ): + vol_min = self.bounding_box_min_max[0] + vol_max = self.bounding_box_min_max[1] + surf_min = self.bounding_box_surface_min_max[0] + surf_max = self.bounding_box_surface_min_max[1] + + geo_centers_vol = 2.0 * (geo_centers - vol_min) / (vol_max - vol_min) - 1 + if self.dist.world_size == 1: + encoding_g_vol = model.geo_rep_volume(geo_centers_vol, p_grid, sdf_grid) + else: + encoding_g_vol = model.module.geo_rep_volume( + geo_centers_vol, p_grid, sdf_grid + ) + + geo_centers_surf = 2.0 * (geo_centers - surf_min) / (surf_max - surf_min) - 1 + + if self.dist.world_size == 1: + encoding_g_surf = model.geo_rep_surface( + geo_centers_surf, s_grid, sdf_surf_grid + ) + else: + encoding_g_surf = model.module.geo_rep_surface( + geo_centers_surf, s_grid, sdf_surf_grid + ) + + if self.dist.world_size == 1: + encoding_g_surf1 = model.geo_rep_surface1( + geo_centers_surf, s_grid, sdf_surf_grid + ) + else: + encoding_g_surf1 = model.module.geo_rep_surface1( + geo_centers_surf, s_grid, sdf_surf_grid + ) + + geo_encoding = 0.5 * encoding_g_surf1 + 0.5 * encoding_g_vol + geo_encoding_surface = 0.5 * encoding_g_surf + return geo_encoding, geo_encoding_surface + + @torch.no_grad() + def compute_solution_on_surface( + self, + geo_encoding, + surface_mesh_centers, + surface_mesh_neighbors, + surface_normals, + surface_neighbors_normals, + surface_areas, + surface_neighbors_areas, + pos_normals_com, + s_grid, + model, + inlet_velocity, + air_density, + ): + """ + Global parameters: For this particular case, the model was trained on single velocity/density values + across all simulations. Hence, global_params_values and global_params_reference are the same. + """ + global_params_values = torch.cat( + (inlet_velocity, air_density), axis=1 + ) # (1, 2) + global_params_values = torch.unsqueeze(global_params_values, -1) # (1, 2, 1) + + global_params_reference = torch.cat( + (inlet_velocity, air_density), axis=1 + ) # (1, 2) + global_params_reference = torch.unsqueeze( + global_params_reference, -1 + ) # (1, 2, 1) + + if self.dist.world_size == 1: + geo_encoding_local = model.geo_encoding_local( + geo_encoding, surface_mesh_centers, s_grid, mode="surface" + ) + else: + geo_encoding_local = model.module.geo_encoding_local( + geo_encoding, surface_mesh_centers, s_grid, mode="surface" + ) + + pos_encoding = pos_normals_com + surface_areas = torch.unsqueeze(surface_areas, -1) + surface_neighbors_areas = torch.unsqueeze(surface_neighbors_areas, -1) + + if self.dist.world_size == 1: + pos_encoding = model.position_encoder(pos_encoding, eval_mode="surface") + tpredictions_batch = model.calculate_solution_with_neighbors( + surface_mesh_centers, + geo_encoding_local, + pos_encoding, + surface_mesh_neighbors, + surface_normals, + surface_neighbors_normals, + surface_areas, + surface_neighbors_areas, + global_params_values, + global_params_reference, + ) + else: + pos_encoding = model.module.position_encoder( + pos_encoding, eval_mode="surface" + ) + tpredictions_batch = model.module.calculate_solution_with_neighbors( + surface_mesh_centers, + geo_encoding_local, + pos_encoding, + surface_mesh_neighbors, + surface_normals, + surface_neighbors_normals, + surface_areas, + surface_neighbors_areas, + global_params_values, + global_params_reference, + ) + + return tpredictions_batch + + @torch.no_grad() + def compute_solution_in_volume( + self, + geo_encoding, + volume_mesh_centers, + sdf_nodes, + pos_enc_closest, + pos_normals_com, + p_grid, + model, + use_sdf_basis, + inlet_velocity, + air_density, + ): + ## Global parameters + global_params_values = torch.cat( + (inlet_velocity, air_density), axis=1 + ) # (1, 2) + global_params_values = torch.unsqueeze(global_params_values, -1) # (1, 2, 1) + + global_params_reference = torch.cat( + (inlet_velocity, air_density), axis=1 + ) # (1, 2) + global_params_reference = torch.unsqueeze( + global_params_reference, -1 + ) # (1, 2, 1) + + if self.dist.world_size == 1: + geo_encoding_local = model.geo_encoding_local( + geo_encoding, volume_mesh_centers, p_grid, mode="volume" + ) + else: + geo_encoding_local = model.module.geo_encoding_local( + geo_encoding, volume_mesh_centers, p_grid, mode="volume" + ) + if use_sdf_basis: + pos_encoding = torch.cat( + (sdf_nodes, pos_enc_closest, pos_normals_com), axis=-1 + ) + else: + pos_encoding = pos_normals_com + + if self.dist.world_size == 1: + pos_encoding = model.position_encoder(pos_encoding, eval_mode="volume") + tpredictions_batch = model.calculate_solution( + volume_mesh_centers, + geo_encoding_local, + pos_encoding, + global_params_values, + global_params_reference, + num_sample_points=self.stencil_size, + eval_mode="volume", + ) + else: + pos_encoding = model.module.position_encoder( + pos_encoding, eval_mode="volume" + ) + tpredictions_batch = model.module.calculate_solution( + volume_mesh_centers, + geo_encoding_local, + pos_encoding, + global_params_values, + global_params_reference, + num_sample_points=self.stencil_size, + eval_mode="volume", + ) + return tpredictions_batch + + +if __name__ == "__main__": + OmegaConf.register_new_resolver("eval", eval) + with initialize(version_base="1.3", config_path="conf"): + cfg = compose(config_name="config") + + DistributedManager.initialize() + dist = DistributedManager() + + if dist.world_size > 1: + torch.distributed.barrier() + + input_path = cfg.eval.test_path + dirnames = get_filenames(input_path) + dev_id = torch.cuda.current_device() + num_files = int(len(dirnames) / 8) + dirnames_per_gpu = dirnames[int(num_files * dev_id) : int(num_files * (dev_id + 1))] + + domino = dominoInference(cfg, dist, False) + domino.initialize_model( + model_path="/lustre/models/DoMINO.0.7.pt" + ) ## Replace the model path with location of the trained model + + for count, dirname in enumerate(dirnames_per_gpu): + # print(f"Processing file {dirname}") + filepath = os.path.join(input_path, dirname) + + STREAM_VELOCITY = 30.0 + AIR_DENSITY = 1.205 + + # Neighborhood points sampled for evaluation, tradeoff between accuracy and speed + STENCIL_SIZE = ( + 7 # Higher stencil size -> more accuracy but more evaluation time + ) + + domino.set_stl_path(filepath) + domino.set_stream_velocity(STREAM_VELOCITY) + domino.set_stencil_size(STENCIL_SIZE) + + domino.read_stl() + + domino.initialize_data_processor() + + # Calculate geometry encoding + domino.compute_geo_encoding() + + # Calculate volume solutions + domino.compute_volume_solutions( + num_sample_points=10_256_000, plot_solutions=False + ) + + # Calculate surface solutions + domino.compute_surface_solutions() + domino.compute_forces() + out_dict = domino.get_out_dict() + + print( + "Dirname:", + dirname, + "Drag:", + out_dict["drag_force"], + "Lift:", + out_dict["lift_force"], + ) + vtp_path = f"/lustre/snidhan/physicsnemo-work/domino-global-param-runs/stl-results/pred_{dirname}_4.vtp" + domino.mesh_stl.save(vtp_path) + reader = vtk.vtkXMLPolyDataReader() + reader.SetFileName(f"{vtp_path}") + reader.Update() + polydata_surf = reader.GetOutput() + + surfParam_vtk = numpy_support.numpy_to_vtk( + out_dict["pressure_surface"][0].cpu().numpy() + ) + surfParam_vtk.SetName(f"Pressure") + polydata_surf.GetCellData().AddArray(surfParam_vtk) + + surfParam_vtk = numpy_support.numpy_to_vtk( + out_dict["wall-shear-stress"][0].cpu().numpy() + ) + surfParam_vtk.SetName(f"Wall-shear-stress") + polydata_surf.GetCellData().AddArray(surfParam_vtk) + + write_to_vtp(polydata_surf, vtp_path) + exit() diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py index a85cc7df86..3c6acc3ccd 100644 --- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py +++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py @@ -15,1572 +15,620 @@ # limitations under the License. """ -This code defines a standalone distributed inference pipeline the DoMINO model. -This inference pipeline can be used to evaluate the model given an STL and -an inflow speed. The pre-trained model checkpoint can be specified in this script -or inferred from the config file. The results are calculated on a point cloud -sampled in the volume around the STL and on the surface of the STL. They are stored -in a dictionary, which can be written out for visualization. +This code shows how to use a trained DoMINO model, with it's corresponding +preprocessing pipeline, to infer values on and around an STL mesh file. + +This script uses the meshes from the DrivaerML dataset, however, the logic +is largely the same. As an overview: +- Load the model +- Set up the preprocessor +- Loop over meshes +- In each mesh, sample random points on the surface, volume, or both +- Preprocess the points and run them through the model +- Process the STL mesh centers, too +- Collect the results and return +- Save the results to file. """ -import os import time +import os +import re +from typing import Literal, Any -import hydra, re -from hydra import compose, initialize +import apex +import numpy as np +import hydra from hydra.utils import to_absolute_path from omegaconf import DictConfig, OmegaConf - -import numpy as np import torch -from physicsnemo.models.domino.model import DoMINO -from physicsnemo.utils.domino.utils import ( - unnormalize, - create_directory, - nd_interpolator, - get_filenames, - write_to_vtp, -) -from torch.cuda.amp import autocast -from torch.nn.parallel import DistributedDataParallel -from physicsnemo.distributed import DistributedManager - -from numpy.typing import NDArray -from typing import Any, Iterable, List, Literal, Mapping, Optional, Union, Callable -import warp as wp -from pathlib import Path -import pandas as pd -import matplotlib.pyplot as plt -import pyvista as pv - -try: - from physicsnemo.sym.geometry.tessellation import Tessellation - - SYM_AVAILABLE = True -except ImportError: - SYM_AVAILABLE = False - - -def combine_stls(stl_path, stl_files): - meshes = [] - combined_mesh = pv.PolyData() - for file in stl_files: - if ".stl" in file and "single_solid" not in file: - stl_file_path = os.path.join(stl_path, file) - reader = pv.get_reader(stl_file_path) - mesh_stl = reader.read() - combined_mesh = combined_mesh.merge(mesh_stl) - # meshes.append(mesh_stl) - break - # combined_mesh = pv.merge(meshes) - return combined_mesh - - -def plot(truth, prediction, var, save_path, axes_titles=None, plot_error=True): - if plot_error: - c = 3 - else: - c = 2 - fig, axes = plt.subplots(1, c, figsize=(15, 5)) - error = truth - prediction - # Plot Truth - im = axes[0].imshow( - truth, - cmap="jet", - vmax=np.ma.masked_invalid(truth).max(), - vmin=np.ma.masked_invalid(truth).min(), - ) - axes[0].axis("off") - cbar = fig.colorbar(im, ax=axes[0], orientation="vertical") - cbar.ax.tick_params(labelsize=12) - if axes_titles is None: - axes[0].set_title(f"{var} Truth") - else: - axes[0].set_title(axes_titles[0]) - - # Plot Predicted - im = axes[1].imshow( - prediction, - cmap="jet", - vmax=np.ma.masked_invalid(prediction).max(), - vmin=np.ma.masked_invalid(prediction).min(), - ) - axes[1].axis("off") - cbar = fig.colorbar(im, ax=axes[1], orientation="vertical") - cbar.ax.tick_params(labelsize=12) - if axes_titles is None: - axes[1].set_title(f"{var} Predicted") - else: - axes[1].set_title(axes_titles[1]) - - if plot_error: - # Plot Error - im = axes[2].imshow( - error, - cmap="jet", - vmax=np.ma.masked_invalid(error).max(), - vmin=np.ma.masked_invalid(error).min(), - ) - axes[2].axis("off") - cbar = fig.colorbar(im, ax=axes[2], orientation="vertical") - cbar.ax.tick_params(labelsize=12) - if axes_titles is None: - axes[2].set_title(f"{var} Error") - else: - axes[2].set_title(axes_titles[2]) - - MAE = np.mean(np.ma.masked_invalid((error))) - - if MAE: - fig.suptitle(f"MAE {MAE}", fontsize=18, x=0.5) - - plt.tight_layout() - - path_to_save_path = os.path.join(save_path) - plt.savefig(path_to_save_path, bbox_inches="tight", pad_inches=0.1) - plt.close() - - -@wp.kernel -def _bvh_query_distance( - mesh: wp.uint64, - points: wp.array(dtype=wp.vec3f), - max_dist: wp.float32, - sdf: wp.array(dtype=wp.float32), - sdf_hit_point: wp.array(dtype=wp.vec3f), - sdf_hit_point_id: wp.array(dtype=wp.int32), -): - """ - Computes the signed distance from each point in the given array `points` - to the mesh represented by `mesh`,within the maximum distance `max_dist`, - and stores the result in the array `sdf`. - - Parameters: - mesh (wp.uint64): The identifier of the mesh. - points (wp.array): An array of 3D points for which to compute the - signed distance. - max_dist (wp.float32): The maximum distance within which to search - for the closest point on the mesh. - sdf (wp.array): An array to store the computed signed distances. - sdf_hit_point (wp.array): An array to store the computed hit points. - sdf_hit_point_id (wp.array): An array to store the computed hit point ids. - - Returns: - None - """ - tid = wp.tid() - - res = wp.mesh_query_point_sign_winding_number(mesh, points[tid], max_dist) - - mesh_ = wp.mesh_get(mesh) - - p0 = mesh_.points[mesh_.indices[3 * res.face + 0]] - p1 = mesh_.points[mesh_.indices[3 * res.face + 1]] - p2 = mesh_.points[mesh_.indices[3 * res.face + 2]] - - p_closest = res.u * p0 + res.v * p1 + (1.0 - res.u - res.v) * p2 - - sdf[tid] = res.sign * wp.abs(wp.length(points[tid] - p_closest)) - sdf_hit_point[tid] = p_closest - sdf_hit_point_id[tid] = res.face - - -def signed_distance_field( - mesh_vertices: list[tuple[float, float, float]], - mesh_indices: NDArray[float], - input_points: list[tuple[float, float, float]], - max_dist: float = 1e8, - include_hit_points: bool = False, - include_hit_points_id: bool = False, - device: int = 0, -) -> wp.array: - """ - Computes the signed distance field (SDF) for a given mesh and input points. - - Parameters: - ---------- - mesh_vertices (list[tuple[float, float, float]]): List of vertices defining the mesh. - mesh_indices (list[tuple[int, int, int]]): List of indices defining the triangles of the mesh. - input_points (list[tuple[float, float, float]]): List of input points for which to compute the SDF. - max_dist (float, optional): Maximum distance within which to search for - the closest point on the mesh. Default is 1e8. - include_hit_points (bool, optional): Whether to include hit points in - the output. Default is False. - include_hit_points_id (bool, optional): Whether to include hit point - IDs in the output. Default is False. - - Returns: - ------- - wp.array: An array containing the computed signed distance field. - - Example: - ------- - >>> mesh_vertices = [(0, 0, 0), (1, 0, 0), (0, 1, 0)] - >>> mesh_indices = np.array((0, 1, 2)) - >>> input_points = [(0.5, 0.5, 0.5)] - >>> signed_distance_field(mesh_vertices, mesh_indices, input_points).numpy() - Module ... - array([0.5], dtype=float32) - """ - - wp.init() - # mesh = wp.Mesh( - # wp.array(mesh_vertices.cpu(), dtype=wp.vec3), wp.array(mesh_indices.cpu(), dtype=wp.int32) - # ) - mesh = wp.Mesh( - wp.from_torch(mesh_vertices, dtype=wp.vec3), - wp.from_torch(mesh_indices, dtype=wp.int32), - ) - - sdf_points = wp.from_torch(input_points, dtype=wp.vec3) - sdf = wp.zeros(shape=sdf_points.shape, dtype=wp.float32) - sdf_hit_point = wp.zeros(shape=sdf_points.shape, dtype=wp.vec3f) - sdf_hit_point_id = wp.zeros(shape=sdf_points.shape, dtype=wp.int32) - wp.launch( - kernel=_bvh_query_distance, - dim=len(sdf_points), - inputs=[mesh.id, sdf_points, max_dist, sdf, sdf_hit_point, sdf_hit_point_id], - ) - if include_hit_points and include_hit_points_id: - return ( - wp.to_torch(sdf), - wp.to_torch(sdf_hit_point), - wp.to_torch(sdf_hit_point_id), - ) - elif include_hit_points: - return (wp.to_torch(sdf), wp.to_torch(sdf_hit_point)) - elif include_hit_points_id: - return (wp.to_torch(sdf), wp.to_torch(sdf_hit_point_id)) - else: - return wp.to_torch(sdf) - - -def shuffle_array_torch(surface_vertices, geometry_points, device): - idx = torch.unsqueeze( - torch.randperm(surface_vertices.shape[0])[:geometry_points], -1 - ).to(device) - idx = idx.repeat(1, 3) - surface_sampled = torch.gather(surface_vertices, 0, idx) - return surface_sampled - - -class inferenceDataPipe: - def __init__( - self, - device: int = 0, - grid_resolution: Optional[list] = [256, 96, 64], - normalize_coordinates: bool = False, - geom_points_sample: int = 300000, - positional_encoding: bool = False, - surface_vertices=None, - surface_indices=None, - surface_areas=None, - surface_centers=None, - use_sdf_basis=False, - ): - self.surface_vertices = surface_vertices - self.surface_indices = surface_indices - self.surface_areas = surface_areas - self.surface_centers = surface_centers - self.device = device - self.grid_resolution = grid_resolution - self.normalize_coordinates = normalize_coordinates - self.geom_points_sample = geom_points_sample - self.positional_encoding = positional_encoding - self.use_sdf_basis = use_sdf_basis - torch.manual_seed(int(42 + torch.cuda.current_device())) - self.data_dict = {} - - def clear_dict(self): - del self.data_dict - - def clear_volume_dict(self): - del self.data_dict["volume_mesh_centers"] - del self.data_dict["pos_enc_closest"] - del self.data_dict["pos_normals_com"] - del self.data_dict["sdf_nodes"] - - def create_grid_torch(self, mx, mn, nres): - start_time = time.time() - dx = torch.linspace(mn[0], mx[0], nres[0], device=self.device) - dy = torch.linspace(mn[1], mx[1], nres[1], device=self.device) - dz = torch.linspace(mn[2], mx[2], nres[2], device=self.device) - - xv, yv, zv = torch.meshgrid(dx, dy, dz, indexing="ij") - xv = torch.unsqueeze(xv, -1) - yv = torch.unsqueeze(yv, -1) - zv = torch.unsqueeze(zv, -1) - grid = torch.cat((xv, yv, zv), axis=-1) - return grid - - def process_surface_mesh(self, bounding_box=None, bounding_box_surface=None): - # Use coarse mesh to calculate SDF - surface_vertices = self.surface_vertices - surface_indices = self.surface_indices - surface_areas = self.surface_areas - surface_centers = self.surface_centers - - start_time = time.time() - - if bounding_box is None: - # Create a bounding box - s_max = torch.amax(surface_vertices, 0) - s_min = torch.amin(surface_vertices, 0) - - c_max = s_max + (s_max - s_min) / 2 - c_min = s_min - (s_max - s_min) / 2 - c_min[2] = s_min[2] - else: - c_min = bounding_box[0] - c_max = bounding_box[1] - - if bounding_box_surface is None: - # Create a bounding box - s_max = torch.amax(surface_vertices, 0) - s_min = torch.amin(surface_vertices, 0) - - surf_max = s_max + (s_max - s_min) / 2 - surf_min = s_min - (s_max - s_min) / 2 - surf_min[2] = s_min[2] - else: - surf_min = bounding_box_surface[0] - surf_max = bounding_box_surface[1] - - nx, ny, nz = self.grid_resolution - - grid = self.create_grid_torch(c_max, c_min, self.grid_resolution) - grid_reshaped = torch.reshape(grid, (nx * ny * nz, 3)) - - # SDF on grid - sdf_grid = signed_distance_field( - surface_vertices, surface_indices, grid_reshaped, device=self.device - ) - sdf_grid = torch.reshape(sdf_grid, (nx, ny, nz)) - - surface_areas = torch.unsqueeze(surface_areas, -1) - center_of_mass = torch.sum(surface_centers * surface_areas, 0) / torch.sum( - surface_areas - ) +DISABLE_RMM = os.environ.get("DISABLE_RMM", False) +if not DISABLE_RMM: + import rmm + from rmm.allocators.torch import rmm_torch_allocator - s_grid = self.create_grid_torch(surf_max, surf_min, self.grid_resolution) - surf_grid_reshaped = torch.reshape(s_grid, (nx * ny * nz, 3)) + rmm.reinitialize(pool_allocator=True) + torch.cuda.memory.change_current_allocator(rmm_torch_allocator) - surf_sdf_grid = signed_distance_field( - surface_vertices, surface_indices, surf_grid_reshaped, device=self.device - ) - surf_sdf_grid = torch.reshape(surf_sdf_grid, (nx, ny, nz)) - - if self.normalize_coordinates: - grid = 2.0 * (grid - c_min) / (c_max - c_min) - 1.0 - s_grid = 2.0 * (s_grid - surf_min) / (surf_max - surf_min) - 1.0 - - surface_vertices = torch.unsqueeze(surface_vertices, 0) - grid = torch.unsqueeze(grid, 0) - s_grid = torch.unsqueeze(s_grid, 0) - sdf_grid = torch.unsqueeze(sdf_grid, 0) - surf_sdf_grid = torch.unsqueeze(surf_sdf_grid, 0) - max_min = [c_min, c_max] - surf_max_min = [surf_min, surf_max] - center_of_mass = center_of_mass - - return ( - surface_vertices, - grid, - sdf_grid, - max_min, - s_grid, - surf_sdf_grid, - surf_max_min, - center_of_mass, - ) - - def sample_stl_points( - self, - num_points, - stl_centers, - stl_area, - stl_normals, - max_min, - center_of_mass, - bounding_box=None, - stencil_size=7, - ): - if bounding_box is not None: - c_max = bounding_box[1] - c_min = bounding_box[0] - else: - c_min = max_min[0] - c_max = max_min[1] - - start_time = time.time() - - nx, ny, nz = self.grid_resolution - - idx = np.arange(stl_centers.shape[0]) - # np.random.shuffle(idx) - if num_points is not None: - idx = idx[:num_points] - - surface_coordinates = stl_centers - surface_normals = stl_normals - surface_area = stl_area - - if stencil_size > 1: - interp_func = KDTree(surface_coordinates) - dd, ii = interp_func.query(surface_coordinates, k=stencil_size) - surface_neighbors = surface_coordinates[ii] - surface_neighbors = surface_neighbors[:, 1:] + 1e-6 - surface_neighbors_normals = surface_normals[ii] - surface_neighbors_normals = surface_neighbors_normals[:, 1:] - surface_neighbors_area = surface_area[ii] - surface_neighbors_area = surface_neighbors_area[:, 1:] - else: - surface_neighbors = np.expand_dims(surface_coordinates, 1) + 1e-6 - surface_neighbors_normals = np.expand_dims(surface_normals, 1) - surface_neighbors_area = np.expand_dims(surface_area, 1) - - surface_coordinates = torch.from_numpy(surface_coordinates).to(self.device) - surface_normals = torch.from_numpy(surface_normals).to(self.device) - surface_area = torch.from_numpy(surface_area).to(self.device) - surface_neighbors = torch.from_numpy(surface_neighbors).to(self.device) - surface_neighbors_normals = torch.from_numpy(surface_neighbors_normals).to( - self.device - ) - surface_neighbors_area = torch.from_numpy(surface_neighbors_area).to( - self.device - ) - - pos_normals_com = surface_coordinates - center_of_mass - - if self.normalize_coordinates: - surface_coordinates = ( - 2.0 * (surface_coordinates - c_min) / (c_max - c_min) - 1.0 - ) - surface_neighbors = ( - 2.0 * (surface_neighbors - c_min) / (c_max - c_min) - 1.0 - ) - - surface_coordinates = surface_coordinates[idx] - surface_area = surface_area[idx] - surface_normals = surface_normals[idx] - pos_normals_com = pos_normals_com[idx] - surface_coordinates = torch.unsqueeze(surface_coordinates, 0) - surface_normals = torch.unsqueeze(surface_normals, 0) - surface_area = torch.unsqueeze(surface_area, 0) - pos_normals_com = torch.unsqueeze(pos_normals_com, 0) - - surface_neighbors = surface_neighbors[idx] - surface_neighbors_normals = surface_neighbors_normals[idx] - surface_neighbors_area = surface_neighbors_area[idx] - surface_neighbors = torch.unsqueeze(surface_neighbors, 0) - surface_neighbors_normals = torch.unsqueeze(surface_neighbors_normals, 0) - surface_neighbors_area = torch.unsqueeze(surface_neighbors_area, 0) - - scaling_factors = [c_max, c_min] - - return ( - surface_coordinates, - surface_neighbors, - surface_normals, - surface_neighbors_normals, - surface_area, - surface_neighbors_area, - pos_normals_com, - scaling_factors, - idx, - ) - - def sample_points_on_surface( - self, - num_points_surf, - max_min, - center_of_mass, - stl_path, - bounding_box=None, - stencil_size=7, - ): - if bounding_box is not None: - c_max = bounding_box[1] - c_min = bounding_box[0] - else: - c_min = max_min[0] - c_max = max_min[1] - - start_time = time.time() - - nx, ny, nz = self.grid_resolution - - obj = Tessellation.from_stl(stl_path, airtight=False) - - boundary = obj.sample_boundary(num_points_surf) - surface_coordinates = np.concatenate( - [ - np.float32(boundary["x"]), - np.float32(boundary["y"]), - np.float32(boundary["z"]), - ], - axis=1, - ) - surface_normals = np.concatenate( - [ - np.float32(boundary["normal_x"]), - np.float32(boundary["normal_y"]), - np.float32(boundary["normal_z"]), - ], - axis=1, - ) - - surface_area = np.float32(boundary["area"]) - - interp_func = KDTree(surface_coordinates) - dd, ii = interp_func.query(surface_coordinates, k=stencil_size) - surface_neighbors = surface_coordinates[ii] - surface_neighbors = surface_neighbors[:, 1:] - surface_neighbors_normals = surface_normals[ii] - surface_neighbors_normals = surface_neighbors_normals[:, 1:] - surface_neighbors_area = surface_area[ii] - surface_neighbors_area = surface_neighbors_area[:, 1:] - - surface_coordinates = torch.from_numpy(surface_coordinates).to(self.device) - surface_normals = torch.from_numpy(surface_normals).to(self.device) - surface_area = torch.from_numpy(surface_area).to(self.device) - surface_neighbors = torch.from_numpy(surface_neighbors).to(self.device) - surface_neighbors_normals = torch.from_numpy(surface_neighbors_normals).to( - self.device - ) - surface_neighbors_area = torch.from_numpy(surface_neighbors_area).to( - self.device - ) - - pos_normals_com = surface_coordinates - center_of_mass - - if self.normalize_coordinates: - surface_coordinates = ( - 2.0 * (surface_coordinates - c_min) / (c_max - c_min) - 1.0 - ) - - surface_coordinates = torch.unsqueeze(surface_coordinates, 0) - surface_normals = torch.unsqueeze(surface_normals, 0) - surface_area = torch.unsqueeze(surface_area, 0) - pos_normals_com = torch.unsqueeze(pos_normals_com, 0) - - surface_neighbors = torch.unsqueeze(surface_neighbors, 0) - surface_neighbors_normals = torch.unsqueeze(surface_neighbors_normals, 0) - surface_neighbors_area = torch.unsqueeze(surface_neighbors_area, 0) - - scaling_factors = [c_max, c_min] - - return ( - surface_coordinates, - surface_neighbors, - surface_normals, - surface_neighbors_normals, - surface_area, - surface_neighbors_area, - pos_normals_com, - scaling_factors, - ) - - def sample_points_in_volume( - self, num_points_vol, max_min, center_of_mass, bounding_box=None - ): - if bounding_box is not None: - c_max = bounding_box[1] - c_min = bounding_box[0] - else: - c_min = max_min[0] - c_max = max_min[1] - - start_time = time.time() - - nx, ny, nz = self.grid_resolution - for k in range(10): - if k > 0: - num_pts_vol = num_points_vol - int(volume_coordinates.shape[0] / 2) - else: - num_pts_vol = int(1.25 * num_points_vol) - - volume_coordinates_sub = (c_max - c_min) * torch.rand( - num_pts_vol, 3, device=self.device, dtype=torch.float32 - ) + c_min - - sdf_nodes, sdf_node_closest_point = signed_distance_field( - self.surface_vertices, - self.surface_indices, - volume_coordinates_sub, - include_hit_points=True, - device=self.device, - ) - sdf_nodes = torch.unsqueeze(sdf_nodes, -1) - - idx = torch.unsqueeze(torch.where((sdf_nodes > 0))[0], -1) - idx = idx.repeat(1, volume_coordinates_sub.shape[1]) - if k == 0: - volume_coordinates = torch.gather(volume_coordinates_sub, 0, idx) - else: - volume_coordinates_1 = torch.gather(volume_coordinates_sub, 0, idx) - volume_coordinates = torch.cat( - (volume_coordinates, volume_coordinates_1), axis=0 - ) +import torchinfo +import torch.distributed as dist +from torch.amp import GradScaler, autocast +from torch.nn.parallel import DistributedDataParallel +from torch.utils.data import DataLoader +from torch.utils.data.distributed import DistributedSampler +from torch.utils.tensorboard import SummaryWriter +from nvtx import annotate as nvtx_annotate +import torch.cuda.nvtx as nvtx - if volume_coordinates.shape[0] > num_points_vol: - volume_coordinates = volume_coordinates[:num_points_vol] - break +from physicsnemo.distributed import DistributedManager +from physicsnemo.launch.utils import load_checkpoint, save_checkpoint +from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper - sdf_nodes, sdf_node_closest_point = signed_distance_field( - self.surface_vertices, - self.surface_indices, - volume_coordinates, - include_hit_points=True, - device=self.device, - ) - sdf_nodes = torch.unsqueeze(sdf_nodes, -1) +from physicsnemo.datapipes.cae.domino_datapipe2 import ( + DoMINODataPipe, + create_domino_dataset, +) +from physicsnemo.datapipes.cae.drivaer_ml_dataset import ( + DrivaerMLDataset, +) - pos_normals_closest = volume_coordinates - sdf_node_closest_point - pos_normals_com = volume_coordinates - center_of_mass +from physicsnemo.models.domino.model import DoMINO +from physicsnemo.utils.domino.utils import sample_points_on_mesh - if self.normalize_coordinates: - volume_coordinates = ( - 2.0 * (volume_coordinates - c_min) / (c_max - c_min) - 1.0 - ) +from utils import ScalingFactors - volume_coordinates = torch.unsqueeze(volume_coordinates, 0) - pos_normals_com = torch.unsqueeze(pos_normals_com, 0) +# This is included for GPU memory tracking: +from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo +import time - if self.use_sdf_basis: - pos_normals_closest = torch.unsqueeze(pos_normals_closest, 0) - sdf_nodes = torch.unsqueeze(sdf_nodes, 0) - scaling_factors = [c_max, c_min] - return ( - volume_coordinates, - pos_normals_com, - pos_normals_closest, - sdf_nodes, - scaling_factors, - ) +# Initialize NVML +nvmlInit() -class dominoInference: - def __init__( - self, - cfg: DictConfig, - dist: None, - cached_geo_encoding: bool = False, - ): - self.cfg = cfg - self.dist = dist - self.stream_velocity = None - self.stencil_size = None - self.stl_path = None - self.stl_vertices = None - self.stl_centers = None - self.surface_areas = None - self.mesh_indices_flattened = None - self.length_scale = 1.0 - if self.dist is None: - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - else: - self.device = self.dist.device +from physicsnemo.utils.profiling import profile, Profiler - self.air_density = torch.full((1, 1), 1.205, dtype=torch.float32).to( - self.device - ) - ( - self.num_vol_vars, - self.num_surf_vars, - self.num_global_features, - ) = self.get_num_variables() - self.model = None - self.grid_resolution = torch.tensor(self.cfg.model.interp_res).to(self.device) - self.vol_factors = None - self.bounding_box_min_max = None - self.bounding_box_surface_min_max = None - self.center_of_mass = None - self.grid = None - self.geometry_encoding = None - self.geometry_encoding_surface = None - self.cached_geo_encoding = cached_geo_encoding - self.out_dict = {} - - def get_geometry_encoding(self): - return self.geometry_encoding - - def get_geometry_encoding_surface(self): - return self.geometry_encoding_surface - - def get_out_dict(self): - return self.out_dict - - def clear_out_dict(self): - self.out_dict.clear() - - def initialize_data_processor(self): - self.ifp = inferenceDataPipe( - device=self.device, - surface_vertices=self.stl_vertices, - surface_indices=self.mesh_indices_flattened, - surface_areas=self.surface_areas, - surface_centers=self.stl_centers, - grid_resolution=self.grid_resolution, - normalize_coordinates=True, - geom_points_sample=300000, - positional_encoding=False, - use_sdf_basis=self.cfg.model.use_sdf_in_basis_func, - ) - def load_bounding_box(self): - if ( - self.cfg.data.bounding_box.min is not None - and self.cfg.data.bounding_box.max is not None - ): - c_min = torch.from_numpy( - np.array(self.cfg.data.bounding_box.min, dtype=np.float32) - ).to(self.device) - c_max = torch.from_numpy( - np.array(self.cfg.data.bounding_box.max, dtype=np.float32) - ).to(self.device) - self.bounding_box_min_max = [c_min, c_max] - - if ( - self.cfg.data.bounding_box_surface.min is not None - and self.cfg.data.bounding_box_surface.max is not None - ): - c_min = torch.from_numpy( - np.array(self.cfg.data.bounding_box_surface.min, dtype=np.float32) - ).to(self.device) - c_max = torch.from_numpy( - np.array(self.cfg.data.bounding_box_surface.max, dtype=np.float32) - ).to(self.device) - self.bounding_box_surface_min_max = [c_min, c_max] - - def load_volume_scaling_factors(self): - scaling_param_path = self.cfg.eval.scaling_param_path - vol_factors_path = os.path.join( - scaling_param_path, "volume_scaling_factors.npy" - ) +from loss import compute_loss_dict +from utils import get_num_vars - vol_factors = np.load(vol_factors_path, allow_pickle=True) - vol_factors = torch.from_numpy(vol_factors).to(self.device) - return vol_factors +def inference_on_single_stl( + stl_coordinates: torch.Tensor, + stl_faces: torch.Tensor, + model: DoMINO, + datapipe: DoMINODataPipe, + batch_size: int, + total_points: int, + gpu_handle: int | None = None, + logger: PythonLogger | None = None, +): + """ + Perform model inference on a single STL mesh. - def load_surface_scaling_factors(self): - scaling_param_path = self.cfg.eval.scaling_param_path - surf_factors_path = os.path.join( - scaling_param_path, "surface_scaling_factors.npy" - ) + This function will take the input mesh + faces and + then sample the surface and volume to produce the model outputs + at `total_points` locations in batches of `batch_size`. - surf_factors = np.load(surf_factors_path, allow_pickle=True) - surf_factors = torch.from_numpy(surf_factors).to(self.device) - - return surf_factors - - def read_stl(self): - stl_files = get_filenames(self.stl_path) - mesh_stl = combine_stls(self.stl_path, stl_files) - if self.cfg.eval.refine_stl: - mesh_stl = mesh_stl.subdivide( - nsub=2, subfilter="linear" - ) # .smooth(n_iter=20) - stl_vertices = mesh_stl.points - length_scale = np.amax(np.amax(stl_vertices, 0) - np.amin(stl_vertices, 0)) - stl_centers = mesh_stl.cell_centers().points - # Assuming triangular elements - stl_faces = np.array(mesh_stl.faces).reshape((-1, 4))[:, 1:] - mesh_indices_flattened = stl_faces.flatten() - - surface_areas = mesh_stl.compute_cell_sizes( - length=False, area=True, volume=False - ) - surface_areas = np.array(surface_areas.cell_data["Area"]) - surface_normals = np.array(mesh_stl.cell_normals, dtype=np.float32) - self.stl_vertices = torch.from_numpy(np.float32(stl_vertices)).to(self.device) - self.stl_centers = torch.from_numpy(np.float32(stl_centers)).to(self.device) - self.surface_areas = torch.from_numpy(np.float32(surface_areas)).to(self.device) - self.stl_normals = -1.0 * torch.from_numpy(np.float32(surface_normals)).to( - self.device - ) - self.mesh_indices_flattened = torch.from_numpy( - np.int32(mesh_indices_flattened) - ).to(self.device) - self.length_scale = length_scale - self.mesh_stl = mesh_stl - - def read_stl_trimesh( - self, stl_vertices, stl_faces, stl_centers, surface_normals, surface_areas - ): - mesh_indices_flattened = stl_faces.flatten() - length_scale = np.amax(np.amax(stl_vertices, 0) - np.amin(stl_vertices, 0)) - self.stl_vertices = torch.from_numpy(stl_vertices).to(self.device) - self.stl_centers = torch.from_numpy(stl_centers).to(self.device) - self.stl_normals = -1.0 * torch.from_numpy(surface_normals).to(self.device) - self.surface_areas = torch.from_numpy(surface_areas).to(self.device) - self.mesh_indices_flattened = torch.from_numpy( - np.int32(mesh_indices_flattened) - ).to(self.device) - self.length_scale = length_scale - - def get_num_variables(self): - volume_variable_names = list(self.cfg.variables.volume.solution.keys()) - num_vol_vars = 0 - for j in volume_variable_names: - if self.cfg.variables.volume.solution[j] == "vector": - num_vol_vars += 3 - else: - num_vol_vars += 1 - - surface_variable_names = list(self.cfg.variables.surface.solution.keys()) - num_surf_vars = 0 - for j in surface_variable_names: - if self.cfg.variables.surface.solution[j] == "vector": - num_surf_vars += 3 - else: - num_surf_vars += 1 - - num_global_features = 0 - global_params_names = list(cfg.variables.global_parameters.keys()) - for param in global_params_names: - if cfg.variables.global_parameters[param].type == "vector": - num_global_features += len( - cfg.variables.global_parameters[param].reference + Args: + stl_coordinates: The coordinates of the STL mesh. + stl_faces: The faces of the STL mesh. + model: The model to use for inference. + datapipe: The datapipe to use for preprocessing. + batch_size: The batch size to use for inference. + total_points: The total number of points to process. + gpu_handle: The GPU handle to use for inference. + logger: The logger to use for logging. + """ + device = stl_coordinates.device + batch_start_time = time.perf_counter() + ###################################################### + # The IO only reads in "stl_faces" and "stl_coordinates". + # "stl_areas" and "stl_centers" would be computed by + # pyvista on CPU - instead, we do it on the GPU + # right here. + ###################################################### + + # Center is a mean of the 3 vertices + triangle_vertices = stl_coordinates[stl_faces.reshape((-1, 3))] + stl_centers = triangle_vertices.mean(dim=-1) + ###################################################### + # Area we compute from the cross product of two sides: + ###################################################### + d1 = triangle_vertices[:, 1] - triangle_vertices[:, 0] + d2 = triangle_vertices[:, 2] - triangle_vertices[:, 0] + stl_mesh_normals = torch.linalg.cross(d1, d2, dim=1) + normals_norm = torch.linalg.norm(stl_mesh_normals, dim=1) + stl_mesh_normals = stl_mesh_normals / normals_norm.unsqueeze(1) + stl_areas = 0.5 * normals_norm + + ###################################################### + # For computing the points, we take those stl objects, + # sample in chunks of `batch_size` until we've + # accumulated `total_points` predictions. + ###################################################### + + batch_output_dict = {} + N = 2 + total_points_processed = 0 + + # Use these lists to build up the output tensors: + surface_results = [] + volume_results = [] + + while total_points_processed < total_points: + inner_loop_start_time = time.perf_counter() + + ###################################################### + # Create the dictionary as the preprocessing expects: + ###################################################### + inference_dict = { + "stl_coordinates": stl_coordinates, + "stl_faces": stl_faces, + "stl_centers": stl_centers, + "stl_areas": stl_areas, + } + + # If the surface data is part of the model, sample the surface: + + if datapipe.model_type == "surface" or datapipe.model_type == "combined": + ###################################################### + # This function will sample points on the STL surface + ###################################################### + sampled_points, sampled_faces, sampled_areas, sampled_normals = ( + sample_points_on_mesh( + stl_coordinates, + stl_faces, + batch_size, + mesh_normals=stl_mesh_normals, + mesh_areas=stl_areas, ) - elif cfg.variables.global_parameters[param].type == "scalar": - num_global_features += 1 - else: - raise ValueError(f"Unknown global parameter type") - - return num_vol_vars, num_surf_vars, num_global_features - - def initialize_model(self, model_path): - model = ( - DoMINO( - input_features=3, - output_features_vol=self.num_vol_vars, - output_features_surf=self.num_surf_vars, - global_features=self.num_global_features, - model_parameters=self.cfg.model, ) - .to(self.device) - .eval() - ) - model = torch.compile(model, disable=True) - - checkpoint_iter = torch.load( - to_absolute_path(model_path), map_location=self.dist.device - ) - - model.load_state_dict(checkpoint_iter) - - if self.dist is not None: - if self.dist.world_size > 1: - model = DistributedDataParallel( - model, - device_ids=[self.dist.local_rank], - output_device=self.dist.device, - broadcast_buffers=self.dist.broadcast_buffers, - find_unused_parameters=self.dist.find_unused_parameters, - gradient_as_bucket_view=True, - static_graph=True, - ) - - self.model = model - self.vol_factors = self.load_volume_scaling_factors() - self.surf_factors = self.load_surface_scaling_factors() - self.load_bounding_box() - def set_stream_velocity(self, stream_velocity): - self.stream_velocity = torch.full( - (1, 1), stream_velocity, dtype=torch.float32 - ).to(self.device) + inference_dict["surface_mesh_centers"] = sampled_points + inference_dict["surface_normals"] = sampled_normals + inference_dict["surface_areas"] = sampled_areas + inference_dict["surface_faces"] = sampled_faces - def set_stencil_size(self, stencil_size): - self.stencil_size = stencil_size + # If the volume data is part of the model, sample the volume: + if datapipe.model_type == "volume" or datapipe.model_type == "combined": + ###################################################### + # Build up volume points too with uniform sampling + # TODO - this doesn't filter points that are + # internal to the mesh + ###################################################### + c_min = datapipe.config.bounding_box_dims[1] + c_max = datapipe.config.bounding_box_dims[0] - def set_air_density(self, air_density): - self.air_density = torch.full((1, 1), air_density, dtype=torch.float32).to( - self.device - ) + sampled_volume_points = (c_max - c_min) * torch.rand( + batch_size, 3, device=device, dtype=torch.float32 + ) + c_min - def set_stl_path(self, filename): - self.stl_path = filename - - @torch.no_grad() - def compute_geo_encoding(self, cached_geom_path=None): - start_time = time.time() - - if not self.cached_geo_encoding: - ( - surface_vertices, - grid, - sdf_grid, - max_min, - s_grid, - surf_sdf_grid, - surf_max_min, - center_of_mass, - ) = self.ifp.process_surface_mesh( - self.bounding_box_min_max, self.bounding_box_surface_min_max - ) - if self.bounding_box_min_max is None: - self.bounding_box_min_max = max_min - if self.bounding_box_surface_min_max is None: - self.bounding_box_surface_min_max = surf_max_min - self.center_of_mass = center_of_mass - self.grid = grid - self.s_grid = s_grid - self.sdf_grid = sdf_grid - self.surf_sdf_grid = surf_sdf_grid - self.out_dict["sdf"] = sdf_grid - - geo_encoding, geo_encoding_surface = self.calculate_geometry_encoding( - surface_vertices, grid, sdf_grid, s_grid, surf_sdf_grid, self.model - ) + inference_dict["volume_mesh_centers"] = (sampled_volume_points,) + + ###################################################### + # Pre-process the data with the datapipe: + ###################################################### + preprocessed_data = datapipe.process_data(inference_dict) + + if datapipe.model_type == "volume" or datapipe.model_type == "combined": + ###################################################### + # Use the sign of the volume SDF to filter out points + # That are inside the STL mesh + ###################################################### + sdf_nodes = preprocessed_data["sdf_nodes"] + valid_volume_idx = sdf_nodes > 0 + preprocessed_data["volume_mesh_centers"] = preprocessed_data[ + "volume_mesh_centers" + ][valid_volume_idx] + + ###################################################### + # Add a batch dimension to the data_dict + # (normally this is added in __getitem__ of the datapipe) + ###################################################### + preprocessed_data = {k: v.unsqueeze(0) for k, v in preprocessed_data.items()} + + ###################################################### + # Forward pass through the model: + ###################################################### + with torch.no_grad(): + output_vol, output_surf = model(preprocessed_data) + + ###################################################### + # unnormalize the outputs with the datapipe + # Whatever settings are configured for normalizing the + # output fields - even though we don't have ground + # truth here - are reused to undo that for the predictions + ###################################################### + output_vol, output_surf = datapipe.unscale_model_outputs( + output_vol, output_surf + ) + + surface_results.append(output_surf) + volume_results.append(output_vol) + + total_points_processed += batch_size + + current_loop_time = time.perf_counter() + + logging_string = f"Device {device} processed {total_points_processed} points of {total_points}\n" + if gpu_handle is not None: + gpu_info = nvmlDeviceGetMemoryInfo(gpu_handle) + gpu_memory_used = gpu_info.used / (1024**3) + logging_string += f" GPU memory used: {gpu_memory_used:.3f} Gb\n" + + logging_string += f" Time taken since batch start: {current_loop_time - batch_start_time:.2f} seconds\n" + logging_string += f" iteration throughput: {batch_size / (current_loop_time - inner_loop_start_time):.1f} points per second\n" + logging_string += f" Batch mean throughput: {total_points_processed / (current_loop_time - batch_start_time):.1f} points per second.\n" + + if logger is not None: + logger.info(logging_string) else: - out_dict_cached = torch.load(cached_geom_path, map_location=self.device) - self.bounding_box_min_max = out_dict_cached["bounding_box_min_max"] - self.grid = out_dict_cached["grid"] - self.sdf_grid = out_dict_cached["sdf_grid"] - self.center_of_mass = out_dict_cached["com"] - geo_encoding = out_dict_cached["geo_encoding"] - geo_encoding_surface = out_dict_cached["geo_encoding_surface"] - self.out_dict["sdf"] = self.sdf_grid - torch.cuda.synchronize() - print("Time taken for geo encoding = %f" % (time.time() - start_time)) - - self.geometry_encoding = geo_encoding - self.geometry_encoding_surface = geo_encoding_surface - - def compute_forces(self): - pressure = self.out_dict["pressure_surface"] - wall_shear = self.out_dict["wall-shear-stress"] - # sampling_indices = self.out_dict["sampling_indices"] - - surface_normals = self.stl_normals[self.sampling_indices] - surface_areas = self.surface_areas[self.sampling_indices] - - drag_force = torch.sum( - pressure[0, :, 0] * surface_normals[:, 0] * surface_areas - - wall_shear[0, :, 0] * surface_areas - ) - lift_force = torch.sum( - pressure[0, :, 0] * surface_normals[:, 2] * surface_areas - - wall_shear[0, :, 2] * surface_areas - ) + print(logging_string) + + ###################################################### + # Here at the end, get the values for the stl centers + # by updating the previous inference dict + # Only do this if the surface is part of the computation + # Comments are shorter here - it's a condensed version + # of the above logic. + ###################################################### + if datapipe.model_type == "surface" or datapipe.model_type == "combined": + stl_inference_dict = { + "stl_coordinates": stl_coordinates, + "stl_faces": stl_faces, + "stl_centers": stl_centers, + "stl_areas": stl_areas, + } + inference_dict["surface_mesh_centers"] = stl_centers + inference_dict["surface_normals"] = stl_mesh_normals + inference_dict["surface_areas"] = stl_areas + inference_dict["surface_faces"] = stl_faces + + # Just reuse the previous volume samples here if needed: + if datapipe.model_type == "combined": + inference_dict["volume_mesh_centers"] = sampled_volume_points + + # Preprocess: + preprocessed_data = datapipe.process_data(inference_dict) + + # Pull out the invalid volume points again, if needed: + if datapipe.model_type == "combined": + sdf_nodes = preprocessed_data["sdf_nodes"] + valid_volume_idx = sdf_nodes > 0 + preprocessed_data["volume_mesh_centers"] = preprocessed_data[ + "volume_mesh_centers" + ][valid_volume_idx] + + # Run the model forward: + with torch.no_grad(): + preprocessed_data = { + k: v.unsqueeze(0) for k, v in preprocessed_data.items() + } + _, output_surf = model(preprocessed_data) + + # Unnormalize the outputs: + _, stl_center_results = datapipe.unscale_model_outputs(None, output_surf) - self.out_dict["drag_force"] = drag_force - self.out_dict["lift_force"] = lift_force - - @torch.inference_mode() - def compute_surface_solutions(self, num_sample_points=None, plot_solutions=False): - total_time = 0.0 - start_event = torch.cuda.Event(enable_timing=True) - end_event = torch.cuda.Event(enable_timing=True) - - geo_encoding = self.geometry_encoding_surface - j = 0 - - with autocast(enabled=True): - start_event.record() - ( - surface_mesh_centers, - surface_neighbors, - surface_normals, - surface_neighbors_normals, - surface_areas, - surface_neighbors_areas, - pos_normals_com, - surf_scaling_factors, - sampling_indices, - ) = self.ifp.sample_stl_points( - num_sample_points, - self.stl_centers.cpu().numpy(), - self.surface_areas.cpu().numpy(), - self.stl_normals.cpu().numpy(), - max_min=self.bounding_box_surface_min_max, - center_of_mass=self.center_of_mass, - stencil_size=self.stencil_size, - ) - end_event.record() - end_event.synchronize() - cur_time = start_event.elapsed_time(end_event) / 1000.0 - print(f"sample_points_in_surface time (s): {cur_time:.4f}") - # vol_coordinates_all.append(volume_mesh_centers) - surface_coordinates_all = surface_mesh_centers - - inner_time = time.time() - start_event.record() - if num_sample_points == None: - point_batch_size = 512_000 - num_points = surface_coordinates_all.shape[1] - subdomain_points = int(np.floor(num_points / point_batch_size)) - surface_solutions = torch.zeros(1, num_points, self.num_surf_vars).to( - self.device - ) - for p in range(subdomain_points + 1): - start_idx = p * point_batch_size - end_idx = (p + 1) * point_batch_size - surface_solutions_batch = self.compute_solution_on_surface( - geo_encoding, - surface_mesh_centers[:, start_idx:end_idx], - surface_neighbors[:, start_idx:end_idx], - surface_normals[:, start_idx:end_idx], - surface_neighbors_normals[:, start_idx:end_idx], - surface_areas[:, start_idx:end_idx], - surface_neighbors_areas[:, start_idx:end_idx], - pos_normals_com[:, start_idx:end_idx], - self.s_grid, - self.model, - inlet_velocity=self.stream_velocity, - air_density=self.air_density, - ) - surface_solutions[:, start_idx:end_idx] = surface_solutions_batch - else: - point_batch_size = 512_000 - num_points = num_sample_points - subdomain_points = int(np.floor(num_points / point_batch_size)) - surface_solutions = torch.zeros(1, num_points, self.num_surf_vars).to( - self.device - ) - for p in range(subdomain_points + 1): - start_idx = p * point_batch_size - end_idx = (p + 1) * point_batch_size - surface_solutions_batch = self.compute_solution_on_surface( - geo_encoding, - surface_mesh_centers[:, start_idx:end_idx], - surface_neighbors[:, start_idx:end_idx], - surface_normals[:, start_idx:end_idx], - surface_neighbors_normals[:, start_idx:end_idx], - surface_areas[:, start_idx:end_idx], - surface_neighbors_areas[:, start_idx:end_idx], - pos_normals_com[:, start_idx:end_idx], - self.s_grid, - self.model, - inlet_velocity=self.stream_velocity, - air_density=self.air_density, - ) - # print(torch.amax(surface_solutions_batch, (0, 1)), torch.amin(surface_solutions_batch, (0, 1))) - surface_solutions[:, start_idx:end_idx] = surface_solutions_batch - - # print(surface_solutions.shape) - end_event.record() - end_event.synchronize() - cur_time = start_event.elapsed_time(end_event) / 1000.0 - print(f"compute_solution time (s): {cur_time:.4f}") - total_time += float(time.time() - inner_time) - surface_solutions_all = surface_solutions - print( - "Time taken for compute solution on surface for=%f, %f" - % (time.time() - inner_time, torch.cuda.utilization(self.device)) - ) - cmax = surf_scaling_factors[0] - cmin = surf_scaling_factors[1] + else: + stl_center_results = None + + # Stack up the results into one big tensor for surface and volume: + if all([s is not None for s in surface_results]): + surface_results = torch.cat(surface_results, dim=1) + if all([v is not None for v in volume_results]): + volume_results = torch.cat(volume_results, dim=0) + + return stl_center_results, surface_results, volume_results + + +def inference_epoch( + dataset: DrivaerMLDataset, + sampler: DistributedSampler, + datapipe: DoMINODataPipe, + model: DoMINO, + gpu_handle: int, + logger: PythonLogger, + batch_size: int = 24_000, + total_points: int = 1_024_000, +): + ###################################################### + # Inference can run in a distributed way by coordinating + # the indices for each rank, which the sampler does + ###################################################### + + # Convert the indices right to a list: + epoch_indices = list(sampler) + + ###################################################### + # Assuming here there are more than two target meshes + # This will get the IO pipe running in the background + # While we process a dataset. + ###################################################### + dataset.preload(epoch_indices[0]) + dataset.preload(epoch_indices[1]) + + for i_batch, epoch_index in enumerate(epoch_indices): + batch_start_time = time.perf_counter() + ###################################################### + # Put another example in the preload queue while this + # batch is processed + ###################################################### + data_loading_start = time.perf_counter() + if i_batch + 2 < len(epoch_indices): + # Preload next next + dataset.preload(epoch_indices[i_batch + 2]) + + ###################################################### + # Get the data for this index: + ###################################################### + sample_batched = dataset[epoch_index] + dataloading_time = time.perf_counter() - data_loading_start + + logger.info( + f"Batch {i_batch} data loading time: {dataloading_time:.3f} seconds" + ) + + procesing_time_start = time.perf_counter() + stl_center_resulst, surface_results, volume_results = inference_on_single_stl( + sample_batched["stl_coordinates"], + sample_batched["stl_faces"], + model, + datapipe, + batch_size, + total_points, + gpu_handle, + logger, + ) + + ###################################################### + # Peel off pressure, velocity, nut, shear, etc. + # Also compute drag, lift forces. + ###################################################### + # TODO + # TODO + # TODO + # TODO + # TODO + # TODO + # TODO + + procesing_time_end = time.perf_counter() + logger.info( + f"Batch {i_batch} GPU processing time: {procesing_time_end - procesing_time_start:.3f} seconds" + ) + + output_start_time = time.perf_counter() + ###################################################### + # Save the outputs to file: + ###################################################### + # TODO + # TODO + # TODO + # TODO + # TODO + # TODO + output_end_time = time.perf_counter() + logger.info( + f"Batch {i_batch} output time: {output_end_time - output_start_time:.3f} seconds" + ) + + +@hydra.main(version_base="1.3", config_path="conf", config_name="config") +def main(cfg: DictConfig) -> None: + ###################################################### + # initialize distributed manager + ###################################################### + DistributedManager.initialize() + dist = DistributedManager() - surface_coordinates_all = torch.reshape( - surface_coordinates_all, (1, num_points, 3) - ) - surface_solutions_all = torch.reshape(surface_solutions_all, (1, num_points, 4)) + ###################################################### + # Initialize NVML + ###################################################### + nvmlInit() + gpu_handle = nvmlDeviceGetHandleByIndex(dist.device.index) - if self.surf_factors is not None: - surface_solutions_all = unnormalize( - surface_solutions_all, self.surf_factors[0], self.surf_factors[1] - ) + ###################################################### + # Initialize logger + ###################################################### - self.out_dict["surface_coordinates"] = ( - 0.5 * (surface_coordinates_all + 1.0) * (cmax - cmin) + cmin - ) - self.out_dict["pressure_surface"] = ( - surface_solutions_all[:, :, :1] - * self.stream_velocity**2.0 - * self.air_density - ) - self.out_dict["wall-shear-stress"] = ( - surface_solutions_all[:, :, 1:4] - * self.stream_velocity**2.0 - * self.air_density - ) - self.sampling_indices = sampling_indices - - @torch.inference_mode() - def compute_volume_solutions(self, num_sample_points, plot_solutions=False): - total_time = 0.0 - start_event = torch.cuda.Event(enable_timing=True) - end_event = torch.cuda.Event(enable_timing=True) - - geo_encoding = self.geometry_encoding - j = 0 - - # Compute volume - point_batch_size = 512_000 - num_points = num_sample_points - subdomain_points = int(np.floor(num_points / point_batch_size)) - volume_solutions = torch.zeros(1, num_points, self.num_vol_vars).to(self.device) - volume_coordinates = torch.zeros(1, num_points, 3).to(self.device) - - for p in range(subdomain_points + 1): - start_idx = p * point_batch_size - end_idx = (p + 1) * point_batch_size - if end_idx > num_points: - point_batch_size = num_points - start_idx - end_idx = num_points - - with autocast(enabled=True): - inner_time = time.time() - start_event.record() - ( - volume_mesh_centers, - pos_normals_com, - pos_normals_closest, - sdf_nodes, - scaling_factors, - ) = self.ifp.sample_points_in_volume( - num_points_vol=point_batch_size, - max_min=self.bounding_box_min_max, - center_of_mass=self.center_of_mass, - ) - end_event.record() - end_event.synchronize() - cur_time = start_event.elapsed_time(end_event) / 1000.0 - print(f"sample_points_in_volume time (s): {cur_time:.4f}") - - volume_coordinates[:, start_idx:end_idx] = volume_mesh_centers - - start_event.record() - - volume_solutions_batch = self.compute_solution_in_volume( - geo_encoding, - volume_mesh_centers, - sdf_nodes, - pos_normals_closest, - pos_normals_com, - self.grid, - self.model, - use_sdf_basis=self.cfg.model.use_sdf_in_basis_func, - inlet_velocity=self.stream_velocity, - air_density=self.air_density, - ) - volume_solutions[:, start_idx:end_idx] = volume_solutions_batch - end_event.record() - end_event.synchronize() - cur_time = start_event.elapsed_time(end_event) / 1000.0 - print(f"compute_solution time (s): {cur_time:.4f}") - total_time += float(time.time() - inner_time) - # volume_solutions_all = volume_solutions - print( - "Time taken for compute solution in volume for =%f" - % (time.time() - inner_time) - ) - # print("Points processed:", end_idx) - print("Total time measured = %f" % total_time) - print("Points processed:", end_idx) + logger = PythonLogger("Train") + logger = RankZeroLoggingWrapper(logger, dist) - cmax = scaling_factors[0] - cmin = scaling_factors[1] - volume_coordinates_all = volume_coordinates - volume_solutions_all = volume_solutions + logger.info(f"Config summary:\n{OmegaConf.to_yaml(cfg, sort_keys=True)}") - cmax = scaling_factors[0] - cmin = scaling_factors[1] + ###################################################### + # Get scaling factors + # Likely, you want to reuse the scaling factors from training. + ###################################################### + pickle_path = os.path.join(cfg.output) + "/scaling_factors/scaling_factors.pkl" - volume_coordinates_all = torch.reshape( - volume_coordinates_all, (1, num_sample_points, 3) - ) - volume_solutions_all = torch.reshape( - volume_solutions_all, (1, num_sample_points, self.num_vol_vars) + try: + scaling_factors = ScalingFactors.load(pickle_path) + logger.info(f"Scaling factors loaded from: {pickle_path}") + except FileNotFoundError: + raise FileNotFoundError( + f"Scaling factors not found at: {pickle_path}; please run compute_statistics.py to compute them." ) - if self.vol_factors is not None: - volume_solutions_all = unnormalize( - volume_solutions_all, self.vol_factors[0], self.vol_factors[1] - ) - - self.out_dict["coordinates"] = ( - 0.5 * (volume_coordinates_all + 1.0) * (cmax - cmin) + cmin - ) - self.out_dict["velocity"] = ( - volume_solutions_all[:, :, :3] * self.stream_velocity - ) - self.out_dict["pressure"] = ( - volume_solutions_all[:, :, 3:4] - * self.stream_velocity**2.0 - * self.air_density - ) - # self.out_dict["turbulent-kinetic-energy"] = ( - # volume_solutions_all[:, :, 4:5] - # * self.stream_velocity**2.0 - # * self.air_density - # ) - # self.out_dict["turbulent-viscosity"] = ( - # volume_solutions_all[:, :, 5:] * self.stream_velocity * self.length_scale - # ) - self.out_dict["bounding_box_dims"] = torch.vstack(self.bounding_box_min_max) - - if plot_solutions: - print("Plotting solutions") - plot_save_path = os.path.join(self.cfg.output, "plots/contours/") - create_directory(plot_save_path) - - p_grid = 0.5 * (self.grid + 1.0) * (cmax - cmin) + cmin - p_grid = p_grid.cpu().numpy() - sdf_grid = self.sdf_grid.cpu().numpy() - volume_coordinates_all = ( - 0.5 * (volume_coordinates_all + 1.0) * (cmax - cmin) + cmin - ) - volume_solutions_all[:, :, :3] = ( - volume_solutions_all[:, :, :3] * self.stream_velocity - ) - volume_solutions_all[:, :, 3:4] = ( - volume_solutions_all[:, :, 3:4] - * self.stream_velocity**2.0 - * self.air_density - ) - # volume_solutions_all[:, :, 4:5] = ( - # volume_solutions_all[:, :, 4:5] - # * self.stream_velocity**2.0 - # * self.air_density - # ) - # volume_solutions_all[:, :, 5] = ( - # volume_solutions_all[:, :, 5] * self.stream_velocity * self.length_scale - # ) - volume_coordinates_all = volume_coordinates_all.cpu().numpy() - volume_solutions_all = volume_solutions_all.cpu().numpy() - - # ND interpolation on a grid - prediction_grid = nd_interpolator( - volume_coordinates_all, volume_solutions_all[0], p_grid[0] - ) - nx, ny, nz, vars = prediction_grid.shape - idx = np.where(sdf_grid[0] < 0.0) - prediction_grid[idx] = float("inf") - axes_titles = ["y/4 plane", "y/2 plane"] - - plot( - prediction_grid[:, int(ny / 4), :, 0], - prediction_grid[:, int(ny / 2), :, 0], - var="x-vel", - save_path=plot_save_path + f"x-vel-midplane_{self.stream_velocity}.png", - axes_titles=axes_titles, - plot_error=False, - ) - plot( - prediction_grid[:, int(ny / 4), :, 1], - prediction_grid[:, int(ny / 2), :, 1], - var="y-vel", - save_path=plot_save_path + f"y-vel-midplane_{self.stream_velocity}.png", - axes_titles=axes_titles, - plot_error=False, - ) - plot( - prediction_grid[:, int(ny / 4), :, 2], - prediction_grid[:, int(ny / 2), :, 2], - var="z-vel", - save_path=plot_save_path + f"z-vel-midplane_{self.stream_velocity}.png", - axes_titles=axes_titles, - plot_error=False, - ) - plot( - prediction_grid[:, int(ny / 4), :, 3], - prediction_grid[:, int(ny / 2), :, 3], - var="pres", - save_path=plot_save_path + f"pres-midplane_{self.stream_velocity}.png", - axes_titles=axes_titles, - plot_error=False, - ) - # plot( - # prediction_grid[:, int(ny / 4), :, 4], - # prediction_grid[:, int(ny / 2), :, 4], - # var="tke", - # save_path=plot_save_path + f"tke-midplane_{self.stream_velocity}.png", - # axes_titles=axes_titles, - # plot_error=False, - # ) - # plot( - # prediction_grid[:, int(ny / 4), :, 5], - # prediction_grid[:, int(ny / 2), :, 5], - # var="nut", - # save_path=plot_save_path + f"nut-midplane_{self.stream_velocity}.png", - # axes_titles=axes_titles, - # plot_error=False, - # ) - - def cold_start(self, cached_geom_path=None): - print("Cold start") - self.compute_geo_encoding(cached_geom_path) - self.compute_volume_solutions(num_sample_points=10) - self.clear_out_dict() - - @torch.no_grad() - def calculate_geometry_encoding( - self, geo_centers, p_grid, sdf_grid, s_grid, sdf_surf_grid, model - ): - vol_min = self.bounding_box_min_max[0] - vol_max = self.bounding_box_min_max[1] - surf_min = self.bounding_box_surface_min_max[0] - surf_max = self.bounding_box_surface_min_max[1] - - geo_centers_vol = 2.0 * (geo_centers - vol_min) / (vol_max - vol_min) - 1 - if self.dist.world_size == 1: - encoding_g_vol = model.geo_rep_volume(geo_centers_vol, p_grid, sdf_grid) - else: - encoding_g_vol = model.module.geo_rep_volume( - geo_centers_vol, p_grid, sdf_grid - ) + vol_factors = scaling_factors.mean["volume_fields"] + surf_factors = scaling_factors.mean["surface_fields"] - geo_centers_surf = 2.0 * (geo_centers - surf_min) / (surf_max - surf_min) - 1 + ###################################################### + # Configure the model + ###################################################### + model_type = cfg.model.model_type + num_vol_vars, num_surf_vars, num_global_features = get_num_vars(cfg, model_type) - if self.dist.world_size == 1: - encoding_g_surf = model.geo_rep_surface( - geo_centers_surf, s_grid, sdf_surf_grid - ) - else: - encoding_g_surf = model.module.geo_rep_surface( - geo_centers_surf, s_grid, sdf_surf_grid - ) - - if self.dist.world_size == 1: - encoding_g_surf1 = model.geo_rep_surface1( - geo_centers_surf, s_grid, sdf_surf_grid - ) - else: - encoding_g_surf1 = model.module.geo_rep_surface1( - geo_centers_surf, s_grid, sdf_surf_grid - ) - - geo_encoding = 0.5 * encoding_g_surf1 + 0.5 * encoding_g_vol - geo_encoding_surface = 0.5 * encoding_g_surf - return geo_encoding, geo_encoding_surface - - @torch.no_grad() - def compute_solution_on_surface( - self, - geo_encoding, - surface_mesh_centers, - surface_mesh_neighbors, - surface_normals, - surface_neighbors_normals, - surface_areas, - surface_neighbors_areas, - pos_normals_com, - s_grid, - model, - inlet_velocity, - air_density, - ): - """ - Global parameters: For this particular case, the model was trained on single velocity/density values - across all simulations. Hence, global_params_values and global_params_reference are the same. - """ - global_params_values = torch.cat( - (inlet_velocity, air_density), axis=1 - ) # (1, 2) - global_params_values = torch.unsqueeze(global_params_values, -1) # (1, 2, 1) - - global_params_reference = torch.cat( - (inlet_velocity, air_density), axis=1 - ) # (1, 2) - global_params_reference = torch.unsqueeze( - global_params_reference, -1 - ) # (1, 2, 1) - - if self.dist.world_size == 1: - geo_encoding_local = model.geo_encoding_local( - geo_encoding, surface_mesh_centers, s_grid, mode="surface" - ) - else: - geo_encoding_local = model.module.geo_encoding_local( - geo_encoding, surface_mesh_centers, s_grid, mode="surface" - ) + if model_type == "combined" or model_type == "surface": + surface_variable_names = list(cfg.variables.surface.solution.keys()) + else: + surface_variable_names = [] - pos_encoding = pos_normals_com - surface_areas = torch.unsqueeze(surface_areas, -1) - surface_neighbors_areas = torch.unsqueeze(surface_neighbors_areas, -1) - - if self.dist.world_size == 1: - pos_encoding = model.position_encoder(pos_encoding, eval_mode="surface") - tpredictions_batch = model.calculate_solution_with_neighbors( - surface_mesh_centers, - geo_encoding_local, - pos_encoding, - surface_mesh_neighbors, - surface_normals, - surface_neighbors_normals, - surface_areas, - surface_neighbors_areas, - global_params_values, - global_params_reference, - ) - else: - pos_encoding = model.module.position_encoder( - pos_encoding, eval_mode="surface" - ) - tpredictions_batch = model.module.calculate_solution_with_neighbors( - surface_mesh_centers, - geo_encoding_local, - pos_encoding, - surface_mesh_neighbors, - surface_normals, - surface_neighbors_normals, - surface_areas, - surface_neighbors_areas, - global_params_values, - global_params_reference, - ) + if model_type == "combined" or model_type == "volume": + volume_variable_names = list(cfg.variables.volume.solution.keys()) + else: + volume_variable_names = [] + + ###################################################### + # Check that the sample size is equal. + # unequal samples could be done but they aren't, here.s + ###################################################### + if cfg.model.model_type == "combined": + if cfg.model.volume_points_sample != cfg.model.surface_points_sample: + raise ValueError( + "Volume and surface points sample must be equal for combined model" + ) + + # Get the number of sample points: + sample_points = ( + cfg.model.surface_points_sample + if cfg.model.model_type == "surface" + else cfg.model.volume_points_sample + ) - return tpredictions_batch - - @torch.no_grad() - def compute_solution_in_volume( - self, - geo_encoding, - volume_mesh_centers, - sdf_nodes, - pos_enc_closest, - pos_normals_com, - p_grid, - model, - use_sdf_basis, - inlet_velocity, - air_density, - ): - ## Global parameters - global_params_values = torch.cat( - (inlet_velocity, air_density), axis=1 - ) # (1, 2) - global_params_values = torch.unsqueeze(global_params_values, -1) # (1, 2, 1) - - global_params_reference = torch.cat( - (inlet_velocity, air_density), axis=1 - ) # (1, 2) - global_params_reference = torch.unsqueeze( - global_params_reference, -1 - ) # (1, 2, 1) - - if self.dist.world_size == 1: - geo_encoding_local = model.geo_encoding_local( - geo_encoding, volume_mesh_centers, p_grid, mode="volume" - ) - else: - geo_encoding_local = model.module.geo_encoding_local( - geo_encoding, volume_mesh_centers, p_grid, mode="volume" - ) - if use_sdf_basis: - pos_encoding = torch.cat( - (sdf_nodes, pos_enc_closest, pos_normals_com), axis=-1 - ) - else: - pos_encoding = pos_normals_com - - if self.dist.world_size == 1: - pos_encoding = model.position_encoder(pos_encoding, eval_mode="volume") - tpredictions_batch = model.calculate_solution( - volume_mesh_centers, - geo_encoding_local, - pos_encoding, - global_params_values, - global_params_reference, - num_sample_points=self.stencil_size, - eval_mode="volume", - ) - else: - pos_encoding = model.module.position_encoder( - pos_encoding, eval_mode="volume" - ) - tpredictions_batch = model.module.calculate_solution( - volume_mesh_centers, - geo_encoding_local, - pos_encoding, - global_params_values, - global_params_reference, - num_sample_points=self.stencil_size, - eval_mode="volume", - ) - return tpredictions_batch + ###################################################### + # If the batch size doesn't evenly divide + # the num points, that's ok. But print a warning + # that the total points will get tweaked. + ###################################################### + if cfg.eval.num_points % sample_points != 0: + logger.warning( + f"Batch size {sample_points} doesn't evenly divide num points {cfg.eval.num_points}." + ) + logger.warning( + f"Total points will be rounded up to {((cfg.eval.num_points // sample_points) + 1) * sample_points}." + ) + + ###################################################### + # Configure the dataset + # We are applying preprocessing in a separate step + # for this - so the dataset and datapipe are separate + ###################################################### + + # Override the model type + # For the inference pipeline, we adjust the tooling a little for the data. + # We use only a bare STL dataset that will read the mesh coordinates + # and triangle definitions. We'll compute the centers and normals + # on the GPU (instead of on the CPU, as pyvista would do) and + # then we can sample from that mesh on the GPU. + test_dataset = DrivaerMLDataset( + data_dir=cfg.eval.test_path, + keys_to_read=[ + "stl_coordinates", + "stl_faces", + ], + output_device=dist.device, + ) + # Volumetric data will be generated on the fly on the GPU. + + ###################################################### + # Configure the datapipe + # We _won't_ iterate over the datapipe, however, we can use the + # datapipe processing tools on the sampled surface and + # volume points with the same preprocessing. + # It also is used to un-normalize the model outputs. + ###################################################### + overrides = {} + if hasattr(cfg.data, "gpu_preprocessing"): + overrides["gpu_preprocessing"] = cfg.data.gpu_preprocessing + + if hasattr(cfg.data, "gpu_output"): + overrides["gpu_output"] = cfg.data.gpu_output + + test_datapipe = DoMINODataPipe( + None, + phase="test", + grid_resolution=cfg.model.interp_res, + volume_variables=volume_variable_names, + surface_variables=surface_variable_names, + normalize_coordinates=True, + sampling=False, + sample_in_bbox=True, + volume_points_sample=None, + surface_points_sample=None, + geom_points_sample=None, + positional_encoding=cfg.model.positional_encoding, + volume_factors=vol_factors, + surface_factors=surf_factors, + scaling_type=cfg.model.normalization, + model_type=model_type, + bounding_box_dims=cfg.data.bounding_box, + bounding_box_dims_surf=cfg.data.bounding_box_surface, + num_surface_neighbors=cfg.model.num_neighbors_surface, + resample_surfaces=cfg.model.resampling_surface_mesh.resample, + resampling_points=cfg.model.resampling_surface_mesh.points, + surface_sampling_algorithm=cfg.model.surface_sampling_algorithm, + **overrides, + ) -if __name__ == "__main__": - OmegaConf.register_new_resolver("eval", eval) - with initialize(version_base="1.3", config_path="conf"): - cfg = compose(config_name="config") + ###################################################### + # The sampler is used in multi-gpu inference to + # coordinate the batches used for each rank. + ###################################################### + test_sampler = DistributedSampler( + test_dataset, + num_replicas=dist.world_size, + rank=dist.rank, + **cfg.train.sampler, + ) - DistributedManager.initialize() - dist = DistributedManager() + ###################################################### + # Configure the model + # and move it to the device. + ###################################################### + model = DoMINO( + input_features=3, + output_features_vol=num_vol_vars, + output_features_surf=num_surf_vars, + global_features=num_global_features, + model_parameters=cfg.model, + ).to(dist.device) + # model = torch.compile(model, fullgraph=True, dynamic=True) # TODO make this configurable + + # Print model summary (structure and parmeter count). + logger.info(f"Model summary:\n{torchinfo.summary(model, verbose=0, depth=2)}\n") if dist.world_size > 1: torch.distributed.barrier() - input_path = cfg.eval.test_path - dirnames = get_filenames(input_path) - dev_id = torch.cuda.current_device() - num_files = int(len(dirnames) / 8) - dirnames_per_gpu = dirnames[int(num_files * dev_id) : int(num_files * (dev_id + 1))] - - domino = dominoInference(cfg, dist, False) - domino.initialize_model( - model_path="/lustre/models/DoMINO.0.7.pt" - ) ## Replace the model path with location of the trained model - - for count, dirname in enumerate(dirnames_per_gpu): - # print(f"Processing file {dirname}") - filepath = os.path.join(input_path, dirname) - - STREAM_VELOCITY = 30.0 - AIR_DENSITY = 1.205 - - # Neighborhood points sampled for evaluation, tradeoff between accuracy and speed - STENCIL_SIZE = ( - 7 # Higher stencil size -> more accuracy but more evaluation time - ) - - domino.set_stl_path(filepath) - domino.set_stream_velocity(STREAM_VELOCITY) - domino.set_stencil_size(STENCIL_SIZE) - - domino.read_stl() - - domino.initialize_data_processor() - - # Calculate geometry encoding - domino.compute_geo_encoding() - - # Calculate volume solutions - domino.compute_volume_solutions( - num_sample_points=10_256_000, plot_solutions=False - ) + load_checkpoint( + to_absolute_path(cfg.resume_dir), + models=model, + device=dist.device, + ) - # Calculate surface solutions - domino.compute_surface_solutions() - domino.compute_forces() - out_dict = domino.get_out_dict() - - print( - "Dirname:", - dirname, - "Drag:", - out_dict["drag_force"], - "Lift:", - out_dict["lift_force"], - ) - vtp_path = f"/lustre/snidhan/physicsnemo-work/domino-global-param-runs/stl-results/pred_{dirname}_4.vtp" - domino.mesh_stl.save(vtp_path) - reader = vtk.vtkXMLPolyDataReader() - reader.SetFileName(f"{vtp_path}") - reader.Update() - polydata_surf = reader.GetOutput() - - surfParam_vtk = numpy_support.numpy_to_vtk( - out_dict["pressure_surface"][0].cpu().numpy() - ) - surfParam_vtk.SetName(f"Pressure") - polydata_surf.GetCellData().AddArray(surfParam_vtk) + start_time = time.perf_counter() + + # This controls what indices to use for each epoch. + test_sampler.set_epoch(0) + + prof = Profiler() + + model.eval() + epoch_start_time = time.perf_counter() + with prof: + inference_epoch( + dataset=test_dataset, + sampler=test_sampler, + datapipe=test_datapipe, + model=model, + logger=logger, + gpu_handle=gpu_handle, + batch_size=sample_points, + total_points=cfg.eval.num_points, + ) + epoch_end_time = time.perf_counter() + logger.info( + f"Device {dist.device}, Epoch took {epoch_end_time - epoch_start_time:.3f} seconds" + ) - surfParam_vtk = numpy_support.numpy_to_vtk( - out_dict["wall-shear-stress"][0].cpu().numpy() - ) - surfParam_vtk.SetName(f"Wall-shear-stress") - polydata_surf.GetCellData().AddArray(surfParam_vtk) - write_to_vtp(polydata_surf, vtp_path) - exit() +if __name__ == "__main__": + # Profiler().enable("torch") + # Profiler().initialize() + main() + # Profiler().finalize() diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py deleted file mode 100644 index 3c6acc3ccd..0000000000 --- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl2.py +++ /dev/null @@ -1,634 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. -# SPDX-FileCopyrightText: All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This code shows how to use a trained DoMINO model, with it's corresponding -preprocessing pipeline, to infer values on and around an STL mesh file. - -This script uses the meshes from the DrivaerML dataset, however, the logic -is largely the same. As an overview: -- Load the model -- Set up the preprocessor -- Loop over meshes -- In each mesh, sample random points on the surface, volume, or both -- Preprocess the points and run them through the model -- Process the STL mesh centers, too -- Collect the results and return -- Save the results to file. -""" - -import time -import os -import re -from typing import Literal, Any - -import apex -import numpy as np -import hydra -from hydra.utils import to_absolute_path -from omegaconf import DictConfig, OmegaConf -import torch - -DISABLE_RMM = os.environ.get("DISABLE_RMM", False) -if not DISABLE_RMM: - import rmm - from rmm.allocators.torch import rmm_torch_allocator - - rmm.reinitialize(pool_allocator=True) - torch.cuda.memory.change_current_allocator(rmm_torch_allocator) - -import torchinfo -import torch.distributed as dist -from torch.amp import GradScaler, autocast -from torch.nn.parallel import DistributedDataParallel -from torch.utils.data import DataLoader -from torch.utils.data.distributed import DistributedSampler -from torch.utils.tensorboard import SummaryWriter -from nvtx import annotate as nvtx_annotate -import torch.cuda.nvtx as nvtx - -from physicsnemo.distributed import DistributedManager -from physicsnemo.launch.utils import load_checkpoint, save_checkpoint -from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper - -from physicsnemo.datapipes.cae.domino_datapipe2 import ( - DoMINODataPipe, - create_domino_dataset, -) -from physicsnemo.datapipes.cae.drivaer_ml_dataset import ( - DrivaerMLDataset, -) - -from physicsnemo.models.domino.model import DoMINO -from physicsnemo.utils.domino.utils import sample_points_on_mesh - -from utils import ScalingFactors - -# This is included for GPU memory tracking: -from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo -import time - - -# Initialize NVML -nvmlInit() - - -from physicsnemo.utils.profiling import profile, Profiler - - -from loss import compute_loss_dict -from utils import get_num_vars - - -def inference_on_single_stl( - stl_coordinates: torch.Tensor, - stl_faces: torch.Tensor, - model: DoMINO, - datapipe: DoMINODataPipe, - batch_size: int, - total_points: int, - gpu_handle: int | None = None, - logger: PythonLogger | None = None, -): - """ - Perform model inference on a single STL mesh. - - This function will take the input mesh + faces and - then sample the surface and volume to produce the model outputs - at `total_points` locations in batches of `batch_size`. - - - - Args: - stl_coordinates: The coordinates of the STL mesh. - stl_faces: The faces of the STL mesh. - model: The model to use for inference. - datapipe: The datapipe to use for preprocessing. - batch_size: The batch size to use for inference. - total_points: The total number of points to process. - gpu_handle: The GPU handle to use for inference. - logger: The logger to use for logging. - """ - device = stl_coordinates.device - batch_start_time = time.perf_counter() - ###################################################### - # The IO only reads in "stl_faces" and "stl_coordinates". - # "stl_areas" and "stl_centers" would be computed by - # pyvista on CPU - instead, we do it on the GPU - # right here. - ###################################################### - - # Center is a mean of the 3 vertices - triangle_vertices = stl_coordinates[stl_faces.reshape((-1, 3))] - stl_centers = triangle_vertices.mean(dim=-1) - ###################################################### - # Area we compute from the cross product of two sides: - ###################################################### - d1 = triangle_vertices[:, 1] - triangle_vertices[:, 0] - d2 = triangle_vertices[:, 2] - triangle_vertices[:, 0] - stl_mesh_normals = torch.linalg.cross(d1, d2, dim=1) - normals_norm = torch.linalg.norm(stl_mesh_normals, dim=1) - stl_mesh_normals = stl_mesh_normals / normals_norm.unsqueeze(1) - stl_areas = 0.5 * normals_norm - - ###################################################### - # For computing the points, we take those stl objects, - # sample in chunks of `batch_size` until we've - # accumulated `total_points` predictions. - ###################################################### - - batch_output_dict = {} - N = 2 - total_points_processed = 0 - - # Use these lists to build up the output tensors: - surface_results = [] - volume_results = [] - - while total_points_processed < total_points: - inner_loop_start_time = time.perf_counter() - - ###################################################### - # Create the dictionary as the preprocessing expects: - ###################################################### - inference_dict = { - "stl_coordinates": stl_coordinates, - "stl_faces": stl_faces, - "stl_centers": stl_centers, - "stl_areas": stl_areas, - } - - # If the surface data is part of the model, sample the surface: - - if datapipe.model_type == "surface" or datapipe.model_type == "combined": - ###################################################### - # This function will sample points on the STL surface - ###################################################### - sampled_points, sampled_faces, sampled_areas, sampled_normals = ( - sample_points_on_mesh( - stl_coordinates, - stl_faces, - batch_size, - mesh_normals=stl_mesh_normals, - mesh_areas=stl_areas, - ) - ) - - inference_dict["surface_mesh_centers"] = sampled_points - inference_dict["surface_normals"] = sampled_normals - inference_dict["surface_areas"] = sampled_areas - inference_dict["surface_faces"] = sampled_faces - - # If the volume data is part of the model, sample the volume: - if datapipe.model_type == "volume" or datapipe.model_type == "combined": - ###################################################### - # Build up volume points too with uniform sampling - # TODO - this doesn't filter points that are - # internal to the mesh - ###################################################### - c_min = datapipe.config.bounding_box_dims[1] - c_max = datapipe.config.bounding_box_dims[0] - - sampled_volume_points = (c_max - c_min) * torch.rand( - batch_size, 3, device=device, dtype=torch.float32 - ) + c_min - - inference_dict["volume_mesh_centers"] = (sampled_volume_points,) - - ###################################################### - # Pre-process the data with the datapipe: - ###################################################### - preprocessed_data = datapipe.process_data(inference_dict) - - if datapipe.model_type == "volume" or datapipe.model_type == "combined": - ###################################################### - # Use the sign of the volume SDF to filter out points - # That are inside the STL mesh - ###################################################### - sdf_nodes = preprocessed_data["sdf_nodes"] - valid_volume_idx = sdf_nodes > 0 - preprocessed_data["volume_mesh_centers"] = preprocessed_data[ - "volume_mesh_centers" - ][valid_volume_idx] - - ###################################################### - # Add a batch dimension to the data_dict - # (normally this is added in __getitem__ of the datapipe) - ###################################################### - preprocessed_data = {k: v.unsqueeze(0) for k, v in preprocessed_data.items()} - - ###################################################### - # Forward pass through the model: - ###################################################### - with torch.no_grad(): - output_vol, output_surf = model(preprocessed_data) - - ###################################################### - # unnormalize the outputs with the datapipe - # Whatever settings are configured for normalizing the - # output fields - even though we don't have ground - # truth here - are reused to undo that for the predictions - ###################################################### - output_vol, output_surf = datapipe.unscale_model_outputs( - output_vol, output_surf - ) - - surface_results.append(output_surf) - volume_results.append(output_vol) - - total_points_processed += batch_size - - current_loop_time = time.perf_counter() - - logging_string = f"Device {device} processed {total_points_processed} points of {total_points}\n" - if gpu_handle is not None: - gpu_info = nvmlDeviceGetMemoryInfo(gpu_handle) - gpu_memory_used = gpu_info.used / (1024**3) - logging_string += f" GPU memory used: {gpu_memory_used:.3f} Gb\n" - - logging_string += f" Time taken since batch start: {current_loop_time - batch_start_time:.2f} seconds\n" - logging_string += f" iteration throughput: {batch_size / (current_loop_time - inner_loop_start_time):.1f} points per second\n" - logging_string += f" Batch mean throughput: {total_points_processed / (current_loop_time - batch_start_time):.1f} points per second.\n" - - if logger is not None: - logger.info(logging_string) - else: - print(logging_string) - - ###################################################### - # Here at the end, get the values for the stl centers - # by updating the previous inference dict - # Only do this if the surface is part of the computation - # Comments are shorter here - it's a condensed version - # of the above logic. - ###################################################### - if datapipe.model_type == "surface" or datapipe.model_type == "combined": - stl_inference_dict = { - "stl_coordinates": stl_coordinates, - "stl_faces": stl_faces, - "stl_centers": stl_centers, - "stl_areas": stl_areas, - } - inference_dict["surface_mesh_centers"] = stl_centers - inference_dict["surface_normals"] = stl_mesh_normals - inference_dict["surface_areas"] = stl_areas - inference_dict["surface_faces"] = stl_faces - - # Just reuse the previous volume samples here if needed: - if datapipe.model_type == "combined": - inference_dict["volume_mesh_centers"] = sampled_volume_points - - # Preprocess: - preprocessed_data = datapipe.process_data(inference_dict) - - # Pull out the invalid volume points again, if needed: - if datapipe.model_type == "combined": - sdf_nodes = preprocessed_data["sdf_nodes"] - valid_volume_idx = sdf_nodes > 0 - preprocessed_data["volume_mesh_centers"] = preprocessed_data[ - "volume_mesh_centers" - ][valid_volume_idx] - - # Run the model forward: - with torch.no_grad(): - preprocessed_data = { - k: v.unsqueeze(0) for k, v in preprocessed_data.items() - } - _, output_surf = model(preprocessed_data) - - # Unnormalize the outputs: - _, stl_center_results = datapipe.unscale_model_outputs(None, output_surf) - - else: - stl_center_results = None - - # Stack up the results into one big tensor for surface and volume: - if all([s is not None for s in surface_results]): - surface_results = torch.cat(surface_results, dim=1) - if all([v is not None for v in volume_results]): - volume_results = torch.cat(volume_results, dim=0) - - return stl_center_results, surface_results, volume_results - - -def inference_epoch( - dataset: DrivaerMLDataset, - sampler: DistributedSampler, - datapipe: DoMINODataPipe, - model: DoMINO, - gpu_handle: int, - logger: PythonLogger, - batch_size: int = 24_000, - total_points: int = 1_024_000, -): - ###################################################### - # Inference can run in a distributed way by coordinating - # the indices for each rank, which the sampler does - ###################################################### - - # Convert the indices right to a list: - epoch_indices = list(sampler) - - ###################################################### - # Assuming here there are more than two target meshes - # This will get the IO pipe running in the background - # While we process a dataset. - ###################################################### - dataset.preload(epoch_indices[0]) - dataset.preload(epoch_indices[1]) - - for i_batch, epoch_index in enumerate(epoch_indices): - batch_start_time = time.perf_counter() - ###################################################### - # Put another example in the preload queue while this - # batch is processed - ###################################################### - data_loading_start = time.perf_counter() - if i_batch + 2 < len(epoch_indices): - # Preload next next - dataset.preload(epoch_indices[i_batch + 2]) - - ###################################################### - # Get the data for this index: - ###################################################### - sample_batched = dataset[epoch_index] - dataloading_time = time.perf_counter() - data_loading_start - - logger.info( - f"Batch {i_batch} data loading time: {dataloading_time:.3f} seconds" - ) - - procesing_time_start = time.perf_counter() - stl_center_resulst, surface_results, volume_results = inference_on_single_stl( - sample_batched["stl_coordinates"], - sample_batched["stl_faces"], - model, - datapipe, - batch_size, - total_points, - gpu_handle, - logger, - ) - - ###################################################### - # Peel off pressure, velocity, nut, shear, etc. - # Also compute drag, lift forces. - ###################################################### - # TODO - # TODO - # TODO - # TODO - # TODO - # TODO - # TODO - - procesing_time_end = time.perf_counter() - logger.info( - f"Batch {i_batch} GPU processing time: {procesing_time_end - procesing_time_start:.3f} seconds" - ) - - output_start_time = time.perf_counter() - ###################################################### - # Save the outputs to file: - ###################################################### - # TODO - # TODO - # TODO - # TODO - # TODO - # TODO - output_end_time = time.perf_counter() - logger.info( - f"Batch {i_batch} output time: {output_end_time - output_start_time:.3f} seconds" - ) - - -@hydra.main(version_base="1.3", config_path="conf", config_name="config") -def main(cfg: DictConfig) -> None: - ###################################################### - # initialize distributed manager - ###################################################### - DistributedManager.initialize() - dist = DistributedManager() - - ###################################################### - # Initialize NVML - ###################################################### - nvmlInit() - gpu_handle = nvmlDeviceGetHandleByIndex(dist.device.index) - - ###################################################### - # Initialize logger - ###################################################### - - logger = PythonLogger("Train") - logger = RankZeroLoggingWrapper(logger, dist) - - logger.info(f"Config summary:\n{OmegaConf.to_yaml(cfg, sort_keys=True)}") - - ###################################################### - # Get scaling factors - # Likely, you want to reuse the scaling factors from training. - ###################################################### - pickle_path = os.path.join(cfg.output) + "/scaling_factors/scaling_factors.pkl" - - try: - scaling_factors = ScalingFactors.load(pickle_path) - logger.info(f"Scaling factors loaded from: {pickle_path}") - except FileNotFoundError: - raise FileNotFoundError( - f"Scaling factors not found at: {pickle_path}; please run compute_statistics.py to compute them." - ) - - vol_factors = scaling_factors.mean["volume_fields"] - surf_factors = scaling_factors.mean["surface_fields"] - - ###################################################### - # Configure the model - ###################################################### - model_type = cfg.model.model_type - num_vol_vars, num_surf_vars, num_global_features = get_num_vars(cfg, model_type) - - if model_type == "combined" or model_type == "surface": - surface_variable_names = list(cfg.variables.surface.solution.keys()) - else: - surface_variable_names = [] - - if model_type == "combined" or model_type == "volume": - volume_variable_names = list(cfg.variables.volume.solution.keys()) - else: - volume_variable_names = [] - - ###################################################### - # Check that the sample size is equal. - # unequal samples could be done but they aren't, here.s - ###################################################### - if cfg.model.model_type == "combined": - if cfg.model.volume_points_sample != cfg.model.surface_points_sample: - raise ValueError( - "Volume and surface points sample must be equal for combined model" - ) - - # Get the number of sample points: - sample_points = ( - cfg.model.surface_points_sample - if cfg.model.model_type == "surface" - else cfg.model.volume_points_sample - ) - - ###################################################### - # If the batch size doesn't evenly divide - # the num points, that's ok. But print a warning - # that the total points will get tweaked. - ###################################################### - if cfg.eval.num_points % sample_points != 0: - logger.warning( - f"Batch size {sample_points} doesn't evenly divide num points {cfg.eval.num_points}." - ) - logger.warning( - f"Total points will be rounded up to {((cfg.eval.num_points // sample_points) + 1) * sample_points}." - ) - - ###################################################### - # Configure the dataset - # We are applying preprocessing in a separate step - # for this - so the dataset and datapipe are separate - ###################################################### - - # Override the model type - # For the inference pipeline, we adjust the tooling a little for the data. - # We use only a bare STL dataset that will read the mesh coordinates - # and triangle definitions. We'll compute the centers and normals - # on the GPU (instead of on the CPU, as pyvista would do) and - # then we can sample from that mesh on the GPU. - test_dataset = DrivaerMLDataset( - data_dir=cfg.eval.test_path, - keys_to_read=[ - "stl_coordinates", - "stl_faces", - ], - output_device=dist.device, - ) - - # Volumetric data will be generated on the fly on the GPU. - - ###################################################### - # Configure the datapipe - # We _won't_ iterate over the datapipe, however, we can use the - # datapipe processing tools on the sampled surface and - # volume points with the same preprocessing. - # It also is used to un-normalize the model outputs. - ###################################################### - overrides = {} - if hasattr(cfg.data, "gpu_preprocessing"): - overrides["gpu_preprocessing"] = cfg.data.gpu_preprocessing - - if hasattr(cfg.data, "gpu_output"): - overrides["gpu_output"] = cfg.data.gpu_output - - test_datapipe = DoMINODataPipe( - None, - phase="test", - grid_resolution=cfg.model.interp_res, - volume_variables=volume_variable_names, - surface_variables=surface_variable_names, - normalize_coordinates=True, - sampling=False, - sample_in_bbox=True, - volume_points_sample=None, - surface_points_sample=None, - geom_points_sample=None, - positional_encoding=cfg.model.positional_encoding, - volume_factors=vol_factors, - surface_factors=surf_factors, - scaling_type=cfg.model.normalization, - model_type=model_type, - bounding_box_dims=cfg.data.bounding_box, - bounding_box_dims_surf=cfg.data.bounding_box_surface, - num_surface_neighbors=cfg.model.num_neighbors_surface, - resample_surfaces=cfg.model.resampling_surface_mesh.resample, - resampling_points=cfg.model.resampling_surface_mesh.points, - surface_sampling_algorithm=cfg.model.surface_sampling_algorithm, - **overrides, - ) - - ###################################################### - # The sampler is used in multi-gpu inference to - # coordinate the batches used for each rank. - ###################################################### - test_sampler = DistributedSampler( - test_dataset, - num_replicas=dist.world_size, - rank=dist.rank, - **cfg.train.sampler, - ) - - ###################################################### - # Configure the model - # and move it to the device. - ###################################################### - model = DoMINO( - input_features=3, - output_features_vol=num_vol_vars, - output_features_surf=num_surf_vars, - global_features=num_global_features, - model_parameters=cfg.model, - ).to(dist.device) - # model = torch.compile(model, fullgraph=True, dynamic=True) # TODO make this configurable - - # Print model summary (structure and parmeter count). - logger.info(f"Model summary:\n{torchinfo.summary(model, verbose=0, depth=2)}\n") - - if dist.world_size > 1: - torch.distributed.barrier() - - load_checkpoint( - to_absolute_path(cfg.resume_dir), - models=model, - device=dist.device, - ) - - start_time = time.perf_counter() - - # This controls what indices to use for each epoch. - test_sampler.set_epoch(0) - - prof = Profiler() - - model.eval() - epoch_start_time = time.perf_counter() - with prof: - inference_epoch( - dataset=test_dataset, - sampler=test_sampler, - datapipe=test_datapipe, - model=model, - logger=logger, - gpu_handle=gpu_handle, - batch_size=sample_points, - total_points=cfg.eval.num_points, - ) - epoch_end_time = time.perf_counter() - logger.info( - f"Device {dist.device}, Epoch took {epoch_end_time - epoch_start_time:.3f} seconds" - ) - - -if __name__ == "__main__": - # Profiler().enable("torch") - # Profiler().initialize() - main() - # Profiler().finalize() From f172ce6f61beabc9f4baa4f9571f73277ef06961 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Tue, 23 Sep 2025 20:11:56 +0000 Subject: [PATCH 46/98] Update train, inference, and config files. --- .../domino/src/benchmark_dataloader.py | 2 +- .../domino/src/conf/config.yaml | 13 +- .../domino/src/inference_on_stl.py | 224 ++++++++++-------- .../external_aerodynamics/domino/src/train.py | 60 ++--- 4 files changed, 160 insertions(+), 139 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py index f24e0ffe16..62a41d3383 100644 --- a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py +++ b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py @@ -173,7 +173,7 @@ def main(cfg: DictConfig) -> None: placements=placements, ) train_sampler = DistributedSampler( - train_dataset, num_replicas=dist.world_size, rank=dist.rank + train_dataset, num_replicas=data_mesh.size(), rank=data_mesh.get_local_rank() ) # train_dataloader = DataLoader( diff --git a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml index 84256a0d97..73a720f343 100644 --- a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml +++ b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml @@ -81,6 +81,9 @@ data: # Input directory for training and validation data max: [4.5, 1.2, 1.3] gpu_preprocessing: true gpu_output: true + normalize_coordinates: true + sample_in_bbox: true + sampling: true # ┌───────────────────────────────────────────┐ # │ Domain Parallelism Settings │ @@ -177,7 +180,8 @@ train: # Training configurable parameters checkpoint_interval: 1 dataloader: batch_size: 1 - pin_memory: false # if the preprocessing is outputing GPU data, set this to false + preload_depth: 2 + pin_memory: True # if the preprocessing is outputing GPU data, set this to false sampler: shuffle: true drop_last: false @@ -191,7 +195,8 @@ train: # Training configurable parameters val: # Validation configurable parameters dataloader: batch_size: 1 - pin_memory: false # if the preprocessing is outputing GPU data, set this to false + preload_depth: 1 + pin_memory: true # if the preprocessing is outputing GPU data, set this to false sampler: shuffle: true drop_last: false @@ -205,4 +210,6 @@ eval: # Testing configurable parameters checkpoint_name: DoMINO.0.455.pt # Name of checkpoint to select from saved checkpoints scaling_param_path: /user/scaling_params refine_stl: False # Automatically refine STL during inference - stencil_size: 7 # Stencil size for evaluating surface and volume model + #TODO - This was hardcoded anyways, remove it. + # stencil_size: 7 # Stencil size for evaluating surface and volume model + num_points: 1_240_000 # Number of points to sample on surface and volume per batch diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py index 3c6acc3ccd..edeefba937 100644 --- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py +++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py @@ -75,7 +75,7 @@ from physicsnemo.models.domino.model import DoMINO from physicsnemo.utils.domino.utils import sample_points_on_mesh -from utils import ScalingFactors +from utils import ScalingFactors, get_keys_to_read, coordinate_distributed_environment # This is included for GPU memory tracking: from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo @@ -92,10 +92,47 @@ from loss import compute_loss_dict from utils import get_num_vars +def reject_interior_volume_points(preprocessed_data: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]: + """ + Reject volume points that are inside the STL mesh. + """ + ###################################################### + # Use the sign of the volume SDF to filter out points + # That are inside the STL mesh + ###################################################### + sdf_nodes = preprocessed_data["sdf_nodes"] + # The sfd_nodes tensor typically has shape (n_vol_points, 1) + valid_volume_idx = sdf_nodes > 0 + # So remove it if it's there: + valid_volume_idx = valid_volume_idx.squeeze(-1) + # Apply this selection to all the volume points: + for key in ["volume_mesh_centers", "sdf_nodes", "pos_volume_closest", "pos_volume_center_of_mass"]: + preprocessed_data[key] = preprocessed_data[key][valid_volume_idx] + + return preprocessed_data + +def sample_volume_points(c_min: torch.Tensor, c_max: torch.Tensor, n_points: int, device: torch.device, eps: float = 1e-7) -> torch.Tensor: + """ + Generate a set of random points interior to the specified bounding box. + + Args: + c_min: The minimum coordinate of the bounding box. + c_max: The maximum coordinate of the bounding box. + n_points: The number of points to sample. + device: The device to sample the points on. + eps: The small edge factor to shift away from the lower bound. + """ + # We use a small edge factor to shift away from the lower bound, + # which can, in some cases, be exactly on the border. + uniform_points = torch.rand(n_points, 3, device=device, dtype=torch.float32)*(1-2*eps) + eps + sampled_volume_points = (c_max - c_min) * uniform_points + c_min + return sampled_volume_points def inference_on_single_stl( stl_coordinates: torch.Tensor, stl_faces: torch.Tensor, + global_params_values: torch.Tensor, + global_params_reference: torch.Tensor, model: DoMINO, datapipe: DoMINODataPipe, batch_size: int, @@ -115,6 +152,8 @@ def inference_on_single_stl( Args: stl_coordinates: The coordinates of the STL mesh. stl_faces: The faces of the STL mesh. + global_params_values: The values of the global parameters. + global_params_reference: The reference values of the global parameters. model: The model to use for inference. datapipe: The datapipe to use for preprocessing. batch_size: The batch size to use for inference. @@ -169,6 +208,8 @@ def inference_on_single_stl( "stl_faces": stl_faces, "stl_centers": stl_centers, "stl_areas": stl_areas, + "global_params_values": global_params_values, + "global_params_reference": global_params_reference, } # If the surface data is part of the model, sample the surface: @@ -196,17 +237,13 @@ def inference_on_single_stl( if datapipe.model_type == "volume" or datapipe.model_type == "combined": ###################################################### # Build up volume points too with uniform sampling - # TODO - this doesn't filter points that are - # internal to the mesh ###################################################### c_min = datapipe.config.bounding_box_dims[1] c_max = datapipe.config.bounding_box_dims[0] + inference_dict["volume_mesh_centers"] = sample_volume_points( + c_min, c_max, batch_size, device, + ) - sampled_volume_points = (c_max - c_min) * torch.rand( - batch_size, 3, device=device, dtype=torch.float32 - ) + c_min - - inference_dict["volume_mesh_centers"] = (sampled_volume_points,) ###################################################### # Pre-process the data with the datapipe: @@ -214,15 +251,7 @@ def inference_on_single_stl( preprocessed_data = datapipe.process_data(inference_dict) if datapipe.model_type == "volume" or datapipe.model_type == "combined": - ###################################################### - # Use the sign of the volume SDF to filter out points - # That are inside the STL mesh - ###################################################### - sdf_nodes = preprocessed_data["sdf_nodes"] - valid_volume_idx = sdf_nodes > 0 - preprocessed_data["volume_mesh_centers"] = preprocessed_data[ - "volume_mesh_centers" - ][valid_volume_idx] + preprocessed_data = reject_interior_volume_points(preprocessed_data) ###################################################### # Add a batch dimension to the data_dict @@ -276,32 +305,34 @@ def inference_on_single_stl( # of the above logic. ###################################################### if datapipe.model_type == "surface" or datapipe.model_type == "combined": - stl_inference_dict = { + + inference_dict = { "stl_coordinates": stl_coordinates, "stl_faces": stl_faces, "stl_centers": stl_centers, "stl_areas": stl_areas, + "global_params_values": global_params_values, + "global_params_reference": global_params_reference, } inference_dict["surface_mesh_centers"] = stl_centers inference_dict["surface_normals"] = stl_mesh_normals inference_dict["surface_areas"] = stl_areas inference_dict["surface_faces"] = stl_faces - # Just reuse the previous volume samples here if needed: if datapipe.model_type == "combined": - inference_dict["volume_mesh_centers"] = sampled_volume_points + c_min = datapipe.config.bounding_box_dims[1] + c_max = datapipe.config.bounding_box_dims[0] + inference_dict["volume_mesh_centers"] = sample_volume_points( + c_min, c_max, stl_centers.shape[0], device, + ) # Preprocess: preprocessed_data = datapipe.process_data(inference_dict) # Pull out the invalid volume points again, if needed: - if datapipe.model_type == "combined": - sdf_nodes = preprocessed_data["sdf_nodes"] - valid_volume_idx = sdf_nodes > 0 - preprocessed_data["volume_mesh_centers"] = preprocessed_data[ - "volume_mesh_centers" - ][valid_volume_idx] - + if datapipe.model_type == "combined" or datapipe.model_type == "volume": + preprocessed_data = reject_interior_volume_points(preprocessed_data) + # Run the model forward: with torch.no_grad(): preprocessed_data = { @@ -316,18 +347,21 @@ def inference_on_single_stl( stl_center_results = None # Stack up the results into one big tensor for surface and volume: - if all([s is not None for s in surface_results]): + if len(surface_results) > 0 and all([s is not None for s in surface_results]): surface_results = torch.cat(surface_results, dim=1) - if all([v is not None for v in volume_results]): - volume_results = torch.cat(volume_results, dim=0) + else: + surface_results = None + if len(volume_results) > 0 and all([v is not None for v in volume_results]): + volume_results = torch.cat(volume_results, dim=1) + else: + volume_results = None return stl_center_results, surface_results, volume_results def inference_epoch( - dataset: DrivaerMLDataset, + dataloader: DrivaerMLDataset, sampler: DistributedSampler, - datapipe: DoMINODataPipe, model: DoMINO, gpu_handle: int, logger: PythonLogger, @@ -339,44 +373,29 @@ def inference_epoch( # the indices for each rank, which the sampler does ###################################################### - # Convert the indices right to a list: - epoch_indices = list(sampler) - - ###################################################### - # Assuming here there are more than two target meshes - # This will get the IO pipe running in the background - # While we process a dataset. - ###################################################### - dataset.preload(epoch_indices[0]) - dataset.preload(epoch_indices[1]) - - for i_batch, epoch_index in enumerate(epoch_indices): - batch_start_time = time.perf_counter() - ###################################################### - # Put another example in the preload queue while this - # batch is processed - ###################################################### - data_loading_start = time.perf_counter() - if i_batch + 2 < len(epoch_indices): - # Preload next next - dataset.preload(epoch_indices[i_batch + 2]) + batch_start_time = time.perf_counter() + + # N.B. - iterating over the dataset directly here. + # That's because we need to sample on the STL and volume and + # that means we'll preprocess after that. + for i_batch, sample_batched in enumerate(dataloader.dataset): + + + dataloading_time = time.perf_counter() - batch_start_time - ###################################################### - # Get the data for this index: - ###################################################### - sample_batched = dataset[epoch_index] - dataloading_time = time.perf_counter() - data_loading_start logger.info( f"Batch {i_batch} data loading time: {dataloading_time:.3f} seconds" ) procesing_time_start = time.perf_counter() - stl_center_resulst, surface_results, volume_results = inference_on_single_stl( + stl_center_results, surface_results, volume_results = inference_on_single_stl( sample_batched["stl_coordinates"], sample_batched["stl_faces"], + sample_batched["global_params_values"], + sample_batched["global_params_reference"], model, - datapipe, + dataloader, batch_size, total_points, gpu_handle, @@ -399,6 +418,10 @@ def inference_epoch( logger.info( f"Batch {i_batch} GPU processing time: {procesing_time_end - procesing_time_start:.3f} seconds" ) + logger.info( + f"Batch {i_batch} stl points: {stl_center_results.shape[1]}" + ) + output_start_time = time.perf_counter() ###################################################### @@ -414,6 +437,8 @@ def inference_epoch( logger.info( f"Batch {i_batch} output time: {output_end_time - output_start_time:.3f} seconds" ) + + batch_start_time = time.perf_counter() @hydra.main(version_base="1.3", config_path="conf", config_name="config") @@ -423,6 +448,10 @@ def main(cfg: DictConfig) -> None: ###################################################### DistributedManager.initialize() dist = DistributedManager() + + # DoMINO supports domain parallel training and inference. This function helps coordinate + # how to set that up, if needed. + domain_mesh, data_mesh, placements = coordinate_distributed_environment(cfg) ###################################################### # Initialize NVML @@ -507,21 +536,28 @@ def main(cfg: DictConfig) -> None: # We are applying preprocessing in a separate step # for this - so the dataset and datapipe are separate ###################################################### - + + # This helper function is to determine which keys to read from the data + # (and which to use default values for, if they aren't present - like + # air_density, for example) + keys_to_read, keys_to_read_if_available = get_keys_to_read( + cfg, model_type, get_ground_truth=True + ) # Override the model type # For the inference pipeline, we adjust the tooling a little for the data. # We use only a bare STL dataset that will read the mesh coordinates # and triangle definitions. We'll compute the centers and normals # on the GPU (instead of on the CPU, as pyvista would do) and # then we can sample from that mesh on the GPU. - test_dataset = DrivaerMLDataset( - data_dir=cfg.eval.test_path, - keys_to_read=[ - "stl_coordinates", - "stl_faces", - ], - output_device=dist.device, - ) + # test_dataset = DrivaerMLDataset( + # data_dir=cfg.eval.test_path, + # keys_to_read=[ + # "stl_coordinates", + # "stl_faces", + # ], + # keys_to_read_if_available=keys_to_read_if_available, + # output_device=dist.device, + # ) # Volumetric data will be generated on the fly on the GPU. @@ -538,44 +574,35 @@ def main(cfg: DictConfig) -> None: if hasattr(cfg.data, "gpu_output"): overrides["gpu_output"] = cfg.data.gpu_output - - test_datapipe = DoMINODataPipe( - None, + + test_dataloader = create_domino_dataset( + cfg, phase="test", - grid_resolution=cfg.model.interp_res, - volume_variables=volume_variable_names, - surface_variables=surface_variable_names, - normalize_coordinates=True, - sampling=False, - sample_in_bbox=True, - volume_points_sample=None, - surface_points_sample=None, - geom_points_sample=None, - positional_encoding=cfg.model.positional_encoding, - volume_factors=vol_factors, - surface_factors=surf_factors, - scaling_type=cfg.model.normalization, - model_type=model_type, - bounding_box_dims=cfg.data.bounding_box, - bounding_box_dims_surf=cfg.data.bounding_box_surface, - num_surface_neighbors=cfg.model.num_neighbors_surface, - resample_surfaces=cfg.model.resampling_surface_mesh.resample, - resampling_points=cfg.model.resampling_surface_mesh.points, - surface_sampling_algorithm=cfg.model.surface_sampling_algorithm, - **overrides, + keys_to_read=["stl_coordinates", "stl_faces"], + keys_to_read_if_available=keys_to_read_if_available, + vol_factors=vol_factors, + surf_factors=surf_factors, + normalize_coordinates = cfg.data.normalize_coordinates, + sample_in_bbox = cfg.data.sample_in_bbox, + sampling = cfg.data.sampling, + device_mesh=domain_mesh, + placements=placements, ) - + ###################################################### # The sampler is used in multi-gpu inference to # coordinate the batches used for each rank. ###################################################### test_sampler = DistributedSampler( - test_dataset, - num_replicas=dist.world_size, - rank=dist.rank, + test_dataloader, + num_replicas=data_mesh.size(), + rank=data_mesh.get_local_rank(), **cfg.train.sampler, ) + + + ###################################################### # Configure the model # and move it to the device. @@ -612,9 +639,8 @@ def main(cfg: DictConfig) -> None: epoch_start_time = time.perf_counter() with prof: inference_epoch( - dataset=test_dataset, + dataloader=test_dataloader, sampler=test_sampler, - datapipe=test_datapipe, model=model, logger=logger, gpu_handle=gpu_handle, diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py index 0176084082..5c0ced9a22 100644 --- a/examples/cfd/external_aerodynamics/domino/src/train.py +++ b/examples/cfd/external_aerodynamics/domino/src/train.py @@ -39,7 +39,19 @@ from omegaconf import DictConfig, OmegaConf -DISABLE_RMM = os.environ.get("DOMINO_DISABLE_RMM", False) +def srt2bool(val: str): + if isinstance(val, bool): + return val + if val.lower() in ["true", "1", "yes", "y"]: + return True + elif val.lower() in ["false", "0", "no", "n"]: + return False + else: + raise ValueError(f"Invalid boolean value: {val}") + + +DISABLE_RMM = srt2bool(os.environ.get("DOMINO_DISABLE_RMM", False)) + if not DISABLE_RMM: import rmm from rmm.allocators.torch import rmm_torch_allocator @@ -170,9 +182,9 @@ def train_epoch( gpu_start_info = nvmlDeviceGetMemoryInfo(gpu_handle) start_time = time.perf_counter() with Profiler(): - for i_batch, sample_batched in enumerate(dataloader): - sampled_batched = dict_to_device(sample_batched, device) - + for i_batch, sampled_batched in enumerate(dataloader): + if i_batch == 7: + break if add_physics_loss: autocast_enabled = False else: @@ -373,6 +385,9 @@ def main(cfg: DictConfig) -> None: surf_factors=surf_factors, device_mesh=domain_mesh, placements=placements, + normalize_coordinates=cfg.data.normalize_coordinates, + sample_in_bbox=cfg.data.sample_in_bbox, + sampling=cfg.data.sampling, ) train_sampler = DistributedSampler( train_dataloader, @@ -390,6 +405,9 @@ def main(cfg: DictConfig) -> None: surf_factors=surf_factors, device_mesh=domain_mesh, placements=placements, + normalize_coordinates=cfg.data.normalize_coordinates, + sample_in_bbox=cfg.data.sample_in_bbox, + sampling=cfg.data.sampling, ) val_sampler = DistributedSampler( val_dataloader, @@ -398,37 +416,6 @@ def main(cfg: DictConfig) -> None: **cfg.val.sampler, ) - # train_dataloader = create_domino_dataset( - # cfg, - # phase="train", - # volume_variable_names=volume_variable_names, - # surface_variable_names=surface_variable_names, - # vol_factors=vol_factors, - # surf_factors=surf_factors, - # ) - # val_dataloader = create_domino_dataset( - # cfg, - # phase="val", - # volume_variable_names=volume_variable_names, - # surface_variable_names=surface_variable_names, - # vol_factors=vol_factors, - # surf_factors=surf_factors, - # ) - - # train_sampler = DistributedSampler( - # train_dataloader, - # num_replicas=dist.world_size, - # rank=dist.rank, - # **cfg.train.sampler, - # ) - - # val_sampler = DistributedSampler( - # val_dataloader, - # num_replicas=dist.world_size, - # rank=dist.rank, - # **cfg.val.sampler, - # ) - ###################################################### # Configure the model ###################################################### @@ -439,7 +426,6 @@ def main(cfg: DictConfig) -> None: global_features=num_global_features, model_parameters=cfg.model, ).to(dist.device) - # model = torch.compile(model, fullgraph=True, dynamic=True) # TODO make this configurable # Print model summary (structure and parmeter count). logger.info(f"Model summary:\n{torchinfo.summary(model, verbose=0, depth=2)}\n") @@ -570,6 +556,8 @@ def main(cfg: DictConfig) -> None: ) epoch_end_time = time.perf_counter() + return + model.eval() avg_vloss = validation_step( dataloader=val_dataloader, From cdbe0ce9bfbf66074349ba1db3e2ce21d5a46769 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 24 Sep 2025 15:23:57 +0000 Subject: [PATCH 47/98] Update scaling factor configuration and location setting --- .../domino/src/compute_statistics.py | 2 +- .../domino/src/conf/config.yaml | 3 +- .../domino/src/inference_on_stl.py | 78 +++++++++++-------- .../external_aerodynamics/domino/src/train.py | 2 +- .../datapipes/cae/drivaer_ml_dataset.py | 2 + 5 files changed, 52 insertions(+), 35 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py b/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py index 5c9ef21f04..ac917d5353 100644 --- a/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py +++ b/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py @@ -67,7 +67,7 @@ def main(cfg: DictConfig) -> None: ################################ # Create output directory ################################ - output_dir = os.path.join(cfg.output, "scaling_factors") + output_dir = os.path.dirname(cfg.data.scaling_factors) os.makedirs(output_dir, exist_ok=True) if dist.world_size > 1: diff --git a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml index 73a720f343..2686634cf9 100644 --- a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml +++ b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml @@ -68,7 +68,7 @@ variables: reference: 1.226 # ┌───────────────────────────────────────────┐ -# │ Training Data Configs │ +# │ Data Configs │ # └───────────────────────────────────────────┘ data: # Input directory for training and validation data input_dir: /user/data/aws_data_all/ @@ -84,6 +84,7 @@ data: # Input directory for training and validation data normalize_coordinates: true sample_in_bbox: true sampling: true + scaling_factors: outputs/AWS_Dataset/1/scaling_factors/scaling_factors.pkl # ┌───────────────────────────────────────────┐ # │ Domain Parallelism Settings │ diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py index edeefba937..9228db8584 100644 --- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py +++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py @@ -92,7 +92,10 @@ from loss import compute_loss_dict from utils import get_num_vars -def reject_interior_volume_points(preprocessed_data: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]: + +def reject_interior_volume_points( + preprocessed_data: dict[str, torch.Tensor], +) -> dict[str, torch.Tensor]: """ Reject volume points that are inside the STL mesh. """ @@ -106,15 +109,27 @@ def reject_interior_volume_points(preprocessed_data: dict[str, torch.Tensor]) -> # So remove it if it's there: valid_volume_idx = valid_volume_idx.squeeze(-1) # Apply this selection to all the volume points: - for key in ["volume_mesh_centers", "sdf_nodes", "pos_volume_closest", "pos_volume_center_of_mass"]: + for key in [ + "volume_mesh_centers", + "sdf_nodes", + "pos_volume_closest", + "pos_volume_center_of_mass", + ]: preprocessed_data[key] = preprocessed_data[key][valid_volume_idx] - + return preprocessed_data -def sample_volume_points(c_min: torch.Tensor, c_max: torch.Tensor, n_points: int, device: torch.device, eps: float = 1e-7) -> torch.Tensor: + +def sample_volume_points( + c_min: torch.Tensor, + c_max: torch.Tensor, + n_points: int, + device: torch.device, + eps: float = 1e-7, +) -> torch.Tensor: """ Generate a set of random points interior to the specified bounding box. - + Args: c_min: The minimum coordinate of the bounding box. c_max: The maximum coordinate of the bounding box. @@ -124,10 +139,14 @@ def sample_volume_points(c_min: torch.Tensor, c_max: torch.Tensor, n_points: int """ # We use a small edge factor to shift away from the lower bound, # which can, in some cases, be exactly on the border. - uniform_points = torch.rand(n_points, 3, device=device, dtype=torch.float32)*(1-2*eps) + eps + uniform_points = ( + torch.rand(n_points, 3, device=device, dtype=torch.float32) * (1 - 2 * eps) + + eps + ) sampled_volume_points = (c_max - c_min) * uniform_points + c_min return sampled_volume_points + def inference_on_single_stl( stl_coordinates: torch.Tensor, stl_faces: torch.Tensor, @@ -241,10 +260,12 @@ def inference_on_single_stl( c_min = datapipe.config.bounding_box_dims[1] c_max = datapipe.config.bounding_box_dims[0] inference_dict["volume_mesh_centers"] = sample_volume_points( - c_min, c_max, batch_size, device, + c_min, + c_max, + batch_size, + device, ) - ###################################################### # Pre-process the data with the datapipe: ###################################################### @@ -305,7 +326,6 @@ def inference_on_single_stl( # of the above logic. ###################################################### if datapipe.model_type == "surface" or datapipe.model_type == "combined": - inference_dict = { "stl_coordinates": stl_coordinates, "stl_faces": stl_faces, @@ -323,7 +343,10 @@ def inference_on_single_stl( c_min = datapipe.config.bounding_box_dims[1] c_max = datapipe.config.bounding_box_dims[0] inference_dict["volume_mesh_centers"] = sample_volume_points( - c_min, c_max, stl_centers.shape[0], device, + c_min, + c_max, + stl_centers.shape[0], + device, ) # Preprocess: @@ -332,7 +355,7 @@ def inference_on_single_stl( # Pull out the invalid volume points again, if needed: if datapipe.model_type == "combined" or datapipe.model_type == "volume": preprocessed_data = reject_interior_volume_points(preprocessed_data) - + # Run the model forward: with torch.no_grad(): preprocessed_data = { @@ -374,16 +397,13 @@ def inference_epoch( ###################################################### batch_start_time = time.perf_counter() - + # N.B. - iterating over the dataset directly here. # That's because we need to sample on the STL and volume and # that means we'll preprocess after that. for i_batch, sample_batched in enumerate(dataloader.dataset): - - dataloading_time = time.perf_counter() - batch_start_time - logger.info( f"Batch {i_batch} data loading time: {dataloading_time:.3f} seconds" ) @@ -418,10 +438,7 @@ def inference_epoch( logger.info( f"Batch {i_batch} GPU processing time: {procesing_time_end - procesing_time_start:.3f} seconds" ) - logger.info( - f"Batch {i_batch} stl points: {stl_center_results.shape[1]}" - ) - + logger.info(f"Batch {i_batch} stl points: {stl_center_results.shape[1]}") output_start_time = time.perf_counter() ###################################################### @@ -437,7 +454,7 @@ def inference_epoch( logger.info( f"Batch {i_batch} output time: {output_end_time - output_start_time:.3f} seconds" ) - + batch_start_time = time.perf_counter() @@ -448,7 +465,7 @@ def main(cfg: DictConfig) -> None: ###################################################### DistributedManager.initialize() dist = DistributedManager() - + # DoMINO supports domain parallel training and inference. This function helps coordinate # how to set that up, if needed. domain_mesh, data_mesh, placements = coordinate_distributed_environment(cfg) @@ -463,7 +480,7 @@ def main(cfg: DictConfig) -> None: # Initialize logger ###################################################### - logger = PythonLogger("Train") + logger = PythonLogger("Inference") logger = RankZeroLoggingWrapper(logger, dist) logger.info(f"Config summary:\n{OmegaConf.to_yaml(cfg, sort_keys=True)}") @@ -472,7 +489,7 @@ def main(cfg: DictConfig) -> None: # Get scaling factors # Likely, you want to reuse the scaling factors from training. ###################################################### - pickle_path = os.path.join(cfg.output) + "/scaling_factors/scaling_factors.pkl" + pickle_path = os.path.join(cfg.data.scaling_factors) try: scaling_factors = ScalingFactors.load(pickle_path) @@ -536,7 +553,7 @@ def main(cfg: DictConfig) -> None: # We are applying preprocessing in a separate step # for this - so the dataset and datapipe are separate ###################################################### - + # This helper function is to determine which keys to read from the data # (and which to use default values for, if they aren't present - like # air_density, for example) @@ -574,7 +591,7 @@ def main(cfg: DictConfig) -> None: if hasattr(cfg.data, "gpu_output"): overrides["gpu_output"] = cfg.data.gpu_output - + test_dataloader = create_domino_dataset( cfg, phase="test", @@ -582,13 +599,13 @@ def main(cfg: DictConfig) -> None: keys_to_read_if_available=keys_to_read_if_available, vol_factors=vol_factors, surf_factors=surf_factors, - normalize_coordinates = cfg.data.normalize_coordinates, - sample_in_bbox = cfg.data.sample_in_bbox, - sampling = cfg.data.sampling, + normalize_coordinates=cfg.data.normalize_coordinates, + sample_in_bbox=cfg.data.sample_in_bbox, + sampling=cfg.data.sampling, device_mesh=domain_mesh, placements=placements, ) - + ###################################################### # The sampler is used in multi-gpu inference to # coordinate the batches used for each rank. @@ -600,9 +617,6 @@ def main(cfg: DictConfig) -> None: **cfg.train.sampler, ) - - - ###################################################### # Configure the model # and move it to the device. diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py index 5c0ced9a22..a1029ceeae 100644 --- a/examples/cfd/external_aerodynamics/domino/src/train.py +++ b/examples/cfd/external_aerodynamics/domino/src/train.py @@ -292,7 +292,7 @@ def main(cfg: DictConfig) -> None: ###################################################### # Get scaling factors - precompute them if this fails! ###################################################### - pickle_path = os.path.join(cfg.output) + "/scaling_factors/scaling_factors.pkl" + pickle_path = os.path.join(cfg.data.scaling_factors) try: scaling_factors = ScalingFactors.load(pickle_path) diff --git a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py index 0acec3b7a5..13009fc968 100644 --- a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py +++ b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py @@ -880,6 +880,8 @@ def preload(self, idx: int) -> None: def _preload_worker(): data = self._read_file(self._filenames[idx]) + if "stl_faces" in data: + data["stl_faces"] = data["stl_faces"].to(torch.int32) # Convert to torch tensors return self._move_to_gpu(data, idx) From fc5d32a27284aee05af06f9cd0b9e2a8a8671d79 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 24 Sep 2025 16:03:02 +0000 Subject: [PATCH 48/98] Make sure surface grid and sdf calculation always happens. --- .../domino/src/inference_on_stl.py | 6 ++- physicsnemo/datapipes/cae/domino_datapipe2.py | 40 ++++++++++--------- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py index 9228db8584..f2c3388ada 100644 --- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py +++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py @@ -339,7 +339,7 @@ def inference_on_single_stl( inference_dict["surface_areas"] = stl_areas inference_dict["surface_faces"] = stl_faces - if datapipe.model_type == "combined": + if datapipe.model_type == "combined" or datapipe.model_type == "volume": c_min = datapipe.config.bounding_box_dims[1] c_max = datapipe.config.bounding_box_dims[0] inference_dict["volume_mesh_centers"] = sample_volume_points( @@ -438,7 +438,9 @@ def inference_epoch( logger.info( f"Batch {i_batch} GPU processing time: {procesing_time_end - procesing_time_start:.3f} seconds" ) - logger.info(f"Batch {i_batch} stl points: {stl_center_results.shape[1]}") + logger.info( + f"Batch {i_batch} stl points: {stl_center_results.shape[1] if stl_center_results is not None else 0}" + ) output_start_time = time.perf_counter() ###################################################### diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py index 53abe95822..0cf516f438 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe2.py +++ b/physicsnemo/datapipes/cae/domino_datapipe2.py @@ -526,10 +526,6 @@ def process_surface( surf_grid = normalize(surf_grid, s_max, s_min) surface_coordinates = normalize(surface_coordinates, s_max, s_min) surface_neighbors = normalize(surface_neighbors, s_max, s_min) - # This is for the SDF Later: - normed_vertices = normalize(stl_vertices, s_max, s_min) - else: - normed_vertices = stl_vertices ######################################################################## # Apply scaling to the targets, if desired: @@ -539,15 +535,6 @@ def process_surface( surface_fields, self.config.surface_factors ) - # Compute signed distance function for the surface grid: - sdf_surf_grid, _ = signed_distance_field( - mesh_vertices=normed_vertices, - mesh_indices=stl_indices, - input_points=surf_grid, - use_sign_winding_number=True, - ) - return_dict["sdf_surf_grid"] = sdf_surf_grid - return_dict.update( { "pos_surface_center_of_mass": pos_normals_com_surface, @@ -746,10 +733,28 @@ def process_data(self, data_dict): ) return_dict["surf_grid"] = surf_grid + # We always need to calculate the SDF on the surface grid: + # This is for the SDF Later: + if self.config.normalize_coordinates: + normed_vertices = normalize(data_dict["stl_coordinates"], s_max, s_min) + else: + normed_vertices = data_dict["stl_coordinates"] + + # For SDF calculations, make sure the mesh_indices_flattened is an integer array: + mesh_indices_flattened = data_dict["stl_faces"].to(torch.int32) + + # Compute signed distance function for the surface grid: + sdf_surf_grid, _ = signed_distance_field( + mesh_vertices=normed_vertices, + mesh_indices=mesh_indices_flattened, + input_points=surf_grid, + use_sign_winding_number=True, + ) + return_dict["sdf_surf_grid"] = sdf_surf_grid + # Store this only if normalization is active: - if self.model_type == "surface" or self.model_type == "combined": - if self.config.normalize_coordinates: - return_dict["surface_min_max"] = torch.stack([s_min, s_max]) + if self.config.normalize_coordinates: + return_dict["surface_min_max"] = torch.stack([s_min, s_max]) # This is a center of mass computation for the stl surface, # using the size of each mesh point as weight. @@ -774,9 +779,6 @@ def process_data(self, data_dict): if self.config.normalize_coordinates: return_dict["volume_min_max"] = torch.stack([c_min, c_max]) - # For SDF calculations, make sure the mesh_indices_flattened is an integer array: - mesh_indices_flattened = data_dict["stl_faces"].to(torch.int32) - if self.model_type == "volume" or self.model_type == "combined": volume_fields_raw = ( data_dict["volume_fields"] if "volume_fields" in data_dict else None From 3f4f1107b09b80ad84bbf1caa1b1d06b1fd3444c Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 24 Sep 2025 16:40:02 +0000 Subject: [PATCH 49/98] Update timing printouts for training. --- .../cfd/external_aerodynamics/domino/src/train.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py index a1029ceeae..ea71ee2a71 100644 --- a/examples/cfd/external_aerodynamics/domino/src/train.py +++ b/examples/cfd/external_aerodynamics/domino/src/train.py @@ -182,9 +182,9 @@ def train_epoch( gpu_start_info = nvmlDeviceGetMemoryInfo(gpu_handle) start_time = time.perf_counter() with Profiler(): + io_start_time = time.perf_counter() for i_batch, sampled_batched in enumerate(dataloader): - if i_batch == 7: - break + io_end_time = time.perf_counter() if add_physics_loss: autocast_enabled = False else: @@ -224,6 +224,7 @@ def train_epoch( # Gather data and report running_loss += loss.item() elapsed_time = time.perf_counter() - start_time + io_time = io_end_time - io_start_time start_time = time.perf_counter() gpu_end_info = nvmlDeviceGetMemoryInfo(gpu_handle) gpu_memory_used = gpu_end_info.used / (1024**3) @@ -245,11 +246,11 @@ def train_epoch( ) logging_string += loss_string - logging_string += f" GPU memory used: {gpu_memory_used:.3f} Gb\n" - logging_string += f" GPU memory delta: {gpu_memory_delta:.3f} Gb\n" - logging_string += f" Time taken: {elapsed_time:.2f} seconds\n" + logging_string += f" GPU memory used: {gpu_memory_used:.3f} Gb (delta: {gpu_memory_delta:.3f})\n" + logging_string += f" Timings: (IO: {io_time:.2f}, Model: {elapsed_time - io_time:.2f}, Total: {elapsed_time:.2f})s\n" logger.info(logging_string) gpu_start_info = nvmlDeviceGetMemoryInfo(gpu_handle) + io_start_time = time.perf_counter() last_loss = running_loss / (i_batch + 1) # loss per batch if dist.rank == 0: From 2e3c696e0533c7529777a43228e7b8dcdec1b92d Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Fri, 26 Sep 2025 11:52:51 -0700 Subject: [PATCH 50/98] Fix bug in output encoding when the number of upstream radii is different. --- physicsnemo/models/domino/encodings.py | 10 +++++----- physicsnemo/models/domino/model.py | 2 ++ .../utils/neighbors/radius_search/_torch_impl.py | 2 -- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/physicsnemo/models/domino/encodings.py b/physicsnemo/models/domino/encodings.py index 068e4b3a1f..ce55ed46d3 100644 --- a/physicsnemo/models/domino/encodings.py +++ b/physicsnemo/models/domino/encodings.py @@ -138,7 +138,6 @@ def __init__( radius=radius, neighbors_in_radius=neighbors_in_radius, ) - self.local_point_conv = LocalPointConv( input_features=total_neighbors_in_radius, base_layer=base_layer, @@ -201,6 +200,7 @@ def __init__( radii: list[float], neighbors_in_radius: list[int], geo_encoding_type: str, + n_upstream_radii: int, base_layer: int, activation: nn.Module, grid_resolution: tuple[int, int, int], @@ -213,7 +213,7 @@ def __init__( radius=r, neighbors_in_radius=n, total_neighbors_in_radius=self.calculate_total_neighbors_in_radius( - geo_encoding_type, n, radii + geo_encoding_type, n, n_upstream_radii ), base_layer=base_layer, activation=activation, @@ -224,12 +224,12 @@ def __init__( ) def calculate_total_neighbors_in_radius( - self, geo_encoding_type: str, neighbors_in_radius: int, radii: list[float] + self, geo_encoding_type: str, neighbors_in_radius: int, n_upstream_radii: int ) -> list[int]: if geo_encoding_type == "both": - total_neighbors_in_radius = neighbors_in_radius * (len(radii) + 1) + total_neighbors_in_radius = neighbors_in_radius * (n_upstream_radii + 1) elif geo_encoding_type == "stl": - total_neighbors_in_radius = neighbors_in_radius * (len(radii)) + total_neighbors_in_radius = neighbors_in_radius * (n_upstream_radii) elif geo_encoding_type == "sdf": total_neighbors_in_radius = neighbors_in_radius diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py index bc06289a6b..7a31c2668c 100644 --- a/physicsnemo/models/domino/model.py +++ b/physicsnemo/models/domino/model.py @@ -391,6 +391,7 @@ def __init__( radii=model_parameters.geometry_local.surface_radii, neighbors_in_radius=model_parameters.geometry_local.surface_neighbors_in_radius, geo_encoding_type=self.geo_encoding_type, + n_upstream_radii=len(model_parameters.geometry_rep.geo_conv.surface_radii), base_layer=512, activation=get_activation(model_parameters.local_point_conv.activation), grid_resolution=self.grid_resolution, @@ -401,6 +402,7 @@ def __init__( radii=model_parameters.geometry_local.volume_radii, neighbors_in_radius=model_parameters.geometry_local.volume_neighbors_in_radius, geo_encoding_type=self.geo_encoding_type, + n_upstream_radii=len(model_parameters.geometry_rep.geo_conv.volume_radii), base_layer=512, activation=get_activation(model_parameters.local_point_conv.activation), grid_resolution=self.grid_resolution, diff --git a/physicsnemo/utils/neighbors/radius_search/_torch_impl.py b/physicsnemo/utils/neighbors/radius_search/_torch_impl.py index c6df0f9e81..2b4c3394c3 100644 --- a/physicsnemo/utils/neighbors/radius_search/_torch_impl.py +++ b/physicsnemo/utils/neighbors/radius_search/_torch_impl.py @@ -56,8 +56,6 @@ def radius_search_impl( dists = torch.empty((0,), device=dists.device, dtype=dists.dtype) else: - print(f"dists shape: {dists.shape}") - # Take the max_points lowest distances for each query closest_points = torch.topk( dists, k=min(max_points, dists.shape[0]), dim=0, largest=False From e062f49b6af431cf1259e7f9511fe75f6373b0bf Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Mon, 22 Sep 2025 14:27:40 +0000 Subject: [PATCH 51/98] Update CHANGELOG --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 84d50c2128..a322d50071 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,11 +25,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Migrated Stokes MGN example to PyTorch Geometric. - Migrated Lennard Jones example to PyTorch Geometric. +<<<<<<< HEAD - Migrated physicsnemo.utils.sdf.signed_distance_field to a static return, torch-only interface. It also now works on distributed meshes and input fields. - Refactored DiTBlock to be more modular - Added NATTEN 2D neighborhood attention backend for DiTBlock - Migrated blood flow example to PyTorch Geometric. +======= +- Refactored DoMINO model code for performance optimizations and improved readability. +>>>>>>> ddfd884 (Update CHANGELOG) ### Deprecated From 6a26c958597cf6230e3f03e82844107853bb8951 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Mon, 29 Sep 2025 13:19:37 +0000 Subject: [PATCH 52/98] Update changelog --- CHANGELOG.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a322d50071..bb14a90124 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,15 +25,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Migrated Stokes MGN example to PyTorch Geometric. - Migrated Lennard Jones example to PyTorch Geometric. -<<<<<<< HEAD - Migrated physicsnemo.utils.sdf.signed_distance_field to a static return, torch-only interface. It also now works on distributed meshes and input fields. - Refactored DiTBlock to be more modular - Added NATTEN 2D neighborhood attention backend for DiTBlock - Migrated blood flow example to PyTorch Geometric. -======= - Refactored DoMINO model code for performance optimizations and improved readability. ->>>>>>> ddfd884 (Update CHANGELOG) ### Deprecated From 10bdc955477a50d72748fd1c0c1e3d3ec499194a Mon Sep 17 00:00:00 2001 From: Rishikesh Ranade Date: Mon, 29 Sep 2025 08:25:33 -0700 Subject: [PATCH 53/98] resolving bug and optimizing GeoConvOut for memory --- .../domino/src/conf/config.yaml | 20 ++-- .../domino/src/inference_on_stl.py | 36 +++++--- .../external_aerodynamics/domino/src/test.py | 56 +++++------ physicsnemo/datapipes/cae/domino_datapipe.py | 92 +++++-------------- physicsnemo/models/domino/geometry_rep.py | 35 +++---- physicsnemo/models/domino/model.py | 10 +- 6 files changed, 92 insertions(+), 157 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml index 84256a0d97..c6308b0231 100644 --- a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml +++ b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml @@ -18,7 +18,7 @@ # │ Project Details │ # └───────────────────────────────────────────┘ project: # Project name - name: AWS_Dataset + name: DrivAerML_Dataset exp_tag: 1 # Experiment tag # Main output directory. @@ -65,7 +65,7 @@ variables: reference: [38.89] # vector [30, 0, 0] should be specified as [30], while [30, 30, 0] should be [30, 30]. air_density: type: scalar - reference: 1.226 + reference: 1.0 # ┌───────────────────────────────────────────┐ # │ Training Data Configs │ @@ -77,8 +77,8 @@ data: # Input directory for training and validation data min: [-3.5, -2.25, -0.32] max: [8.5, 2.25, 3.00] bounding_box_surface: # Bounding box dimensions for car surface - min: [-1.1, -1.2, -0.32] - max: [4.5, 1.2, 1.3] + min: [-1.5, -1.4, -0.32] + max: [5.0, 1.4, 1.4] gpu_preprocessing: true gpu_output: true @@ -95,13 +95,12 @@ domain_parallelism: # └───────────────────────────────────────────┘ model: model_type: combined # train which model? surface, volume, combined - activation: "relu" # "relu" or "gelu" + activation: "gelu" # "relu" or "gelu" loss_function: loss_type: "mse" # mse or rmse area_weighing_factor: 10000 # Generally inverse of maximum area interp_res: [128, 64, 64] # resolution of latent space 128, 64, 48 use_sdf_in_basis_func: true # SDF in basis function network - positional_encoding: false # calculate positional encoding? volume_points_sample: 8192 # Number of points to sample in volume per epoch surface_points_sample: 8192 # Number of points to sample on surface per epoch surface_sampling_algorithm: area_weighted #random or area_weighted @@ -109,7 +108,7 @@ model: num_neighbors_surface: 7 # How many neighbors on surface? num_neighbors_volume: 10 # How many neighbors on volume? combine_volume_surface: false # combine volume and surface encodings - return_volume_neighbors: true # Whether to return volume neighbors or not + return_volume_neighbors: false # Whether to return volume neighbors or not use_surface_normals: true # Use surface normals and surface areas for surface computation? use_surface_area: true # Use only surface normals and not surface area integral_loss_scaling_factor: 100 # Scale integral loss by this factor @@ -119,9 +118,6 @@ model: vol_loss_scaling: 1.0 # scale volume loss with this factor in combined mode geometry_encoding_type: both # geometry encoder type, sdf, stl, both solution_calculation_mode: two-loop # one-loop is better for sharded, two-loop is lower memory but more overhead. Physics losses are not supported via one-loop presently. - resampling_surface_mesh: # resampling of surface mesh before constructing kd tree - resample: false #false or true - points: 1_000_000 # number of points geometry_rep: # Hyperparameters for geometry representation network geo_conv: base_neurons: 32 # 256 or 64 @@ -131,8 +127,8 @@ model: surface_radii: [0.01, 0.05, 1.0] # radii for surface surface_hops: 1 # Number of surface iterations volume_hops: 1 # Number of volume iterations - volume_neighbors_in_radius: [10, 10, 10, 10] # Number of neighbors in radius for volume - surface_neighbors_in_radius: [10, 10, 10] # Number of neighbors in radius for surface + volume_neighbors_in_radius: [32, 64, 128, 256] # Number of neighbors in radius for volume + surface_neighbors_in_radius: [8, 16, 128] # Number of neighbors in radius for surface fourier_features: false num_modes: 5 activation: ${model.activation} diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py index a85cc7df86..e4ec80f2e0 100644 --- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py +++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py @@ -372,6 +372,8 @@ def process_surface_mesh(self, bounding_box=None, bounding_box_surface=None): surf_sdf_grid = torch.reshape(surf_sdf_grid, (nx, ny, nz)) if self.normalize_coordinates: + sdf_grid = 2.0 * (sdf_grid - torch.amax(grid)) / (torch.amax(grid) - torch.amin(grid)) - 1.0 + surf_sdf_grid = 2.0 * (surf_sdf_grid - torch.amax(s_grid)) / (torch.amax(s_grid) - torch.amin(s_grid)) - 1.0 grid = 2.0 * (grid - c_min) / (c_max - c_min) - 1.0 s_grid = 2.0 * (s_grid - surf_min) / (surf_max - surf_min) - 1.0 @@ -533,6 +535,16 @@ def sample_points_on_surface( surface_area = np.float32(boundary["area"]) + if self.normalize_coordinates: + surface_coordinates = ( + 2.0 * (surface_coordinates - c_min) / (c_max - c_min) - 1.0 + ) + center_of_mass_normalized = ( + 2.0 * (center_of_mass - c_min) / (c_max - c_min) - 1.0 + ) + else: + center_of_mass_normalized = center_of_mass + interp_func = KDTree(surface_coordinates) dd, ii = interp_func.query(surface_coordinates, k=stencil_size) surface_neighbors = surface_coordinates[ii] @@ -553,12 +565,7 @@ def sample_points_on_surface( self.device ) - pos_normals_com = surface_coordinates - center_of_mass - - if self.normalize_coordinates: - surface_coordinates = ( - 2.0 * (surface_coordinates - c_min) / (c_max - c_min) - 1.0 - ) + pos_normals_com = surface_coordinates - center_of_mass_normalized surface_coordinates = torch.unsqueeze(surface_coordinates, 0) surface_normals = torch.unsqueeze(surface_normals, 0) @@ -637,13 +644,20 @@ def sample_points_in_volume( ) sdf_nodes = torch.unsqueeze(sdf_nodes, -1) - pos_normals_closest = volume_coordinates - sdf_node_closest_point - pos_normals_com = volume_coordinates - center_of_mass - if self.normalize_coordinates: - volume_coordinates = ( - 2.0 * (volume_coordinates - c_min) / (c_max - c_min) - 1.0 + volume_coordinates = 2.0 * (volume_coordinates - c_min) / (c_max - c_min) - 1.0 + sdf_nodes = 2.0 * (sdf_nodes - torch.amax(c_max)) / (torch.amax(c_max) - torch.amin(c_min)) - 1.0 + sdf_node_closest_point = ( + 2.0 * (sdf_node_closest_point - c_min) / (c_max - c_min) - 1.0 ) + center_of_mass_normalized = ( + 2.0 * (center_of_mass - c_min) / (c_max - c_min) - 1.0 + ) + else: + center_of_mass_normalized = center_of_mass + + pos_normals_closest = volume_coordinates - sdf_node_closest_point + pos_normals_com = volume_coordinates - center_of_mass_normalized volume_coordinates = torch.unsqueeze(volume_coordinates, 0) pos_normals_com = torch.unsqueeze(pos_normals_com, 0) diff --git a/examples/cfd/external_aerodynamics/domino/src/test.py b/examples/cfd/external_aerodynamics/domino/src/test.py index 944910f9f8..c799e83f64 100644 --- a/examples/cfd/external_aerodynamics/domino/src/test.py +++ b/examples/cfd/external_aerodynamics/domino/src/test.py @@ -464,6 +464,8 @@ def main(cfg: DictConfig): surf_grid = np.float32(surf_grid) sdf_surf_grid = np.float32(sdf_surf_grid) surf_grid_max_min = np.float32(np.asarray([s_min, s_max])) + if cfg.model.normalize_coordinates: + sdf_surf_grid = normalize(sdf_surf_grid, xp.amax(surf_grid), xp.amin(surf_grid)) # Get global parameters and global parameters scaling from config.yaml global_params_names = list(cfg.variables.global_parameters.keys()) @@ -536,6 +538,13 @@ def main(cfg: DictConfig): surface_normals / np.linalg.norm(surface_normals, axis=1)[:, np.newaxis] ) + if cfg.model.normalize_coordinates: + surface_coordinates = normalize(surface_coordinates, s_max, s_min) + surf_grid = normalize(surf_grid, s_max, s_min) + center_of_mass_normalized = normalize(center_of_mass, s_max, s_min) + else: + center_of_mass_normalized = center_of_mass + if cfg.model.num_neighbors_surface > 1: interp_func = KDTree(surface_coordinates) dd, ii = interp_func.query( @@ -554,22 +563,11 @@ def main(cfg: DictConfig): surface_neighbors_normals = surface_normals surface_neighbors_sizes = surface_sizes - dx, dy, dz = ( - (s_max[0] - s_min[0]) / nx, - (s_max[1] - s_min[1]) / ny, - (s_max[2] - s_min[2]) / nz, - ) - - if cfg.model.positional_encoding: - pos_surface_center_of_mass = calculate_normal_positional_encoding( - surface_coordinates, center_of_mass, cell_length=[dx, dy, dz] - ) - else: - pos_surface_center_of_mass = surface_coordinates - center_of_mass + + pos_surface_center_of_mass = surface_coordinates - center_of_mass_normalized - surface_coordinates = normalize(surface_coordinates, s_max, s_min) - surface_neighbors = normalize(surface_neighbors, s_max, s_min) - surf_grid = normalize(surf_grid, s_max, s_min) + # surface_coordinates = normalize(surface_coordinates, s_max, s_min) + # surface_neighbors = normalize(surface_neighbors, s_max, s_min) else: surface_coordinates = None @@ -606,11 +604,6 @@ def main(cfg: DictConfig): c_max = np.float32(bounding_box_dims[0]) c_min = np.float32(bounding_box_dims[1]) - dx, dy, dz = ( - (c_max[0] - c_min[0]) / nx, - (c_max[1] - c_min[1]) / ny, - (c_max[2] - c_min[2]) / nz, - ) # Generate a grid of specified resolution to map the bounding box # The grid is used for capturing structured geometry features and SDF representation of geometry grid = create_grid(c_max, c_min, [nx, ny, nz]) @@ -635,21 +628,20 @@ def main(cfg: DictConfig): return_cupy=False, ) sdf_nodes = sdf_nodes.reshape(-1, 1) + vol_grid_max_min = np.asarray([c_min, c_max]) - if cfg.model.positional_encoding: - pos_volume_closest = calculate_normal_positional_encoding( - volume_coordinates, sdf_node_closest_point, cell_length=[dx, dy, dz] - ) - pos_volume_center_of_mass = calculate_normal_positional_encoding( - volume_coordinates, center_of_mass, cell_length=[dx, dy, dz] - ) + if cfg.model.normalize_coordinates: + volume_coordinates = normalize(volume_coordinates, c_max, c_min) + grid = normalize(grid, c_max, c_min) + center_of_mass_normalized = normalize(center_of_mass, c_max, c_min) + sdf_grid = normalize(sdf_grid, xp.amax(grid), xp.amin(grid)) + sdf_nodes = normalize(sdf_nodes, xp.amax(grid), xp.amin(grid)) + sdf_node_closest_point = normalize(sdf_node_closest_point, c_max, c_min) else: - pos_volume_closest = volume_coordinates - sdf_node_closest_point - pos_volume_center_of_mass = volume_coordinates - center_of_mass + center_of_mass_normalized = center_of_mass - volume_coordinates = normalize(volume_coordinates, c_max, c_min) - grid = normalize(grid, c_max, c_min) - vol_grid_max_min = np.asarray([c_min, c_max]) + pos_volume_closest = volume_coordinates - sdf_node_closest_point + pos_volume_center_of_mass = volume_coordinates - center_of_mass_normalized else: volume_coordinates = None diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py index 0a3ec9e38b..666e4bfad5 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe.py +++ b/physicsnemo/datapipes/cae/domino_datapipe.py @@ -50,7 +50,6 @@ ArrayType, area_weighted_shuffle_array, calculate_center_of_mass, - calculate_normal_positional_encoding, create_grid, get_filenames, mean_std_sampling, @@ -134,8 +133,6 @@ class DoMINODataConfig: surface_variables: (Surface specific) Names of surface variables. surface_points_sample: (Surface specific) Number of surface points to sample per batch. num_surface_neighbors: (Surface specific) Number of surface neighbors to consider for nearest neighbors approach. - resample_surfaces: (Surface specific) Whether to resample the surface before kdtree/knn. Not available if caching. - resampling_points: (Surface specific) Number of points to resample the surface to. surface_sampling_algorithm: (Surface specific) Algorithm to use for surface sampling ("area_weighted" or "random"). surface_factors: (Surface specific) Non-dimensionalization factors for surface variables. If set, and scaling_type is: @@ -168,10 +165,6 @@ class DoMINODataConfig: - volume.points_sample geom_points_sample: Number of STL points sampled per batch. Independent of volume.points_sample and surface.points_sample. - positional_encoding: Whether to use positional encoding. Affects the calculation of: - - pos_volume_closest - - pos_volume_center_of_mass - - pos_surface_centter_of_mass scaling_type: Scaling type for volume variables. If used, will rescale the volume_fields and surface fields outputs. Requires volume.factor and surface.factor to be set. @@ -193,8 +186,6 @@ class DoMINODataConfig: surface_variables: Optional[Sequence] = ("pMean", "wallShearStress") surface_points_sample: int = 1024 num_surface_neighbors: int = 11 - resample_surfaces: bool = False - resampling_points: int = 1_000_000 surface_sampling_algorithm: str = Literal["area_weighted", "random"] surface_factors: Optional[Sequence] = None bounding_box_dims_surf: Optional[Union[BoundingBox, Sequence]] = None @@ -210,7 +201,6 @@ class DoMINODataConfig: sample_in_bbox: bool = False sampling: bool = False geom_points_sample: int = 300000 - positional_encoding: bool = False scaling_type: Optional[Literal["min_max_scaling", "mean_std_scaling"]] = None compute_scaling_factors: bool = False caching: bool = False @@ -236,8 +226,6 @@ def __post_init__(self): raise ValueError("Sampling should be False for caching") if self.compute_scaling_factors: raise ValueError("Compute scaling factors should be False for caching") - if self.resample_surfaces: - raise ValueError("Resample surface should be False for caching") if self.phase not in [ "train", @@ -547,8 +535,6 @@ def preprocess_combined(self, data_dict): if mesh_indices_flattened.dtype != xp.int32: mesh_indices_flattened = mesh_indices_flattened.astype(xp.int32) - center_of_mass = calculate_center_of_mass(stl_centers, stl_sizes) - if self.config.bounding_box_dims_surf is None: s_max = xp.amax(stl_vertices, 0) s_min = xp.amin(stl_vertices, 0) @@ -556,6 +542,8 @@ def preprocess_combined(self, data_dict): s_max = xp.asarray(self.config.bounding_box_dims_surf[0]) s_min = xp.asarray(self.config.bounding_box_dims_surf[1]) + center_of_mass = calculate_center_of_mass(stl_centers, stl_sizes) + # SDF calculation on the grid using WARP if not self.config.compute_scaling_factors: nx, ny, nz = self.config.grid_resolution @@ -570,6 +558,8 @@ def preprocess_combined(self, data_dict): ) sdf_surf_grid = sdf_surf_grid.reshape(nx, ny, nz) sdf_surf_grid = _convert_torch_to_array(sdf_surf_grid, self.array_provider) + if self.config.normalize_coordinates: + sdf_surf_grid = normalize(sdf_surf_grid, xp.amax(surf_grid), xp.amin(surf_grid)) else: surf_grid = None @@ -634,19 +624,6 @@ def preprocess_surface(self, data_dict, core_dict, center_of_mass, s_min, s_max) xp = self.array_provider - if self.config.resample_surfaces: - if self.config.resampling_points > surface_coordinates.shape[0]: - resampling_points = surface_coordinates.shape[0] - else: - resampling_points = self.config.resampling_points - - surface_coordinates, idx_s = shuffle_array( - surface_coordinates, resampling_points - ) - surface_normals = surface_normals[idx_s] - surface_sizes = surface_sizes[idx_s] - surface_fields = surface_fields[idx_s] - if not self.config.compute_scaling_factors: c_max = self.config.bounding_box_dims[0] c_min = self.config.bounding_box_dims[1] @@ -667,20 +644,16 @@ def preprocess_surface(self, data_dict, core_dict, center_of_mass, s_min, s_max) surface_sizes = surface_sizes[ids_in_bbox] surface_fields = surface_fields[ids_in_bbox] - # Compute the positional encoding before sampling - if self.config.positional_encoding: - dx, dy, dz = ( - (s_max[0] - s_min[0]) / nx, - (s_max[1] - s_min[1]) / ny, - (s_max[2] - s_min[2]) / nz, - ) - pos_normals_com_surface = calculate_normal_positional_encoding( - surface_coordinates, center_of_mass, cell_dimensions=[dx, dy, dz] - ) + + # Have to normalize neighbors after the kNN and sampling + if self.config.normalize_coordinates: + core_dict["surf_grid"] = normalize(core_dict["surf_grid"], s_max, s_min) + surface_coordinates = normalize(surface_coordinates, s_max, s_min) + center_of_mass_normalized = normalize(xp.asarray(center_of_mass), s_max, s_min) else: - pos_normals_com_surface = surface_coordinates - xp.asarray( - center_of_mass - ) + center_of_mass_normalized = xp.asarray(center_of_mass) + + pos_normals_com_surface = surface_coordinates - center_of_mass_normalized # Fit the kNN (or KDTree, if CPU) on ALL points: if self.config.num_surface_neighbors > 1: @@ -781,12 +754,6 @@ def preprocess_surface(self, data_dict, core_dict, center_of_mass, s_min, s_max) surface_neighbors_normals = surface_normals[ii][:, 1:] surface_neighbors_sizes = surface_sizes[ii][:, 1:] - # Have to normalize neighbors after the kNN and sampling - if self.config.normalize_coordinates: - core_dict["surf_grid"] = normalize(core_dict["surf_grid"], s_max, s_min) - surface_coordinates = normalize(surface_coordinates, s_max, s_min) - surface_neighbors = normalize(surface_neighbors, s_max, s_min) - if self.config.scaling_type is not None: if self.config.surface_factors is not None: if self.config.scaling_type == "mean_std_scaling": @@ -870,12 +837,6 @@ def preprocess_volume( volume_coordinates = volume_coordinates[ids_in_bbox] volume_fields = volume_fields[ids_in_bbox] - dx, dy, dz = ( - (c_max[0] - c_min[0]) / nx, - (c_max[1] - c_min[1]) / ny, - (c_max[2] - c_min[2]) / nz, - ) - # Generate a grid of specified resolution to map the bounding box # The grid is used for capturing structured geometry features and SDF representation of geometry grid = create_grid(c_max, c_min, [nx, ny, nz]) @@ -924,22 +885,18 @@ def preprocess_volume( sdf_nodes = sdf_nodes.reshape((-1, 1)) - if self.config.positional_encoding: - pos_normals_closest_vol = calculate_normal_positional_encoding( - volume_coordinates, - sdf_node_closest_point, - cell_dimensions=[dx, dy, dz], - ) - pos_normals_com_vol = calculate_normal_positional_encoding( - volume_coordinates, center_of_mass, cell_dimensions=[dx, dy, dz] - ) - else: - pos_normals_closest_vol = volume_coordinates - sdf_node_closest_point - pos_normals_com_vol = volume_coordinates - center_of_mass - if self.config.normalize_coordinates: volume_coordinates = normalize(volume_coordinates, c_max, c_min) grid = normalize(grid, c_max, c_min) + sdf_grid = normalize(sdf_grid, xp.amax(grid), xp.amin(grid)) + sdf_nodes = normalize(sdf_nodes, xp.amax(grid), xp.amin(grid)) + sdf_node_closest_point = normalize(sdf_node_closest_point, c_max, c_min) + center_of_mass_normalized = normalize(xp.asarray(center_of_mass), c_max, c_min) + else: + center_of_mass_normalized = xp.asarray(center_of_mass) + + pos_normals_closest_vol = volume_coordinates - sdf_node_closest_point + pos_normals_com_vol = volume_coordinates - center_of_mass_normalized if self.config.scaling_type is not None: if self.config.volume_factors is not None: @@ -1086,7 +1043,6 @@ def compute_scaling_factors(cfg: DictConfig, input_path: str, use_cache: bool) - sample_in_bbox=True, volume_points_sample=cfg.model.volume_points_sample, geom_points_sample=cfg.model.geom_points_sample, - positional_encoding=cfg.model.positional_encoding, model_type=cfg.model.model_type, bounding_box_dims=cfg.data.bounding_box, bounding_box_dims_surf=cfg.data.bounding_box_surface, @@ -1200,7 +1156,6 @@ def compute_scaling_factors(cfg: DictConfig, input_path: str, use_cache: bool) - sample_in_bbox=True, volume_points_sample=cfg.model.volume_points_sample, geom_points_sample=cfg.model.geom_points_sample, - positional_encoding=cfg.model.positional_encoding, model_type=cfg.model.model_type, bounding_box_dims=cfg.data.bounding_box, bounding_box_dims_surf=cfg.data.bounding_box_surface, @@ -1484,7 +1439,6 @@ def create_domino_dataset( volume_points_sample=cfg.model.volume_points_sample, surface_points_sample=cfg.model.surface_points_sample, geom_points_sample=cfg.model.geom_points_sample, - positional_encoding=cfg.model.positional_encoding, volume_factors=vol_factors, surface_factors=surf_factors, scaling_type=cfg.model.normalization, @@ -1492,8 +1446,6 @@ def create_domino_dataset( bounding_box_dims=cfg.data.bounding_box, bounding_box_dims_surf=cfg.data.bounding_box_surface, num_surface_neighbors=cfg.model.num_neighbors_surface, - resample_surfaces=cfg.model.resampling_surface_mesh.resample, - resampling_points=cfg.model.resampling_surface_mesh.points, surface_sampling_algorithm=cfg.model.surface_sampling_algorithm, **overrides, ) diff --git a/physicsnemo/models/domino/geometry_rep.py b/physicsnemo/models/domino/geometry_rep.py index ea77ef5f15..077df840e2 100644 --- a/physicsnemo/models/domino/geometry_rep.py +++ b/physicsnemo/models/domino/geometry_rep.py @@ -65,6 +65,7 @@ class GeoConvOut(nn.Module): def __init__( self, input_features: int, + neighbors_in_radius: int, model_parameters, grid_resolution=None, ): @@ -73,6 +74,7 @@ def __init__( Args: input_features: Number of input feature dimensions + neighbors_in_radius: Number of neighbors in radius model_parameters: Configuration parameters for the model grid_resolution: Resolution of the output grid [nx, ny, nz] """ @@ -84,9 +86,9 @@ def __init__( self.num_modes = model_parameters.num_modes if self.fourier_features: - input_features_calculated = input_features * (1 + 2 * self.num_modes) + input_features_calculated = input_features * (1 + 2 * self.num_modes) * neighbors_in_radius else: - input_features_calculated = input_features + input_features_calculated = input_features * neighbors_in_radius self.fc1 = nn.Linear(input_features_calculated, base_neurons) self.fc2 = nn.Linear(base_neurons, base_neurons // 2) @@ -96,6 +98,8 @@ def __init__( self.activation = get_activation(model_parameters.activation) + self.neighbors_in_radius = neighbors_in_radius + if self.fourier_features: self.register_buffer( "freqs", torch.exp(torch.linspace(0, math.pi, self.num_modes)) @@ -127,13 +131,8 @@ def forward( self.grid_resolution[2], ) grid = grid.reshape(1, nx * ny * nz, 3, 1) - x_transposed = torch.transpose(x, 2, 3) - dist_weights = 1.0 / (1e-6 + (x_transposed - grid) ** 2.0) - dist_weights = torch.transpose(dist_weights, 2, 3) - # x = torch.sum(x * dist_weights, 2) / torch.sum(dist_weights, 2) - # x = torch.sum(x, 2) - mask = abs(x - 0) > 1e-6 + x = rearrange(x, "b x y z -> b x (y z)", x=nx*ny*nz, y=self.neighbors_in_radius, z=3) if self.fourier_features: facets = torch.cat((x, fourier_encode_vectorized(x, self.freqs)), axis=-1) else: @@ -142,12 +141,8 @@ def forward( x = self.activation(self.fc2(x)) x = F.tanh(self.fc3(x)) - mask = mask[:, :, :, 0:1].expand( - mask.shape[0], mask.shape[1], mask.shape[2], x.shape[-1] - ) - - x = torch.sum(x * mask, 2) x = rearrange(x, "b (x y z) c -> b c x y z", x=nx, y=ny, z=nz) + return x @@ -337,11 +332,6 @@ def __init__( output_filters=geometry_rep.geo_conv.base_neurons_out, model_parameters=geometry_rep.geo_processor, ), - GeoProcessor( - input_filters=geometry_rep.geo_conv.base_neurons_in, - output_filters=geometry_rep.geo_conv.base_neurons_out, - model_parameters=geometry_rep.geo_processor, - ), ) ) else: @@ -349,10 +339,11 @@ def __init__( self.geo_conv_out = nn.ModuleList() self.geo_processor_out = nn.ModuleList() - for _ in range(len(radii)): + for u in range(len(radii)): self.geo_conv_out.append( GeoConvOut( input_features=input_features, + neighbors_in_radius=neighbors_in_radius[u], model_parameters=geometry_rep.geo_conv, grid_resolution=model_parameters.interp_res, ) @@ -403,15 +394,11 @@ def __init__( output_filters=geometry_rep.geo_conv.base_neurons_out, model_parameters=geometry_rep.geo_processor, ), - GeoProcessor( - input_filters=geometry_rep.geo_conv.base_neurons_out, - output_filters=geometry_rep.geo_conv.base_neurons_out, - model_parameters=geometry_rep.geo_processor, - ), ) else: raise ValueError("Invalid prompt. Specify unet or conv ...") self.radii = radii + self.neighbors_in_radius = neighbors_in_radius self.hops = hops self.geo_processor_sdf_out = nn.Conv3d( diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py index 7a31c2668c..8ac54f8e9a 100644 --- a/physicsnemo/models/domino/model.py +++ b/physicsnemo/models/domino/model.py @@ -359,10 +359,7 @@ def __init__( self.activation = get_activation(model_parameters.activation) self.use_sdf_in_basis_func = model_parameters.use_sdf_in_basis_func if self.output_features_vol is not None: - if model_parameters.positional_encoding: - inp_pos_vol = 25 if model_parameters.use_sdf_in_basis_func else 12 - else: - inp_pos_vol = 7 if model_parameters.use_sdf_in_basis_func else 3 + inp_pos_vol = 7 if model_parameters.use_sdf_in_basis_func else 3 self.fc_p_vol = EncodingMLP( input_features=inp_pos_vol, @@ -373,10 +370,7 @@ def __init__( ) if self.output_features_surf is not None: - if model_parameters.positional_encoding: - inp_pos_surf = 12 - else: - inp_pos_surf = 3 + inp_pos_surf = 3 self.fc_p_surf = EncodingMLP( input_features=inp_pos_surf, From ffedfaa7defe560bdf668a273cc777bf38df58c7 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Mon, 29 Sep 2025 16:34:04 +0000 Subject: [PATCH 54/98] Resolve most of the feedback from PR review. --- physicsnemo/models/domino/ball_query.py | 106 ---- physicsnemo/models/domino/encodings.py | 82 +-- physicsnemo/models/domino/geometry_rep.py | 43 +- physicsnemo/models/domino/mlps.py | 84 +-- physicsnemo/models/domino/model.py | 17 +- physicsnemo/models/domino/solutions.py | 73 +-- physicsnemo/models/layers/__init__.py | 9 +- physicsnemo/models/layers/ball_query.py | 550 +++--------------- physicsnemo/models/layers/fourier_layers.py | 80 +++ physicsnemo/models/layers/mlp_layers.py | 87 ++- test/models/data/mlp_output.pth | Bin 0 -> 1598 bytes test/models/domino/test_domino_encodings.py | 22 +- .../models/domino/test_domino_geometry_rep.py | 5 +- test/models/domino/test_domino_mlps.py | 47 +- test/models/domino/test_domino_solutions.py | 15 +- test/models/test_mlp_layers.py | 75 +++ 16 files changed, 430 insertions(+), 865 deletions(-) delete mode 100644 physicsnemo/models/domino/ball_query.py create mode 100644 test/models/data/mlp_output.pth create mode 100644 test/models/test_mlp_layers.py diff --git a/physicsnemo/models/domino/ball_query.py b/physicsnemo/models/domino/ball_query.py deleted file mode 100644 index 681fe80733..0000000000 --- a/physicsnemo/models/domino/ball_query.py +++ /dev/null @@ -1,106 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. -# SPDX-FileCopyrightText: All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This code contains the DoMINO model architecture. -The DoMINO class contains an architecture to model both surface and -volume quantities together as well as separately (controlled using -the config.yaml file) -""" - -import torch -import torch.nn as nn -from einops import rearrange - -from physicsnemo.utils.neighbors import radius_search - - -class BQWarp(nn.Module): - """ - Warp-based ball-query layer for finding neighboring points within a specified radius. - - This layer uses an accelerated ball query implementation to efficiently find points - within a specified radius of query points. - """ - - def __init__( - self, - radius: float = 0.25, - neighbors_in_radius: int = 10, - ): - """ - Initialize the BQWarp layer. - - Args: - radius: Radius for ball query operation - neighbors_in_radius: Maximum number of neighbors to return within radius - """ - super().__init__() - - self.radius = radius - self.neighbors_in_radius = neighbors_in_radius - - def forward( - self, x: torch.Tensor, p_grid: torch.Tensor, reverse_mapping: bool = True - ) -> tuple[torch.Tensor, torch.Tensor]: - """ - Performs ball query operation to find neighboring points and their features. - - This method uses the Warp-accelerated ball query implementation to find points - within a specified radius. It can operate in two modes: - - Forward mapping: Find points from x that are near p_grid points (reverse_mapping=False) - - Reverse mapping: Find points from p_grid that are near x points (reverse_mapping=True) - - Args: - x: Tensor of shape (batch_size, num_points, 3+features) containing point coordinates - and their features - p_grid: Tensor of shape (batch_size, grid_x, grid_y, grid_z, 3) containing grid point - coordinates - reverse_mapping: Boolean flag to control the direction of the mapping: - - True: Find p_grid points near x points - - False: Find x points near p_grid points - - Returns: - tuple containing: - - mapping: Tensor containing indices of neighboring points - - outputs: Tensor containing coordinates of the neighboring points - """ - - if p_grid.ndim != 3: - p_grid = rearrange(p_grid, "b nx ny nz c -> b (nx ny nz) c") - - if reverse_mapping: - mapping, outputs = radius_search( - x[0], - p_grid[0], - self.radius, - self.neighbors_in_radius, - return_points=True, - ) - mapping = mapping.unsqueeze(0) - outputs = outputs.unsqueeze(0) - else: - mapping, outputs = radius_search( - p_grid[0], - x[0], - self.radius, - self.neighbors_in_radius, - return_points=True, - ) - mapping = mapping.unsqueeze(0) - outputs = outputs.unsqueeze(0) - - return mapping, outputs diff --git a/physicsnemo/models/domino/encodings.py b/physicsnemo/models/domino/encodings.py index ce55ed46d3..55ce655090 100644 --- a/physicsnemo/models/domino/encodings.py +++ b/physicsnemo/models/domino/encodings.py @@ -21,91 +21,13 @@ the config.yaml file) """ -import math - import torch import torch.nn as nn from einops import rearrange -from .ball_query import BQWarp -from .mlps import MLP, LocalPointConv - - -def fourier_encode_vectorized( - coords: torch.Tensor, freqs: torch.Tensor -) -> torch.Tensor: - """Vectorized Fourier feature encoding - - Args: - coords: Tensor containing coordinates, of shape (batch_size, D) - freqs: Tensor containing frequencies, of shape (F,) (num frequencies) - - Returns: - Tensor containing Fourier features, of shape (batch_size, D * 2 * F) - """ - - D = coords.shape[-1] - F = freqs.shape[0] - - freqs = freqs[None, None, :, None] # reshape to [*, F, 1] for broadcasting - - coords = coords.unsqueeze(-2) # [*, 1, D] - scaled = (coords * freqs).reshape(*coords.shape[:-2], D * F) # [*, D, F] - features = torch.cat([torch.sin(scaled), torch.cos(scaled)], dim=-1) # [*, D, 2F] - - return features.reshape(*coords.shape[:-2], D * 2 * F) # [*, D * 2F] - - -class EncodingMLP(nn.Module): - """ - This is an MLP that will, optionally, fourier encode the input features. - - The encoded features are concatenated to the original inputs, and then - processed with an MLP. - - Args: - input_features: The number of input features to the MLP. - base_layer: The number of neurons in the hidden layer of the MLP. - fourier_features: Whether to fourier encode the input features. - num_modes: The number of modes to use for the fourier encoding. - activation: The activation function to use in the MLP. - - """ - - def __init__( - self, - input_features: int, - base_layer: int, - fourier_features: bool, - num_modes: int, - activation: nn.Module, - ): - super().__init__() - self.fourier_features = fourier_features - - # self.num_modes = model_parameters.num_modes - - if self.fourier_features: - input_features_calculated = input_features + input_features * num_modes * 2 - self.register_buffer( - "freqs", torch.exp(torch.linspace(0, math.pi, num_modes)) - ) - else: - input_features_calculated = input_features - - self.mlp = MLP( - input_features=input_features_calculated, - base_layer=base_layer, - output_features=base_layer, - activation=activation, - n_layers=3, - ) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - if self.fourier_features: - x = torch.cat((x, fourier_encode_vectorized(x, self.freqs)), dim=-1) +from physicsnemo.models.layers import BQWarp - return self.mlp(x) +from .mlps import LocalPointConv class LocalGeometryEncoding(nn.Module): diff --git a/physicsnemo/models/domino/geometry_rep.py b/physicsnemo/models/domino/geometry_rep.py index ea77ef5f15..698e5ff19f 100644 --- a/physicsnemo/models/domino/geometry_rep.py +++ b/physicsnemo/models/domino/geometry_rep.py @@ -15,29 +15,17 @@ # limitations under the License. import math -from typing import Callable, Literal, Sequence +from typing import Sequence import torch import torch.nn as nn import torch.nn.functional as F from einops import rearrange +from physicsnemo.models.layers import BQWarp, Mlp, fourier_encode, get_activation from physicsnemo.models.unet import UNet -from .ball_query import BQWarp -from .encodings import fourier_encode_vectorized - - -def get_activation(activation: Literal["relu", "gelu"]) -> Callable: - """ - Return a PyTorch activation function corresponding to the given name. - """ - if activation == "relu": - return nn.ReLU() - elif activation == "gelu": - return nn.GELU() - else: - raise ValueError(f"Activation function {activation} not found") +# from .encodings import fourier_encode def scale_sdf(sdf: torch.Tensor) -> torch.Tensor: @@ -88,9 +76,17 @@ def __init__( else: input_features_calculated = input_features - self.fc1 = nn.Linear(input_features_calculated, base_neurons) - self.fc2 = nn.Linear(base_neurons, base_neurons // 2) - self.fc3 = nn.Linear(base_neurons // 2, model_parameters.base_neurons_in) + self.mlp = Mlp( + in_features=input_features_calculated, + hidden_features=[base_neurons, base_neurons // 2], + out_features=model_parameters.base_neurons_in, + act_layer=get_activation(model_parameters.activation), + drop=0.0, + ) + + # self.fc1 = nn.Linear(input_features_calculated, base_neurons) + # self.fc2 = nn.Linear(base_neurons, base_neurons // 2) + # self.fc3 = nn.Linear(base_neurons // 2, model_parameters.base_neurons_in) self.grid_resolution = grid_resolution @@ -135,12 +131,13 @@ def forward( # x = torch.sum(x, 2) mask = abs(x - 0) > 1e-6 if self.fourier_features: - facets = torch.cat((x, fourier_encode_vectorized(x, self.freqs)), axis=-1) + facets = torch.cat((x, fourier_encode(x, self.freqs)), axis=-1) else: facets = x - x = self.activation(self.fc1(facets)) - x = self.activation(self.fc2(x)) - x = F.tanh(self.fc3(x)) + # x = self.activation(self.fc1(facets)) + # x = self.activation(self.fc2(x)) + # x = F.tanh(self.fc3(x)) + x = F.tanh(self.mlp(facets)) mask = mask[:, :, :, 0:1].expand( mask.shape[0], mask.shape[1], mask.shape[2], x.shape[-1] @@ -272,6 +269,8 @@ def __init__( neighbors_in_radius, hops=1, model_parameters=None, + # activation_conv: nn.Module, + # activation_processor: nn.Module, ): """ Initialize the GeometryRep module. diff --git a/physicsnemo/models/domino/mlps.py b/physicsnemo/models/domino/mlps.py index e74583dea0..f074fa7735 100644 --- a/physicsnemo/models/domino/mlps.py +++ b/physicsnemo/models/domino/mlps.py @@ -15,60 +15,17 @@ # limitations under the License. """ -This code contains the DoMINO model architecture. -The DoMINO class contains an architecture to model both surface and -volume quantities together as well as separately (controlled using -the config.yaml file) +This file contains specific MLPs for the DoMINO model. + +The main feature here is we've locked in the number of layers. """ -import torch import torch.nn as nn +from physicsnemo.models.layers import Mlp -class MLP(nn.Module): - """ - FlexibleMulti-layer perceptron (MLP) module. - This is reused in various domino layers to simplify and unify - the MLP implementations. - """ - - def __init__( - self, - input_features: int, - output_features: int, - base_layer: int, - activation: nn.Module, - n_layers: int, - ): - super(MLP, self).__init__() - self.input_features = input_features - - modules = [] - - if n_layers == 1: - # Single layer: input_features -> output_features - modules.append(nn.Linear(input_features, output_features)) - else: - # First layer: input_features -> base_layer - modules.append(nn.Linear(input_features, base_layer)) - modules.append(activation) - - # Hidden layers: base_layer -> base_layer - for _ in range(n_layers - 2): - modules.append(nn.Linear(base_layer, base_layer)) - modules.append(activation) - - # Final layer: base_layer -> output_features (no activation) - modules.append(nn.Linear(base_layer, output_features)) - - self.mlp_modules = torch.nn.Sequential(*modules) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - return self.mlp_modules(x) - - -class AggregationModel(MLP): +class AggregationModel(Mlp): """ Neural network module to aggregate local geometry encoding with basis functions. @@ -76,6 +33,8 @@ class AggregationModel(MLP): to predict the final output quantities. It serves as the final prediction layer that integrates all available information sources. + It is implemented as a straightforward MLP with 5 total layers. + """ def __init__( @@ -85,17 +44,22 @@ def __init__( base_layer: int, activation: nn.Module, ): + hidden_features = [base_layer, base_layer, base_layer, base_layer] + super().__init__( - input_features=input_features, - output_features=output_features, - base_layer=base_layer, - activation=activation, - n_layers=5, + in_features=input_features, + hidden_features=hidden_features, + out_features=output_features, + act_layer=activation, + drop=0.0, ) -class LocalPointConv(MLP): - """Layer for local geometry point kernel""" +class LocalPointConv(Mlp): + """Layer for local geometry point kernel + + This is a straight forward MLP, with exactly two layers. + """ def __init__( self, @@ -105,9 +69,9 @@ def __init__( activation: nn.Module, ): super().__init__( - input_features=input_features, - base_layer=base_layer, - output_features=output_features, - activation=activation, - n_layers=2, + in_features=input_features, + hidden_features=base_layer, + out_features=output_features, + act_layer=activation, + drop=0.0, ) diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py index 7a31c2668c..23a68ca56c 100644 --- a/physicsnemo/models/domino/model.py +++ b/physicsnemo/models/domino/model.py @@ -24,13 +24,13 @@ import torch import torch.nn as nn +from physicsnemo.models.layers import FourierMLP, get_activation from physicsnemo.models.unet import UNet from .encodings import ( - EncodingMLP, MultiGeometryEncoding, ) -from .geometry_rep import GeometryRep, get_activation +from .geometry_rep import GeometryRep from .mlps import AggregationModel from .solutions import SolutionCalculatorSurface, SolutionCalculatorVolume @@ -283,7 +283,7 @@ def __init__( if self.encode_parameters: # Defining the parameter model base_layer_p = model_parameters.parameter_model.base_layer - self.parameter_model = EncodingMLP( + self.parameter_model = FourierMLP( input_features=self.global_features, fourier_features=model_parameters.parameter_model.fourier_features, num_modes=model_parameters.parameter_model.num_modes, @@ -324,7 +324,7 @@ def __init__( self.num_variables_surf ): # Have the same basis function for each variable self.nn_basis_surf.append( - EncodingMLP( + FourierMLP( input_features=input_features_surface, base_layer=model_parameters.nn_basis_functions.base_layer, fourier_features=model_parameters.nn_basis_functions.fourier_features, @@ -342,7 +342,7 @@ def __init__( self.num_variables_vol ): # Have the same basis function for each variable self.nn_basis_vol.append( - EncodingMLP( + FourierMLP( input_features=input_features, base_layer=model_parameters.nn_basis_functions.base_layer, fourier_features=model_parameters.nn_basis_functions.fourier_features, @@ -364,7 +364,7 @@ def __init__( else: inp_pos_vol = 7 if model_parameters.use_sdf_in_basis_func else 3 - self.fc_p_vol = EncodingMLP( + self.fc_p_vol = FourierMLP( input_features=inp_pos_vol, fourier_features=model_parameters.position_encoder.fourier_features, num_modes=model_parameters.position_encoder.num_modes, @@ -378,7 +378,7 @@ def __init__( else: inp_pos_surf = 3 - self.fc_p_surf = EncodingMLP( + self.fc_p_surf = FourierMLP( input_features=inp_pos_surf, fourier_features=model_parameters.position_encoder.fourier_features, num_modes=model_parameters.position_encoder.num_modes, @@ -450,7 +450,6 @@ def __init__( num_sample_points=self.num_sample_points_surface, use_surface_normals=self.use_surface_normals, use_surface_area=self.use_surface_area, - noise_intensity=50, encode_parameters=self.encode_parameters, parameter_model=self.parameter_model if self.encode_parameters @@ -498,7 +497,7 @@ def __init__( nn_basis=self.nn_basis_vol, ) - def forward(self, data_dict, return_volume_neighbors=False): + def forward(self, data_dict): # Loading STL inputs, bounding box grids, precomputed SDF and scaling factors # STL nodes diff --git a/physicsnemo/models/domino/solutions.py b/physicsnemo/models/domino/solutions.py index c3968e8dcf..23a7e36f39 100644 --- a/physicsnemo/models/domino/solutions.py +++ b/physicsnemo/models/domino/solutions.py @@ -27,6 +27,27 @@ import torch.nn as nn +def apply_parameter_encoding( + mesh_centers: torch.Tensor, + global_params_values: torch.Tensor, + global_params_reference: torch.Tensor, +) -> torch.Tensor: + processed_parameters = [] + for k in range(global_params_values.shape[1]): + param = torch.unsqueeze(global_params_values[:, k, :], 1) + ref = torch.unsqueeze(global_params_reference[:, k, :], 1) + param = param.expand( + param.shape[0], + mesh_centers.shape[1], + param.shape[2], + ) + param = param / ref + processed_parameters.append(param) + processed_parameters = torch.cat(processed_parameters, axis=-1) + + return processed_parameters + + def sample_sphere(center, r, num_points): """Uniformly sample points in a 3D sphere around the center. @@ -122,28 +143,6 @@ def __init__( "Parameter model is required when encode_parameters is True" ) - def apply_parameter_encoding( - self, - mesh_centers: torch.Tensor, - global_params_values: torch.Tensor, - global_params_reference: torch.Tensor, - ) -> torch.Tensor: - processed_parameters = [] - for k in range(global_params_values.shape[1]): - param = torch.unsqueeze(global_params_values[:, k, :], 1) - ref = torch.unsqueeze(global_params_reference[:, k, :], 1) - param = param.expand( - param.shape[0], - mesh_centers.shape[1], - param.shape[2], - ) - param = param / ref - processed_parameters.append(param) - processed_parameters = torch.cat(processed_parameters, axis=-1) - param_encoding = self.parameter_model(processed_parameters) - - return param_encoding - def forward( self, volume_mesh_centers: torch.Tensor, @@ -156,9 +155,10 @@ def forward( Forward pass of the SolutionCalculator module. """ if self.encode_parameters: - param_encoding = self.apply_parameter_encoding( + param_encoding = apply_parameter_encoding( volume_mesh_centers, global_params_values, global_params_reference ) + param_encoding = self.parameter_model(param_encoding) volume_m_c_perturbed = [volume_mesh_centers.unsqueeze(2)] @@ -266,7 +266,6 @@ def __init__( self, num_variables: int, num_sample_points: int, - noise_intensity: float, encode_parameters: bool, use_surface_normals: bool, use_surface_area: bool, @@ -277,7 +276,6 @@ def __init__( super().__init__() self.num_variables = num_variables self.num_sample_points = num_sample_points - self.noise_intensity = noise_intensity self.encode_parameters = encode_parameters self.use_surface_normals = use_surface_normals self.use_surface_area = use_surface_area @@ -291,28 +289,6 @@ def __init__( "Parameter model is required when encode_parameters is True" ) - def apply_parameter_encoding( - self, - mesh_centers: torch.Tensor, - global_params_values: torch.Tensor, - global_params_reference: torch.Tensor, - ) -> torch.Tensor: - processed_parameters = [] - for k in range(global_params_values.shape[1]): - param = torch.unsqueeze(global_params_values[:, k, :], 1) - ref = torch.unsqueeze(global_params_reference[:, k, :], 1) - param = param.expand( - param.shape[0], - mesh_centers.shape[1], - param.shape[2], - ) - param = param / ref - processed_parameters.append(param) - processed_parameters = torch.cat(processed_parameters, axis=-1) - param_encoding = self.parameter_model(processed_parameters) - - return param_encoding - def forward( self, surface_mesh_centers: torch.Tensor, @@ -329,9 +305,10 @@ def forward( """Function to approximate solution given the neighborhood information""" if self.encode_parameters: - param_encoding = self.apply_parameter_encoding( + param_encoding = apply_parameter_encoding( surface_mesh_centers, global_params_values, global_params_reference ) + param_encoding = self.parameter_model(param_encoding) centers_inputs = [ surface_mesh_centers, diff --git a/physicsnemo/models/layers/__init__.py b/physicsnemo/models/layers/__init__.py index 627fa4f07f..cfebf5e38d 100644 --- a/physicsnemo/models/layers/__init__.py +++ b/physicsnemo/models/layers/__init__.py @@ -22,9 +22,16 @@ Stan, get_activation, ) +from .ball_query import BQWarp from .conv_layers import ConvBlock, CubeEmbedding from .dgm_layers import DGMLayer -from .fourier_layers import FourierFilter, FourierLayer, GaborFilter +from .fourier_layers import ( + FourierFilter, + FourierLayer, + FourierMLP, + GaborFilter, + fourier_encode, +) from .fully_connected_layers import ( Conv1dFCLayer, Conv2dFCLayer, diff --git a/physicsnemo/models/layers/ball_query.py b/physicsnemo/models/layers/ball_query.py index ee3e1538a9..795958800a 100644 --- a/physicsnemo/models/layers/ball_query.py +++ b/physicsnemo/models/layers/ball_query.py @@ -14,504 +14,104 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Tuple +""" +This layer is a compilable, ball-query operation. -import torch -import warp as wp -from torch.overrides import handle_torch_function, has_torch_function +By default, it will project a grid of points to a 1D set of points. +It does not support batch size > 1. +""" -@wp.kernel -def ball_query( - points1: wp.array(dtype=wp.vec3), - points2: wp.array(dtype=wp.vec3), - grid: wp.uint64, - k: wp.int32, - radius: wp.float32, - mapping: wp.array3d(dtype=wp.int32), - num_neighbors: wp.array2d(dtype=wp.int32), -): - """ - Performs ball query operation to find neighboring points within a specified radius. +import torch +import torch.nn as nn +from einops import rearrange - For each point in points1, finds up to k neighboring points from points2 that are - within the specified radius. Uses a hash grid for efficient spatial queries. +from physicsnemo.utils.neighbors import radius_search - Note that the neighbors found are not strictly guaranteed to be the closest k neighbors, - in the event that more than k neighbors are found within the radius. - Args: - points1: Array of query points - points2: Array of points to search - grid: Pre-computed hash grid for accelerated spatial queries - k: Maximum number of neighbors to find for each query point - radius: Maximum search radius for finding neighbors - mapping: Output array to store indices of neighboring points. Should be instantiated as zeros(1, len(points1), k) - num_neighbors: Output array to store the number of neighbors found for each query point. Should be instantiated as zeros(1, len(points1)) +class BQWarp(nn.Module): """ - tid = wp.tid() - - # Get position from points1 - pos = points1[tid] - - # particle contact - neighbors = wp.hash_grid_query(id=grid, point=pos, max_dist=radius) - - # Keep track of the number of neighbors found - neighbors_found = wp.int32(0) - - # loop through neighbors to compute density - for index in neighbors: - # Check if outside the radius - pos2 = points2[index] - if wp.length(pos - pos2) > radius: - continue - - # Add neighbor to the list - mapping[0, tid, neighbors_found] = index - - # Increment the number of neighbors found - neighbors_found += 1 - - # Break if we have found enough neighbors - if neighbors_found == k: - num_neighbors[0, tid] = k - break - - # Set the number of neighbors - num_neighbors[0, tid] = neighbors_found - - -@wp.kernel -def sparse_ball_query( - points2: wp.array(dtype=wp.vec3), - mapping: wp.array3d(dtype=wp.int32), - num_neighbors: wp.array2d(dtype=wp.int32), - outputs: wp.array4d(dtype=wp.float32), -): - tid = wp.tid() - - # Get number of neighbors - k = num_neighbors[0, tid] - - # Loop through neighbors - for _k in range(k): - # Get point2 index - index = mapping[0, tid, _k] - - # Get position from points2 - pos = points2[index] - - # Set the output - outputs[0, tid, _k, 0] = pos[0] - outputs[0, tid, _k, 1] = pos[1] - outputs[0, tid, _k, 2] = pos[2] - + Warp-based ball-query layer for finding neighboring points within a specified radius. -def _ball_query_forward_primitive_( - points1: torch.Tensor, - points2: torch.Tensor, - k: int, - radius: float, - hash_grid: wp.HashGrid, -) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - # Create output tensors: - mapping = torch.zeros( - (1, points1.shape[0], k), - dtype=torch.int32, - device=points1.device, - requires_grad=False, - ) - num_neighbors = torch.zeros( - (1, points1.shape[0]), - dtype=torch.int32, - device=points1.device, - requires_grad=False, - ) - outputs = torch.zeros( - (1, points1.shape[0], k, 3), - dtype=torch.float32, - device=points1.device, - requires_grad=(points1.requires_grad or points2.requires_grad), - ) + This layer uses an accelerated ball query implementation to efficiently find points + within a specified radius of query points. - # Convert from torch to warp - points1 = wp.from_torch(points1, dtype=wp.vec3, requires_grad=points1.requires_grad) - points2 = wp.from_torch(points2, dtype=wp.vec3, requires_grad=points2.requires_grad) - - wp_mapping = wp.from_torch(mapping, dtype=wp.int32, requires_grad=False) - wp_num_neighbors = wp.from_torch(num_neighbors, dtype=wp.int32, requires_grad=False) - wp_outputs = wp.from_torch( - outputs, - dtype=wp.float32, - requires_grad=(points1.requires_grad or points2.requires_grad), - ) - - # Build the grid - hash_grid.build(points2, radius) - - # Run the kernel to get mapping - wp.launch( - ball_query, - inputs=[ - points1, - points2, - hash_grid.id, - k, - radius, - ], - outputs=[ - wp_mapping, - wp_num_neighbors, - ], - dim=[points1.shape[0]], - ) - - # Run the kernel to get outputs - wp.launch( - sparse_ball_query, - inputs=[ - points2, - wp_mapping, - wp_num_neighbors, - ], - outputs=[ - wp_outputs, - ], - dim=[points1.shape[0]], - ) - - return mapping, num_neighbors, outputs - - -def _ball_query_backward_primitive_( - points1, - points2, - mapping, - num_neighbors, - outputs, - grad_mapping, - grad_num_neighbors, - grad_outputs, -) -> Tuple[torch.Tensor, torch.Tensor]: - p2_grad = torch.zeros_like(points2) - - # Run the kernel in adjoint mode - wp.launch( - sparse_ball_query, - inputs=[ - wp.from_torch(points2, dtype=wp.vec3, requires_grad=points2.requires_grad), - wp.from_torch(mapping, dtype=wp.int32, requires_grad=False), - wp.from_torch(num_neighbors, dtype=wp.int32, requires_grad=False), - ], - outputs=[ - wp.from_torch(outputs, dtype=wp.float32, requires_grad=False), - ], - adj_inputs=[ - wp.from_torch(p2_grad, dtype=wp.vec3, requires_grad=points2.requires_grad), - wp.from_torch( - grad_mapping, dtype=wp.int32, requires_grad=mapping.requires_grad - ), - wp.from_torch( - grad_num_neighbors, - dtype=wp.int32, - requires_grad=num_neighbors.requires_grad, - ), - ], - adj_outputs=[ - wp.from_torch(grad_outputs, dtype=wp.float32), - ], - dim=[points1.shape[0]], - adjoint=True, - ) - - return p2_grad - - -class BallQuery(torch.autograd.Function): + Only supports batch size 1. """ - Warp based Ball Query. - - Note: only differentiable with respect to points1 and points2. - """ - - @staticmethod - def forward( - ctx, - points1: torch.Tensor, - points2: torch.Tensor, - k: int, - radius: float, - hash_grid: wp.HashGrid, - ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - # Only works for batch size 1 - if points1.shape[0] != 1: - raise AssertionError("Ball Query only works for batch size 1") - - # CJA - 5/15/25 - This was added recently, but it looks like I also - # addressed it. The primitive functions below handle device selection - # via compute-follows-data: they will allocate new tensors on the device - # where points1 currently resides (forward) and points2 resides (backward). - # there isn't checking that the devices match, but it will crash if they do not. - # try: - # device = str(wp.get_device()) - # except Exception: - # device = "cuda" - - ctx.k = k - ctx.radius = radius - - # Make grid - ctx.hash_grid = hash_grid - # Apply the primitive. Note the batch index is removed. - mapping, num_neighbors, outputs = _ball_query_forward_primitive_( - points1[0], - points2[0], - k, - radius, - hash_grid, - ) - ctx.save_for_backward(points1, points2, mapping, num_neighbors, outputs) - - return mapping, num_neighbors, outputs - - @staticmethod - def backward(ctx, grad_mapping, grad_num_neighbors, grad_outputs): - points1, points2, mapping, num_neighbors, outputs = ctx.saved_tensors - # Apply the primitive - p2_grad = _ball_query_backward_primitive_( - points1[0], - points2[0], - mapping, - num_neighbors, - outputs, - grad_mapping, - grad_num_neighbors, - grad_outputs, - ) - p2_grad = p2_grad.unsqueeze(0) - - # Return the gradients - return ( - torch.zeros_like(points1), - p2_grad, - None, - None, - None, - ) - - -def ball_query_layer( - points1: torch.Tensor, - points2: torch.Tensor, - k: int, - radius: float, - hash_grid: wp.HashGrid, -): - """ - Wrapper for BallQuery.apply to support a functional interface. - """ - if has_torch_function((points1, points2)): - return handle_torch_function( - ball_query_layer, (points1, points2), points1, points2, k, radius, hash_grid - ) - return BallQuery.apply(points1, points2, k, radius, hash_grid) - - -class BallQueryLayer(torch.nn.Module): - """ - Torch layer for differentiable and accelerated Ball Query - operation using Warp. - Args: - k (int): Number of neighbors. - radius (float): Radius of influence. - grid_size (int): Resolution of the hash grid. (Assumed to be uniform in all dimensions.) - """ + def __init__( + self, + radius: float = 0.25, + neighbors_in_radius: int | None = 10, + ): + """ + Initialize the BQWarp layer. - def __init__(self, k: int, radius: float, grid_size: int = 32): + Args: + radius: Radius for ball query operation + neighbors_in_radius: Maximum number of neighbors to return within radius. If None, all neighbors will be returned. + """ super().__init__() - wp.init() - self.k = k + self.radius = radius - self.hash_grid = wp.HashGrid(grid_size, grid_size, grid_size) + self.neighbors_in_radius = neighbors_in_radius def forward( - self, points1: torch.Tensor, points2: torch.Tensor - ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + self, x: torch.Tensor, p_grid: torch.Tensor, reverse_mapping: bool = True + ) -> tuple[torch.Tensor, torch.Tensor]: """ - Performs ball query operation to find neighboring points within a specified radius. + Performs ball query operation to find neighboring points and their features. - For each point in points1, finds up to k neighboring points from points2 that are - within the specified radius. Uses a hash grid for efficient spatial queries. + This method uses the Warp-accelerated ball query implementation to find points + within a specified radius. It can operate in two modes: + - Forward mapping: Find points from x that are near p_grid points (reverse_mapping=False) + - Reverse mapping: Find points from p_grid that are near x points (reverse_mapping=True) Args: - points1: Tensor of shape (batch_size, num_points1, 3) containing query points - points2: Tensor of shape (batch_size, num_points2, 3) containing points to search + x: Tensor of shape (batch_size, num_points, 3+features) containing point coordinates + and their features + p_grid: Tensor of shape (batch_size, grid_x, grid_y, grid_z, 3) containing grid point + coordinates + reverse_mapping: Boolean flag to control the direction of the mapping: + - True: Find p_grid points near x points + - False: Find x points near p_grid points Returns: tuple containing: - mapping: Tensor containing indices of neighboring points - - num_neighbors: Tensor containing the number of neighbors found for each query point - - outputs: Tensor containing features or coordinates of the neighboring points + - outputs: Tensor containing coordinates of the neighboring points """ - return ball_query_layer( - points1, - points2, - self.k, - self.radius, - self.hash_grid, - ) - - -if __name__ == "__main__": - # Make function for saving point clouds - import pyvista as pv - - from physicsnemo.utils.neighbors import radius_search - - radius_search = torch.compile(radius_search) - - torch.random.manual_seed(0) - torch.cuda.manual_seed(0) - - def save_point_cloud(points, name): - cloud = pv.PolyData(points.detach().cpu().numpy()) - cloud.save(name) - - # Check forward pass - # Initialize tensors - n = 1 # number of point clouds - p1 = 1600_000 # 100000 # number of points in point cloud 1 - d = 3 # dimension of the points - p2 = 1600_000 # 100000 # number of points in point cloud 2 - points1 = torch.rand(n, p1, d, device="cuda", requires_grad=False) - - points2 = torch.rand(n, p2, d, device="cuda", requires_grad=True) - k = 256 # maximum number of neighbors - radius = 0.1 - - # Make ball query layer - layer = BallQueryLayer(k, radius) - - # Make ball query - - for i in range(5): - mapping, num_neighbors, outputs = layer( - points1, - points2, - ) - indices, points = radius_search( - points=points2[0], - queries=points1[0], - radius=radius, - max_points=k, - return_dists=False, - return_points=True, - ) - - # sorted_bq_indices = torch.sort(mapping[0][0]).values - # sorted_rs_indices = torch.sort(indices[0]).values - - # print(sorted_bq_indices - sorted_rs_indices) - # print(sorted_bq_indices) - # print(sorted_rs_indices) - - start_event = torch.cuda.Event(enable_timing=True) - end_event = torch.cuda.Event(enable_timing=True) - torch.cuda.synchronize() - for i in range(25): - if i == 5: - start_event.record() - mapping, num_neighbors, outputs = layer( - points1, - points2, - ) - end_event.record() - torch.cuda.synchronize() - print( - f"Ball Query Time taken: {start_event.elapsed_time(end_event) / 20} ms per iteration" - ) - - start_event = torch.cuda.Event(enable_timing=True) - end_event = torch.cuda.Event(enable_timing=True) - for i in range(25): - if i == 5: - torch.cuda.synchronize() - start_event.record() - indices, points = radius_search( - points=points2[0], - queries=points1[0], - radius=radius, - max_points=k, - return_dists=False, - return_points=True, - ) - end_event.record() - torch.cuda.synchronize() - print( - f"Radius Search Time taken: {start_event.elapsed_time(end_event) / 20} ms per iteration" - ) - - # Optimize the background points to move to the query points - optimizer = torch.optim.SGD([points2], 0.00) - - # Test optimization - start_event = torch.cuda.Event(enable_timing=True) - end_event = torch.cuda.Event(enable_timing=True) - torch.cuda.synchronize() - target = points1.unsqueeze(2).clone().detach() - for i in range(25): - if i == 5: - start_event.record() - optimizer.zero_grad() - # mapping, num_neighbors, outputs = layer(points1, points2, lengths1, lengths2) - mapping, num_neighbors, outputs = layer(points1, points2) - # print(mapping[0][3]) - # print(torch.where(mapping == 1)) - loss = (points1.unsqueeze(2) - outputs).pow(2).sum() - loss.backward() - # print(f"ball query Points1 grad: {points1.grad}") - optimizer.step() - optimizer.zero_grad() - - end_event.record() - torch.cuda.synchronize() - print( - f"Ball Query + backwards Time taken: {start_event.elapsed_time(end_event) / 20} ms per iteration" - ) - - # Optimize the background points to move to the query points - optimizer = torch.optim.SGD( - [points2], 0.00 - ) # Setting the LR to 0.0 ensures the same gradients each time - - # Test optimization - start_event = torch.cuda.Event(enable_timing=True) - end_event = torch.cuda.Event(enable_timing=True) - torch.cuda.synchronize() - start_event.record() - for i in range(25): - if i == 5: - start_event.record() - optimizer.zero_grad() - # mapping, num_neighbors, outputs = layer(points1, points2, lengths1, lengths2) - indexes, points = radius_search( - points=points2[0], - queries=points1[0], - radius=radius, - max_points=k, - return_dists=False, - return_points=True, - ) - loss = (target - points).pow(2).sum() - loss.backward() - optimizer.step() - # print(f"radius search Points1 grad: {points1.grad}") - optimizer.zero_grad() - end_event.record() - torch.cuda.synchronize() - print( - f"radius search + backwards Time taken: {start_event.elapsed_time(end_event) / 20} ms per iteration" - ) + if p_grid.shape[-1] != x.shape[-1] or x.shape[-1] != 3: + raise ValueError("The last dimension of p_grid and x must be 3") + + if p_grid.ndim != 3: + if p_grid.ndim == 4: + p_grid = rearrange(p_grid, "b nx ny c -> b (nx ny) c") + elif p_grid.ndim == 5: + p_grid = rearrange(p_grid, "b nx ny nz c -> b (nx ny nz) c") + else: + raise ValueError("p_grid must be 3D, 4D, 5D only") + + if reverse_mapping: + mapping, outputs = radius_search( + x[0], + p_grid[0], + self.radius, + self.neighbors_in_radius, + return_points=True, + ) + mapping = mapping.unsqueeze(0) + outputs = outputs.unsqueeze(0) + else: + mapping, outputs = radius_search( + p_grid[0], + x[0], + self.radius, + self.neighbors_in_radius, + return_points=True, + ) + mapping = mapping.unsqueeze(0) + outputs = outputs.unsqueeze(0) + + return mapping, outputs diff --git a/physicsnemo/models/layers/fourier_layers.py b/physicsnemo/models/layers/fourier_layers.py index 35cb4d81a1..ba7db24a68 100644 --- a/physicsnemo/models/layers/fourier_layers.py +++ b/physicsnemo/models/layers/fourier_layers.py @@ -21,6 +21,86 @@ import torch.nn as nn from torch import Tensor +from .mlp_layers import Mlp + + +def fourier_encode(coords: torch.Tensor, freqs: torch.Tensor) -> torch.Tensor: + """Vectorized Fourier feature encoding + + Args: + coords: Tensor containing coordinates, of shape (batch_size, D) + freqs: Tensor containing frequencies, of shape (F,) (num frequencies) + + Returns: + Tensor containing Fourier features, of shape (batch_size, D * 2 * F) + """ + + D = coords.shape[-1] + F = freqs.shape[0] + + freqs = freqs[None, None, :, None] # reshape to [*, F, 1] for broadcasting + + coords = coords.unsqueeze(-2) # [*, 1, D] + scaled = (coords * freqs).reshape(*coords.shape[:-2], D * F) # [*, D, F] + features = torch.cat([torch.sin(scaled), torch.cos(scaled)], dim=-1) # [*, D, 2F] + + return features.reshape(*coords.shape[:-2], D * 2 * F) # [*, D * 2F] + + +class FourierMLP(nn.Module): + """ + This is an MLP that will, optionally, fourier encode the input features. + + The encoded features are concatenated to the original inputs, and then + processed with an MLP. + + Args: + input_features: The number of input features to the MLP. + base_layer: The number of neurons in the hidden layer of the MLP. + fourier_features: Whether to fourier encode the input features. + num_modes: The number of modes to use for the fourier encoding. + activation: The activation function to use in the MLP. + + """ + + def __init__( + self, + input_features: int, + base_layer: int, + fourier_features: bool, + num_modes: int, + activation: nn.Module | str, + ): + super().__init__() + self.fourier_features = fourier_features + + # self.num_modes = model_parameters.num_modes + + if self.fourier_features: + input_features_calculated = input_features + input_features * num_modes * 2 + self.register_buffer( + "freqs", torch.exp(torch.linspace(0, math.pi, num_modes)) + ) + else: + input_features_calculated = input_features + + self.mlp = Mlp( + in_features=input_features_calculated, + hidden_features=[ + base_layer, + base_layer, + ], + out_features=base_layer, + act_layer=activation, + drop=0.0, + ) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if self.fourier_features: + x = torch.cat((x, fourier_encode(x, self.freqs)), dim=-1) + + return self.mlp(x) + class FourierLayer(nn.Module): """Fourier layer used in the Fourier feature network""" diff --git a/physicsnemo/models/layers/mlp_layers.py b/physicsnemo/models/layers/mlp_layers.py index 8e9a18858b..ec832ad6b1 100644 --- a/physicsnemo/models/layers/mlp_layers.py +++ b/physicsnemo/models/layers/mlp_layers.py @@ -17,28 +17,83 @@ import torch from torch import nn +from .activations import get_activation + +# class Mlp(nn.Module): +# def __init__( +# self, +# in_features, +# hidden_features=None, +# out_features=None, +# act_layer=nn.GELU, +# drop=0.0, +# ): +# super().__init__() +# out_features = out_features or in_features +# hidden_features = hidden_features or in_features +# self.fc1 = nn.Linear(in_features, hidden_features) +# self.act = act_layer() +# self.fc2 = nn.Linear(hidden_features, out_features) +# self.drop = nn.Dropout(drop) + +# def forward(self, x: torch.Tensor): +# x = self.fc1(x) +# x = self.act(x) +# x = self.drop(x) +# x = self.fc2(x) +# x = self.drop(x) +# return x + class Mlp(nn.Module): def __init__( self, - in_features, - hidden_features=None, - out_features=None, - act_layer=nn.GELU, - drop=0.0, + in_features: int, + hidden_features: int | list[int] | None = None, + out_features: int | None = None, + act_layer: nn.Module | str = nn.GELU, + drop: float = 0.0, ): super().__init__() out_features = out_features or in_features - hidden_features = hidden_features or in_features - self.fc1 = nn.Linear(in_features, hidden_features) - self.act = act_layer() - self.fc2 = nn.Linear(hidden_features, out_features) - self.drop = nn.Dropout(drop) + if isinstance(hidden_features, int): + hidden_features = [ + hidden_features, + ] + elif hidden_features is None: + hidden_features = [ + in_features, + ] + + # If the activation is a string, get it. + # It's it's a type, instantiate it. + # If it's a module, leave it be. + if isinstance(act_layer, str): + act_layer = get_activation(act_layer) + elif isinstance(act_layer, nn.Module): + pass + else: + act_layer = act_layer() + if not isinstance(act_layer, nn.Module): + raise ValueError( + f"Activation layer must be a string or a module, got {type(act_layer)}" + ) + + layers = [] + input_dim = in_features + for hidden_dim in hidden_features: + layers.append(nn.Linear(input_dim, hidden_dim)) + layers.append(act_layer) + if drop != 0: + layers.append(nn.Dropout(drop)) + input_dim = hidden_dim + + # Add the last layers: + layers.append(nn.Linear(input_dim, out_features)) + if drop != 0: + layers.append(nn.Dropout(drop)) + + self.layers = nn.Sequential(*layers) def forward(self, x: torch.Tensor): - x = self.fc1(x) - x = self.act(x) - x = self.drop(x) - x = self.fc2(x) - x = self.drop(x) - return x + return self.layers(x) diff --git a/test/models/data/mlp_output.pth b/test/models/data/mlp_output.pth new file mode 100644 index 0000000000000000000000000000000000000000..cc2f0ea9de0d96a3e72c4317c9f36b2d1c06cd09 GIT binary patch literal 1598 zcmbVM-)qxQ6uxbjwq}ZK>w^v^?ybYwCS97fXeR{8OCHQBO7SIRX;y=2)0^B*6huKb z@WsDCMSSs35LEUDC(IU$x zyzBh2=`8Vpw)3KO|E+%m;lBw*x_>*<`%WzFmtKV Xxc#L*H~5BSUE`p!X-o#e{=wcqwx@0G literal 0 HcmV?d00001 diff --git a/test/models/domino/test_domino_encodings.py b/test/models/domino/test_domino_encodings.py index a27e2dd0a9..6570e0a686 100644 --- a/test/models/domino/test_domino_encodings.py +++ b/test/models/domino/test_domino_encodings.py @@ -25,19 +25,18 @@ @pytest.mark.parametrize("device", ["cuda:0"]) @pytest.mark.parametrize("fourier_features", [True, False]) @pytest.mark.parametrize("num_modes", [3, 5, 10]) -def test_encoding_mlp(device, fourier_features, num_modes): - """Test EncodingMLP with various configurations""" - from physicsnemo.models.domino.encodings import EncodingMLP - from physicsnemo.models.domino.model import get_activation +def test_fourier_mlp(device, fourier_features, num_modes): + """Test FourierMLP with various configurations""" + from physicsnemo.models.layers import FourierMLP torch.manual_seed(0) - model = EncodingMLP( + model = FourierMLP( input_features=3, base_layer=64, fourier_features=fourier_features, num_modes=num_modes, - activation=get_activation("relu"), + activation="relu", ).to(device) x = torch.randn(2, 100, 3).to(device) @@ -49,14 +48,14 @@ def test_encoding_mlp(device, fourier_features, num_modes): @pytest.mark.parametrize("device", ["cuda:0"]) def test_fourier_encode_vectorized(device): """Test fourier encoding function""" - from physicsnemo.models.domino.encodings import fourier_encode_vectorized + from physicsnemo.models.layers import fourier_encode torch.manual_seed(0) coords = torch.randn(4, 20, 3).to(device) freqs = torch.exp(torch.linspace(0, math.pi, 5)).to(device) - output = fourier_encode_vectorized(coords, freqs) + output = fourier_encode(coords, freqs) # Output should be [batch, points, D * 2 * F] = [4, 20, 3 * 2 * 5] = [4, 20, 30] validate_output_shape_and_values(output, (4, 20, 30)) @@ -118,6 +117,7 @@ def test_multi_geometry_encoding(device, geo_encoding_type): neighbors_in_radius=neighbors_in_radius, geo_encoding_type=geo_encoding_type, base_layer=64, + n_upstream_radii=2, activation=get_activation("relu"), grid_resolution=GRID_RESOLUTION, ).to(device) @@ -133,14 +133,8 @@ def test_multi_geometry_encoding(device, geo_encoding_type): volume_mesh_centers = torch.randn(BATCH_SIZE, N_MESH_POINTS, 3).to(device) p_grid = torch.randn(BATCH_SIZE, *GRID_RESOLUTION, 3).to(device) - print(f"encoding_g.shape: {encoding_g.shape}") - print(f"volume_mesh_centers.shape: {volume_mesh_centers.shape}") - print(f"p_grid.shape: {p_grid.shape}") - output = model(encoding_g, volume_mesh_centers, p_grid) - print(f"output.shape: {output.shape}") - expected_output_dim = sum(neighbors_in_radius) validate_output_shape_and_values( diff --git a/test/models/domino/test_domino_geometry_rep.py b/test/models/domino/test_domino_geometry_rep.py index 813b246bae..628e760aa5 100644 --- a/test/models/domino/test_domino_geometry_rep.py +++ b/test/models/domino/test_domino_geometry_rep.py @@ -24,7 +24,8 @@ @pytest.mark.parametrize("device", ["cuda:0"]) @pytest.mark.parametrize("act", ["relu", "gelu"]) -def test_geo_conv_out(device, act): +@pytest.mark.parametrize("fourier_features", [True, False]) +def test_geo_conv_out(device, act, fourier_features): """Test GeoConvOut layer""" from physicsnemo.models.domino.geometry_rep import GeoConvOut @@ -39,6 +40,8 @@ class TestParams: activation: str = act params = TestParams() + params.fourier_features = fourier_features + grid_resolution = [32, 32, 32] layer = GeoConvOut( diff --git a/test/models/domino/test_domino_mlps.py b/test/models/domino/test_domino_mlps.py index 86d2d9a208..d181f24401 100644 --- a/test/models/domino/test_domino_mlps.py +++ b/test/models/domino/test_domino_mlps.py @@ -19,33 +19,33 @@ from .utils import validate_output_shape_and_values +# @pytest.mark.parametrize("device", ["cuda:0"]) +# @pytest.mark.parametrize("activation", ["relu", "gelu"]) +# @pytest.mark.parametrize("n_layers", [1, 2, 3, 5]) +# def test_mlp(device, activation, n_layers): +# """Test basic MLP functionality""" +# from physicsnemo.models.domino.mlps import MLP +# from physicsnemo.models.domino.model import get_activation -@pytest.mark.parametrize("device", ["cuda:0"]) -@pytest.mark.parametrize("activation", ["relu", "gelu"]) -@pytest.mark.parametrize("n_layers", [1, 2, 3, 5]) -def test_mlp(device, activation, n_layers): - """Test basic MLP functionality""" - from physicsnemo.models.domino.mlps import MLP - from physicsnemo.models.domino.model import get_activation +# torch.manual_seed(0) - torch.manual_seed(0) +# mlp = MLP( +# input_features=10, +# output_features=5, +# base_layer=32, +# activation=get_activation(activation), +# n_layers=n_layers, +# ).to(device) - mlp = MLP( - input_features=10, - output_features=5, - base_layer=32, - activation=get_activation(activation), - n_layers=n_layers, - ).to(device) +# x = torch.randn(4, 50, 10).to(device) +# output = mlp(x) - x = torch.randn(4, 50, 10).to(device) - output = mlp(x) - - validate_output_shape_and_values(output, (4, 50, 5)) +# validate_output_shape_and_values(output, (4, 50, 5)) @pytest.mark.parametrize("device", ["cuda:0"]) -def test_aggregation_model(device): +@pytest.mark.parametrize("activation", ["relu", "gelu"]) +def test_aggregation_model(device, activation): """Test AggregationModel""" from physicsnemo.models.domino.mlps import AggregationModel from physicsnemo.models.domino.model import get_activation @@ -56,7 +56,7 @@ def test_aggregation_model(device): input_features=100, output_features=1, base_layer=64, - activation=get_activation("relu"), + activation=get_activation(activation), ).to(device) x = torch.randn(2, 30, 100).to(device) @@ -66,7 +66,8 @@ def test_aggregation_model(device): @pytest.mark.parametrize("device", ["cuda:0"]) -def test_local_point_conv(device): +@pytest.mark.parametrize("activation", ["relu", "gelu"]) +def test_local_point_conv(device, activation): """Test LocalPointConv""" from physicsnemo.models.domino.mlps import LocalPointConv from physicsnemo.models.domino.model import get_activation @@ -77,7 +78,7 @@ def test_local_point_conv(device): input_features=50, base_layer=128, output_features=32, - activation=get_activation("relu"), + activation=get_activation(activation), ).to(device) x = torch.randn(2, 100, 50).to(device) diff --git a/test/models/domino/test_domino_solutions.py b/test/models/domino/test_domino_solutions.py index be4797eafc..36ddd0d3db 100644 --- a/test/models/domino/test_domino_solutions.py +++ b/test/models/domino/test_domino_solutions.py @@ -29,10 +29,9 @@ def test_solution_calculator_volume( device, num_variables, num_sample_points, encode_parameters ): """Test SolutionCalculatorVolume with various configurations""" - from physicsnemo.models.domino.encodings import EncodingMLP from physicsnemo.models.domino.mlps import AggregationModel - from physicsnemo.models.domino.model import get_activation from physicsnemo.models.domino.solutions import SolutionCalculatorVolume + from physicsnemo.models.layers import FourierMLP, get_activation torch.manual_seed(0) @@ -40,7 +39,7 @@ def test_solution_calculator_volume( # Create parameter model if needed parameter_model = ( - EncodingMLP( + FourierMLP( input_features=2, base_layer=32, fourier_features=True, @@ -67,7 +66,7 @@ def test_solution_calculator_volume( # Create basis functions nn_basis = nn.ModuleList( [ - EncodingMLP( + FourierMLP( input_features=3, base_layer=32, fourier_features=False, @@ -115,10 +114,9 @@ def test_solution_calculator_surface( device, num_variables, use_surface_normals, use_surface_area ): """Test SolutionCalculatorSurface with various configurations""" - from physicsnemo.models.domino.encodings import EncodingMLP from physicsnemo.models.domino.mlps import AggregationModel - from physicsnemo.models.domino.model import get_activation from physicsnemo.models.domino.solutions import SolutionCalculatorSurface + from physicsnemo.models.layers import FourierMLP, get_activation torch.manual_seed(0) @@ -131,8 +129,6 @@ def test_solution_calculator_surface( if use_surface_area: input_features += 1 - print(f"Input features: {input_features}") - # Create aggregation models aggregation_model = nn.ModuleList( [ @@ -149,7 +145,7 @@ def test_solution_calculator_surface( # Create basis functions nn_basis = nn.ModuleList( [ - EncodingMLP( + FourierMLP( input_features=input_features, base_layer=32, fourier_features=False, @@ -163,7 +159,6 @@ def test_solution_calculator_surface( model = SolutionCalculatorSurface( num_variables=num_variables, num_sample_points=3, - noise_intensity=50.0, encode_parameters=False, use_surface_normals=use_surface_normals, use_surface_area=use_surface_area, diff --git a/test/models/test_mlp_layers.py b/test/models/test_mlp_layers.py new file mode 100644 index 0000000000..19db339ba8 --- /dev/null +++ b/test/models/test_mlp_layers.py @@ -0,0 +1,75 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +import torch + +from physicsnemo.models.layers import Mlp + +from .common import ( + validate_forward_accuracy, +) + + +@pytest.mark.parametrize("device", ["cuda:0", "cpu"]) +def test_mlp_forward_accuracy(device): + torch.manual_seed(7) + target_device = torch.device(device) + + model = Mlp(in_features=10, hidden_features=20, out_features=5).to(target_device) + input_tensor = torch.randn(1, 10).to( + target_device + ) # Assuming a batch size of 1 for simplicity + model(input_tensor) + + file_name = "mlp_output.pth" + + # Tack this on for the test, since model is not a physicsnemo Module: + model.device = target_device + + assert validate_forward_accuracy( + model, + (input_tensor,), + file_name=file_name, + atol=1e-3, + ) + + +def test_mlp_activation_and_dropout(): + model = Mlp(in_features=10, hidden_features=20, out_features=5, drop=0.5) + input_tensor = torch.randn(2, 10) # Assuming a batch size of 1 for simplicity + + output_tensor = model(input_tensor) + + assert output_tensor.shape == torch.Size([2, 5]) + + +def test_mlp_different_activation(): + model = Mlp( + in_features=10, hidden_features=20, out_features=7, act_layer=torch.nn.ReLU + ) + input_tensor = torch.randn(3, 10) # Assuming a batch size of 1 for simplicity + + output_tensor = model(input_tensor) + assert output_tensor.shape == torch.Size([3, 7]) + + +def test_multiple_hidden_layers(): + model = Mlp(in_features=10, hidden_features=[20, 30], out_features=5) + input_tensor = torch.randn(4, 10) # Assuming a batch size of 1 for simplicity + + output_tensor = model(input_tensor) + assert output_tensor.shape == torch.Size([4, 5]) From a87f666e06d71143ed4060473e8fe772f4e8b7df Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Mon, 29 Sep 2025 20:38:06 +0000 Subject: [PATCH 55/98] Align new datapipe with Rishi's --- physicsnemo/datapipes/cae/domino_datapipe2.py | 127 ++++++++---------- 1 file changed, 59 insertions(+), 68 deletions(-) diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py index 0cf516f438..d953e1c9df 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe2.py +++ b/physicsnemo/datapipes/cae/domino_datapipe2.py @@ -17,7 +17,7 @@ """ This code provides the datapipe for reading the processed npy files, generating multi-res grids, calculating signed distance fields, -positional encodings, sampling random points in the volume and on surface, +sampling random points in the volume and on surface, normalizing fields and returning the output tensors as a dictionary. This datapipe also non-dimensionalizes the fields, so the order in which the variables should @@ -34,6 +34,7 @@ import torch import torch.cuda.nvtx as nvtx from omegaconf import DictConfig +from torch.distributed.tensor.placement_types import Replicate from torch.utils.data import Dataset from physicsnemo.datapipes.cae.drivaer_ml_dataset import ( @@ -41,9 +42,9 @@ compute_mean_std_min_max, ) from physicsnemo.distributed import DistributedManager +from physicsnemo.distributed.shard_tensor import ShardTensor, scatter_tensor from physicsnemo.utils.domino.utils import ( calculate_center_of_mass, - calculate_normal_positional_encoding, create_grid, get_filenames, normalize, @@ -77,8 +78,6 @@ class DoMINODataConfig: surface_variables: (Surface specific) Names of surface variables. surface_points_sample: (Surface specific) Number of surface points to sample per batch. num_surface_neighbors: (Surface specific) Number of surface neighbors to consider for nearest neighbors approach. - resample_surfaces: (Surface specific) Whether to resample the surface before kdtree/knn. Not available if caching. - resampling_points: (Surface specific) Number of points to resample the surface to. surface_sampling_algorithm: (Surface specific) Algorithm to use for surface sampling ("area_weighted" or "random"). surface_factors: (Surface specific) Non-dimensionalization factors for surface variables. If set, and scaling_type is: @@ -111,10 +110,6 @@ class DoMINODataConfig: - volume.points_sample geom_points_sample: Number of STL points sampled per batch. Independent of volume.points_sample and surface.points_sample. - positional_encoding: Whether to use positional encoding. Affects the calculation of: - - pos_volume_closest - - pos_volume_center_of_mass - - pos_surface_centter_of_mass scaling_type: Scaling type for volume variables. If used, will rescale the volume_fields and surface fields outputs. Requires volume.factor and surface.factor to be set. @@ -136,8 +131,6 @@ class DoMINODataConfig: surface_variables: Optional[Sequence] = ("pMean", "wallShearStress") surface_points_sample: int = 1024 num_surface_neighbors: int = 11 - resample_surfaces: bool = False - resampling_points: int = 1_000_000 surface_sampling_algorithm: str = Literal["area_weighted", "random"] surface_factors: Optional[Sequence] = None bounding_box_dims_surf: Optional[Union[BoundingBox, Sequence]] = None @@ -153,7 +146,6 @@ class DoMINODataConfig: sample_in_bbox: bool = False sampling: bool = False geom_points_sample: int = 300000 - positional_encoding: bool = False scaling_type: Optional[Literal["min_max_scaling", "mean_std_scaling"]] = None compute_scaling_factors: bool = False caching: bool = False @@ -180,8 +172,6 @@ def __post_init__(self): raise ValueError("Sampling should be False for caching") if self.compute_scaling_factors: raise ValueError("Compute scaling factors should be False for caching") - if self.resample_surfaces: - raise ValueError("Resample surface should be False for caching") if self.phase not in [ "train", @@ -202,7 +192,8 @@ def __post_init__(self): ##### TODO -# - check the bounding box protocol works +# - The SDF normalization here is based on using a normalized mesh and +# a normalized coordinate. The alternate method is to normalize to the min/max of the grid. class DoMINODataPipe(Dataset): @@ -373,6 +364,7 @@ def downsample_geometry( if self.config.sampling: geometry_points = self.config.geom_points_sample + geometry_coordinates_sampled, idx_geometry = shuffle_array( stl_vertices, geometry_points ) @@ -416,23 +408,6 @@ def process_surface( if surface_fields is not None: surface_fields = surface_fields[idx] - ######################################################################## - # Surface resampling ... - ######################################################################## - if self.config.resample_surfaces: - if self.config.resampling_points > surface_coordinates.shape[0]: - resampling_points = surface_coordinates.shape[0] - else: - resampling_points = self.config.resampling_points - - surface_coordinates, idx_s = shuffle_array( - surface_coordinates, resampling_points - ) - surface_normals = surface_normals[idx_s] - surface_sizes = surface_sizes[idx_s] - if surface_fields is not None: - surface_fields = surface_fields[idx_s] - ######################################################################## # Reject surface points outside of the Bounding Box # NOTE - this is using the VOLUME bounding box! @@ -450,19 +425,6 @@ def process_surface( if surface_fields is not None: surface_fields = surface_fields[ids_in_bbox] - # Compute the positional encoding before sampling - if self.config.positional_encoding: - dx, dy, dz = ( - (s_max[0] - s_min[0]) / nx, - (s_max[1] - s_min[1]) / ny, - (s_max[2] - s_min[2]) / nz, - ) - pos_normals_com_surface = calculate_normal_positional_encoding( - surface_coordinates, center_of_mass, cell_dimensions=[dx, dy, dz] - ) - else: - pos_normals_com_surface = surface_coordinates - center_of_mass - ######################################################################## # Perform Down sampling of the surface fields. # Note that we snapshot the full surface coordinates for @@ -496,7 +458,7 @@ def process_surface( # Select out the sampled points for non-neighbor arrays: if surface_fields is not None: surface_fields = surface_fields[idx_surface] - pos_normals_com_surface = pos_normals_com_surface[idx_surface] + # Subsample the normals and sizes: surface_normals = surface_normals[idx_surface] surface_sizes = surface_sizes[idx_surface] @@ -526,6 +488,10 @@ def process_surface( surf_grid = normalize(surf_grid, s_max, s_min) surface_coordinates = normalize(surface_coordinates, s_max, s_min) surface_neighbors = normalize(surface_neighbors, s_max, s_min) + # Make sure to normalize the center of mass for the normals_com_surface calc + center_of_mass = normalize(center_of_mass, s_max, s_min) + + pos_normals_com_surface = surface_coordinates - center_of_mass ######################################################################## # Apply scaling to the targets, if desired: @@ -691,26 +657,13 @@ def calculate_volume_encoding( sdf_node_closest_point: torch.Tensor, center_of_mass: torch.Tensor, ): - nx, ny, nz = self.config.grid_resolution - - dx, dy, dz = ( - (c_max[0] - c_min[0]) / nx, - (c_max[1] - c_min[1]) / ny, - (c_max[2] - c_min[2]) / nz, - ) + if self.config.normalize_coordinates: + volume_coordinates = normalize(volume_coordinates, c_max, c_min) + sdf_node_closest_point = normalize(sdf_node_closest_point, c_max, c_min) + center_of_mass = normalize(center_of_mass, c_max, c_min) - if self.config.positional_encoding: - pos_normals_closest_vol = calculate_normal_positional_encoding( - volume_coordinates, - sdf_node_closest_point, - cell_dimensions=[dx, dy, dz], - ) - pos_normals_com_vol = calculate_normal_positional_encoding( - volume_coordinates, center_of_mass, cell_dimensions=[dx, dy, dz] - ) - else: - pos_normals_closest_vol = volume_coordinates - sdf_node_closest_point - pos_normals_com_vol = volume_coordinates - center_of_mass + pos_normals_closest_vol = volume_coordinates - sdf_node_closest_point + pos_normals_com_vol = volume_coordinates - center_of_mass return pos_normals_closest_vol, pos_normals_com_vol @@ -728,9 +681,50 @@ def process_data(self, data_dict): # This function gets information about the surface scale, # and decides what the surface grid will be: + + stl_coordinates = data_dict["stl_coordinates"] + s_min, s_max, surf_grid = self.compute_stl_scaling_and_surface_grids( - data_dict["stl_coordinates"] + stl_coordinates ) + + if isinstance(stl_coordinates, ShardTensor): + mesh = stl_coordinates._spec.mesh + # Then, replicate the bounding box along the mesh if present. + s_max = scatter_tensor( + s_max, + 0, + mesh=mesh, + placements=[ + Replicate(), + ], + global_shape=s_max.shape, + dtype=s_max.dtype, + requires_grad=False, + ) + s_min = scatter_tensor( + s_min, + 0, + mesh=mesh, + placements=[ + Replicate(), + ], + global_shape=s_min.shape, + dtype=s_min.dtype, + requires_grad=False, + ) + surf_grid = scatter_tensor( + surf_grid, + 0, + mesh=mesh, + placements=[ + Replicate(), + ], + global_shape=surf_grid.shape, + dtype=surf_grid.dtype, + requires_grad=False, + ) + return_dict["surf_grid"] = surf_grid # We always need to calculate the SDF on the surface grid: @@ -1203,7 +1197,6 @@ def create_domino_dataset( volume_points_sample=cfg.model.volume_points_sample, surface_points_sample=cfg.model.surface_points_sample, geom_points_sample=cfg.model.geom_points_sample, - positional_encoding=cfg.model.positional_encoding, volume_factors=vol_factors, surface_factors=surf_factors, scaling_type=cfg.model.normalization, @@ -1211,8 +1204,6 @@ def create_domino_dataset( bounding_box_dims=cfg.data.bounding_box, bounding_box_dims_surf=cfg.data.bounding_box_surface, num_surface_neighbors=cfg.model.num_neighbors_surface, - resample_surfaces=cfg.model.resampling_surface_mesh.resample, - resampling_points=cfg.model.resampling_surface_mesh.points, surface_sampling_algorithm=cfg.model.surface_sampling_algorithm, **overrides, ) From 1c191b5e4923429b070acc6f32396301bb520466 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Mon, 29 Sep 2025 20:38:40 +0000 Subject: [PATCH 56/98] Use ones_like to create a tensor --- physicsnemo/utils/domino/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/physicsnemo/utils/domino/utils.py b/physicsnemo/utils/domino/utils.py index 95a7011976..394b86a420 100644 --- a/physicsnemo/utils/domino/utils.py +++ b/physicsnemo/utils/domino/utils.py @@ -442,7 +442,7 @@ def shuffle_array( # If there are no weights, use uniform weights: if weights is None: - weights = torch.ones(N_input_points, device=points.device) + weights = torch.ones_like(points, device=points.device) # Using torch multinomial for this. # Multinomial can't work with more than 2^24 input points. From 4310e524781e1e554c4884cbee6e924eda29664a Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Mon, 29 Sep 2025 20:43:15 +0000 Subject: [PATCH 57/98] Move old script to new location --- .../src/{ => deprecated}/inference_on_stl.py | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) rename examples/cfd/external_aerodynamics/domino/src/{ => deprecated}/inference_on_stl.py (98%) diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py b/examples/cfd/external_aerodynamics/domino/src/deprecated/inference_on_stl.py similarity index 98% rename from examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py rename to examples/cfd/external_aerodynamics/domino/src/deprecated/inference_on_stl.py index e4ec80f2e0..b48e2b50f2 100644 --- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py +++ b/examples/cfd/external_aerodynamics/domino/src/deprecated/inference_on_stl.py @@ -372,8 +372,18 @@ def process_surface_mesh(self, bounding_box=None, bounding_box_surface=None): surf_sdf_grid = torch.reshape(surf_sdf_grid, (nx, ny, nz)) if self.normalize_coordinates: - sdf_grid = 2.0 * (sdf_grid - torch.amax(grid)) / (torch.amax(grid) - torch.amin(grid)) - 1.0 - surf_sdf_grid = 2.0 * (surf_sdf_grid - torch.amax(s_grid)) / (torch.amax(s_grid) - torch.amin(s_grid)) - 1.0 + sdf_grid = ( + 2.0 + * (sdf_grid - torch.amax(grid)) + / (torch.amax(grid) - torch.amin(grid)) + - 1.0 + ) + surf_sdf_grid = ( + 2.0 + * (surf_sdf_grid - torch.amax(s_grid)) + / (torch.amax(s_grid) - torch.amin(s_grid)) + - 1.0 + ) grid = 2.0 * (grid - c_min) / (c_max - c_min) - 1.0 s_grid = 2.0 * (s_grid - surf_min) / (surf_max - surf_min) - 1.0 @@ -645,8 +655,15 @@ def sample_points_in_volume( sdf_nodes = torch.unsqueeze(sdf_nodes, -1) if self.normalize_coordinates: - volume_coordinates = 2.0 * (volume_coordinates - c_min) / (c_max - c_min) - 1.0 - sdf_nodes = 2.0 * (sdf_nodes - torch.amax(c_max)) / (torch.amax(c_max) - torch.amin(c_min)) - 1.0 + volume_coordinates = ( + 2.0 * (volume_coordinates - c_min) / (c_max - c_min) - 1.0 + ) + sdf_nodes = ( + 2.0 + * (sdf_nodes - torch.amax(c_max)) + / (torch.amax(c_max) - torch.amin(c_min)) + - 1.0 + ) sdf_node_closest_point = ( 2.0 * (sdf_node_closest_point - c_min) / (c_max - c_min) - 1.0 ) From cd64439fce5c22dcedcbaba913d0f2d9f3413e3b Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Tue, 30 Sep 2025 03:32:39 +0000 Subject: [PATCH 58/98] Update some tests to match the new datapipe structure --- .../datapipes/cae/drivaer_ml_dataset.py | 6 +- physicsnemo/utils/domino/utils.py | 2 +- test/datapipes/test_domino_datapipe.py | 203 +++++++++++------- .../shard_tensor/ops/test_radius_search.py | 3 +- 4 files changed, 132 insertions(+), 82 deletions(-) diff --git a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py index 13009fc968..1e6ae62f81 100644 --- a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py +++ b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py @@ -669,7 +669,11 @@ def __init__( raise NotADirectoryError(f"Data directory {data_dir} is not a directory") self._keys_to_read = keys_to_read - self._keys_to_read_if_available = keys_to_read_if_available + + # Make sure the optional keys are on the right device: + self._keys_to_read_if_available = { + k: v.to(output_device) for k, v in keys_to_read_if_available.items() + } self.file_reader, self._filenames = self._infer_file_type_and_filenames( data_dir diff --git a/physicsnemo/utils/domino/utils.py b/physicsnemo/utils/domino/utils.py index 394b86a420..fc3af36334 100644 --- a/physicsnemo/utils/domino/utils.py +++ b/physicsnemo/utils/domino/utils.py @@ -442,7 +442,7 @@ def shuffle_array( # If there are no weights, use uniform weights: if weights is None: - weights = torch.ones_like(points, device=points.device) + weights = torch.ones(points.shape[0], device=points.device) # Using torch multinomial for this. # Multinomial can't work with more than 2^24 input points. diff --git a/test/datapipes/test_domino_datapipe.py b/test/datapipes/test_domino_datapipe.py index 8df540d9ef..bb463792ff 100644 --- a/test/datapipes/test_domino_datapipe.py +++ b/test/datapipes/test_domino_datapipe.py @@ -18,7 +18,7 @@ import tempfile from dataclasses import dataclass from pathlib import Path -from typing import List, Literal +from typing import List, Literal, Optional, Sequence import numpy as np import pytest @@ -32,6 +32,7 @@ DoMINODataConfig, DoMINODataPipe, ) +from physicsnemo.datapipes.cae.drivaer_ml_dataset import DrivaerMLDataset Tensor = torch.Tensor @@ -242,7 +243,22 @@ def bounding_boxes(): } -def create_basic_dataset(data_dir, model_type, **kwargs): +def create_basic_dataset( + data_dir, + model_type, + gpu_preprocessing: bool = False, + gpu_output: bool = False, + normalize_coordinates: bool = False, + sample_in_bbox: bool = False, + sampling: bool = False, + volume_points_sample: int = 1234, + surface_points_sample: int = 1234, + surface_sampling_algorithm: str = "random", + caching: bool = False, + scaling_type: Optional[Literal["min_max_scaling", "mean_std_scaling"]] = None, + volume_factors: Optional[Sequence] = None, + surface_factors: Optional[Sequence] = None, +): """Helper function to create a basic DoMINODataPipe with default settings.""" # assert model_type in ["volume", "surface", "combined"] @@ -251,43 +267,80 @@ def create_basic_dataset(data_dir, model_type, **kwargs): bounding_box = bounding_boxes() + keys_to_read = [ + "stl_coordinates", + "stl_faces", + "stl_centers", + "stl_areas", + ] + + if model_type == "volume" or model_type == "combined": + keys_to_read += [ + "volume_mesh_centers", + "volume_fields", + ] + + if model_type == "surface" or model_type == "combined": + keys_to_read += [ + "surface_mesh_centers", + "surface_areas", + "surface_normals", + "surface_fields", + ] + + keys_to_read_if_available = { + "global_params_values": torch.tensor([1.225, 10.0]), + "global_params_reference": torch.tensor([1.225, 10.0]), + } + + dataset = DrivaerMLDataset( + data_dir=input_path, + keys_to_read=keys_to_read, + keys_to_read_if_available=keys_to_read_if_available, + output_device=torch.device("cuda") + if gpu_preprocessing + else torch.device("cpu"), + preload_depth=0, + pin_memory=False, + device_mesh=None, + placements=None, + ) + default_kwargs = { "phase": "test", "grid_resolution": [64, 64, 64], - "volume_points_sample": 1234, - "surface_points_sample": 1234, - "geom_points_sample": 2345, + "volume_points_sample": volume_points_sample, + "surface_points_sample": surface_points_sample, + "geom_points_sample": 500, "num_surface_neighbors": 5, "bounding_box_dims": bounding_box["volume"], "bounding_box_dims_surf": bounding_box["surface"], - "normalize_coordinates": True, - "sampling": False, - "sample_in_bbox": False, - "positional_encoding": False, - "scaling_type": None, - "volume_factors": None, - "surface_factors": None, - "caching": False, - "compute_scaling_factors": False, - "gpu_preprocessing": True, - "gpu_output": True, + "normalize_coordinates": normalize_coordinates, + "sampling": sampling, + "sample_in_bbox": sample_in_bbox, + "scaling_type": scaling_type, + "volume_factors": volume_factors, + "surface_factors": surface_factors, + "caching": caching, + "gpu_preprocessing": gpu_preprocessing, + "gpu_output": gpu_output, + "surface_sampling_algorithm": surface_sampling_algorithm, } - default_kwargs.update(kwargs) - - print(f"kwargs: {default_kwargs}") - - return DoMINODataPipe( + pipe = DoMINODataPipe( input_path=input_path, model_type=model_type, **default_kwargs ) + pipe.set_dataset(dataset) + return pipe + def validate_sample_structure(sample, model_type, gpu_output): """Helper function to validate the structure of a dataset sample.""" assert isinstance(sample, dict) # Common keys that should always be present - expected_keys = ["geometry_coordinates", "length_scale", "surface_min_max"] + expected_keys = ["geometry_coordinates"] # Model-specific keys volume_keys = [ @@ -310,6 +363,9 @@ def validate_sample_structure(sample, model_type, gpu_output): expected_keys.extend(surface_keys) # Check that required keys are present and are torch tensors on correct device + for key in expected_keys: + print(f"Got key: {key} on device: {sample[key].device.type}") + for key in expected_keys: if key in sample: # Some keys may be None if compute_scaling_factors=True if sample[key] is not None: @@ -335,7 +391,13 @@ def test_domino_datapipe_core( data_dir = request.getfixturevalue(data_dir) dataset = create_basic_dataset( - data_dir, model_type, gpu_preprocessing=gpu_preprocessing, gpu_output=gpu_output + data_dir, + model_type, + gpu_preprocessing=gpu_preprocessing, + gpu_output=gpu_output, + normalize_coordinates=False, + sample_in_bbox=False, + sampling=False, ) assert len(dataset) > 0 @@ -356,8 +418,10 @@ def test_domino_datapipe_coordinate_normalization( zarr_dataset, model_type, gpu_preprocessing=True, + gpu_output=True, normalize_coordinates=normalize_coordinates, sample_in_bbox=sample_in_bbox, + sampling=False, ) sample = dataset[0] @@ -457,17 +521,26 @@ def test_domino_datapipe_coordinate_normalization( def test_domino_datapipe_sampling(zarr_dataset, model_type, sampling, pytestconfig): """Test point sampling functionality.""" sample_points = 4321 + + use_cuda = torch.cuda.is_available() + dataset = create_basic_dataset( zarr_dataset, model_type, - gpu_preprocessing=False, + gpu_preprocessing=use_cuda, + gpu_output=use_cuda, + normalize_coordinates=False, + sample_in_bbox=False, sampling=sampling, volume_points_sample=sample_points, surface_points_sample=sample_points, ) sample = dataset[0] - validate_sample_structure(sample, model_type, gpu_output=True) + validate_sample_structure(sample, model_type, gpu_output=use_cuda) + + for key in sample: + print(f"sample[{key}].shape: {sample[key].shape}") if model_type in ["volume", "combined"]: for key in ["volume_mesh_centers", "volume_fields"]: @@ -501,41 +574,13 @@ def test_domino_datapipe_sampling(zarr_dataset, model_type, sampling, pytestconf assert sample[key].shape[2] == dataset.config.num_surface_neighbors - 1 -@import_or_fail(["warp", "cupy", "cuml"]) -@pytest.mark.parametrize("model_type", ["combined"]) -@pytest.mark.parametrize( - "positional_encoding", - [ - True, - ], -) -def test_domino_datapipe_positional_encoding( - zarr_dataset, model_type, positional_encoding, pytestconfig -): - """Test positional encoding functionality.""" - dataset = create_basic_dataset( - zarr_dataset, - model_type, - gpu_preprocessing=False, - positional_encoding=positional_encoding, - ) - - sample = dataset[0] - validate_sample_structure(sample, model_type, gpu_output=True) - - # Check for positional encoding keys - if positional_encoding: - pos_keys = ["pos_volume_closest", "pos_volume_center_of_mass"] - for key in pos_keys: - if key in sample: - assert sample[key] is not None - - @import_or_fail(["warp", "cupy", "cuml"]) @pytest.mark.parametrize("model_type", ["volume"]) @pytest.mark.parametrize("scaling_type", [None, "min_max_scaling", "mean_std_scaling"]) def test_domino_datapipe_scaling(zarr_dataset, model_type, scaling_type, pytestconfig): """Test field scaling functionality.""" + use_cuda = torch.cuda.is_available() + if scaling_type == "min_max_scaling": volume_factors = [10.0, -10.0] # [max, min] elif scaling_type == "mean_std_scaling": @@ -546,13 +591,14 @@ def test_domino_datapipe_scaling(zarr_dataset, model_type, scaling_type, pytestc dataset = create_basic_dataset( zarr_dataset, model_type, - gpu_preprocessing=False, + gpu_preprocessing=use_cuda, + gpu_output=use_cuda, scaling_type=scaling_type, volume_factors=volume_factors, ) sample = dataset[0] - validate_sample_structure(sample, model_type, gpu_output=True) + validate_sample_structure(sample, model_type, gpu_output=use_cuda) # Caching tests @@ -560,18 +606,18 @@ def test_domino_datapipe_scaling(zarr_dataset, model_type, scaling_type, pytestc @pytest.mark.parametrize("model_type", ["volume"]) def test_domino_datapipe_caching_config(zarr_dataset, model_type, pytestconfig): """Test DoMINODataPipe with caching=True configuration.""" + use_cuda = torch.cuda.is_available() dataset = create_basic_dataset( zarr_dataset, model_type, - gpu_preprocessing=False, + gpu_preprocessing=use_cuda, + gpu_output=use_cuda, caching=True, sampling=False, # Required for caching - compute_scaling_factors=False, # Required for caching - resample_surfaces=False, # Required for caching ) sample = dataset[0] - validate_sample_structure(sample, model_type, gpu_output=True) + validate_sample_structure(sample, model_type, gpu_output=use_cuda) @import_or_fail(["warp", "cupy", "cuml"]) @@ -617,24 +663,16 @@ def test_cached_domino_dataset(zarr_dataset, tmp_path, pytestconfig): def test_domino_datapipe_invalid_caching_config(zarr_dataset, pytestconfig): """Test that invalid caching configurations raise appropriate errors.""" + use_cuda = torch.cuda.is_available() # Test: caching=True with sampling=True should fail with pytest.raises(ValueError, match="Sampling should be False for caching"): - create_basic_dataset(zarr_dataset, "volume", caching=True, sampling=True) - - # Test: caching=True with compute_scaling_factors=True should fail - with pytest.raises( - ValueError, match="Compute scaling factors should be False for caching" - ): - create_basic_dataset( - zarr_dataset, "volume", caching=True, compute_scaling_factors=True - ) - - # Test: caching=True with resample_surfaces=True should fail - with pytest.raises( - ValueError, match="Resample surface should be False for caching" - ): create_basic_dataset( - zarr_dataset, "volume", caching=True, resample_surfaces=True + zarr_dataset, + "volume", + caching=True, + sampling=True, + gpu_preprocessing=use_cuda, + gpu_output=use_cuda, ) @@ -661,12 +699,15 @@ def test_domino_datapipe_file_format_support(zarr_dataset, pytestconfig): """Test support for different file formats (.zarr, .npz, .npy).""" # This test assumes the data directory has files in these formats # If not available, we can mock the file reading - dataset = create_basic_dataset(zarr_dataset, "volume", gpu_preprocessing=False) + use_cuda = torch.cuda.is_available() + dataset = create_basic_dataset( + zarr_dataset, "volume", gpu_preprocessing=use_cuda, gpu_output=use_cuda + ) # Just verify we can load at least one sample assert len(dataset) > 0 sample = dataset[0] - validate_sample_structure(sample, "volume", gpu_output=True) + validate_sample_structure(sample, "volume", gpu_output=use_cuda) # Surface-specific tests (when GPU preprocessing issues are resolved) @@ -676,10 +717,14 @@ def test_domino_datapipe_surface_sampling( zarr_dataset, surface_sampling_algorithm, pytestconfig ): """Test surface sampling algorithms.""" + + gpu = torch.cuda.is_available() + dataset = create_basic_dataset( zarr_dataset, "surface", - gpu_preprocessing=False, # Avoid known GPU issues + gpu_preprocessing=gpu, + gpu_output=gpu, sampling=True, surface_sampling_algorithm=surface_sampling_algorithm, ) diff --git a/test/distributed/shard_tensor/ops/test_radius_search.py b/test/distributed/shard_tensor/ops/test_radius_search.py index 0ebaf05536..7c18cd0190 100644 --- a/test/distributed/shard_tensor/ops/test_radius_search.py +++ b/test/distributed/shard_tensor/ops/test_radius_search.py @@ -31,7 +31,6 @@ import torch from physicsnemo.distributed import DistributedManager -from physicsnemo.models.domino.model import BQWarp from physicsnemo.utils.version_check import check_module_requirements try: @@ -138,6 +137,8 @@ def run_radius_search_module(model, data_dict, reverse_mapping): def test_sharded_radius_search_layer_forward( distributed_mesh, shard_points, shard_grid, reverse_mapping ): + from physicsnemo.models.layers.ball_query import BQWarp + dm = DistributedManager() device = dm.device From 4b1a3fdd17c57914366512a2ba6197cfd733a994 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Tue, 30 Sep 2025 15:35:43 +0000 Subject: [PATCH 59/98] Fix dataloading error, and remove old datapipe --- .../domino/requirements.txt | 1 + .../domino/src/benchmark_dataloader.py | 2 +- .../domino/src/compute_statistics.py | 2 +- .../domino/src/inference_on_stl.py | 2 +- .../external_aerodynamics/domino/src/loss.py | 6 +- .../external_aerodynamics/domino/src/train.py | 29 +- physicsnemo/datapipes/cae/__init__.py | 2 +- physicsnemo/datapipes/cae/domino_datapipe.py | 1649 +++++++---------- physicsnemo/datapipes/cae/domino_datapipe2.py | 1222 ------------ .../datapipes/cae/domino_sharded_datapipe.py | 176 -- .../datapipes/cae/drivaer_ml_dataset.py | 3 + 11 files changed, 732 insertions(+), 2362 deletions(-) delete mode 100644 physicsnemo/datapipes/cae/domino_datapipe2.py delete mode 100644 physicsnemo/datapipes/cae/domino_sharded_datapipe.py diff --git a/examples/cfd/external_aerodynamics/domino/requirements.txt b/examples/cfd/external_aerodynamics/domino/requirements.txt index 4c689c85e2..1d2cfe7dd9 100644 --- a/examples/cfd/external_aerodynamics/domino/requirements.txt +++ b/examples/cfd/external_aerodynamics/domino/requirements.txt @@ -3,3 +3,4 @@ warp-lang tensorboard cuml einops +tensorstore \ No newline at end of file diff --git a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py index 62a41d3383..090fbf361c 100644 --- a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py +++ b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py @@ -64,7 +64,7 @@ from physicsnemo.launch.utils import load_checkpoint, save_checkpoint from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper -from physicsnemo.datapipes.cae.domino_datapipe2 import ( +from physicsnemo.datapipes.cae.domino_datapipe import ( DoMINODataPipe, compute_scaling_factors, create_domino_dataset, diff --git a/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py b/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py index ac917d5353..d3516dff0f 100644 --- a/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py +++ b/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py @@ -37,7 +37,7 @@ from physicsnemo.distributed import DistributedManager from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper -from physicsnemo.datapipes.cae.domino_datapipe2 import compute_scaling_factors +from physicsnemo.datapipes.cae.domino_datapipe import compute_scaling_factors from utils import ScalingFactors diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py index f2c3388ada..a55f703d66 100644 --- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py +++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py @@ -64,7 +64,7 @@ from physicsnemo.launch.utils import load_checkpoint, save_checkpoint from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper -from physicsnemo.datapipes.cae.domino_datapipe2 import ( +from physicsnemo.datapipes.cae.domino_datapipe import ( DoMINODataPipe, create_domino_dataset, ) diff --git a/examples/cfd/external_aerodynamics/domino/src/loss.py b/examples/cfd/external_aerodynamics/domino/src/loss.py index 0d90ab3674..e8a143b9c4 100644 --- a/examples/cfd/external_aerodynamics/domino/src/loss.py +++ b/examples/cfd/external_aerodynamics/domino/src/loss.py @@ -46,11 +46,7 @@ from physicsnemo.launch.utils import load_checkpoint, save_checkpoint from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper -from physicsnemo.datapipes.cae.domino_datapipe2 import ( - DoMINODataPipe, - compute_scaling_factors, - create_domino_dataset, -) + from physicsnemo.models.domino.model import DoMINO from physicsnemo.utils.domino.utils import * diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py index ea71ee2a71..3763ce68ec 100644 --- a/examples/cfd/external_aerodynamics/domino/src/train.py +++ b/examples/cfd/external_aerodynamics/domino/src/train.py @@ -75,7 +75,7 @@ def srt2bool(val: str): from physicsnemo.launch.utils import load_checkpoint, save_checkpoint from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper -from physicsnemo.datapipes.cae.domino_datapipe2 import ( +from physicsnemo.datapipes.cae.domino_datapipe import ( DoMINODataPipe, create_domino_dataset, ) @@ -122,7 +122,7 @@ def validation_step( for i_batch, sample_batched in enumerate(dataloader): sampled_batched = dict_to_device(sample_batched, device) - with autocast("cuda", enabled=True): + with autocast("cuda", enabled=True, cache_enabled=False): if add_physics_loss: prediction_vol, prediction_surf = model( sampled_batched, return_volume_neighbors=True @@ -184,12 +184,16 @@ def train_epoch( with Profiler(): io_start_time = time.perf_counter() for i_batch, sampled_batched in enumerate(dataloader): + for key in sampled_batched.keys(): + print( + f"{key} has shape {sampled_batched[key].shape} and autograd fn {sampled_batched[key].autograd_fn if hasattr(sampled_batched[key], 'autograd_fn') else None}" + ) io_end_time = time.perf_counter() if add_physics_loss: autocast_enabled = False else: autocast_enabled = True - with autocast("cuda", enabled=autocast_enabled): + with autocast("cuda", enabled=autocast_enabled, cache_enabled=False): with nvtx.range("Model Forward Pass"): if add_physics_loss: prediction_vol, prediction_surf = model( @@ -478,14 +482,15 @@ def main(cfg: DictConfig) -> None: # Load checkpoint if available ###################################################### - init_epoch = load_checkpoint( - to_absolute_path(cfg.resume_dir), - models=model, - optimizer=optimizer, - scheduler=scheduler, - scaler=scaler, - device=dist.device, - ) + # init_epoch = load_checkpoint( + # to_absolute_path(cfg.resume_dir), + # models=model, + # optimizer=optimizer, + # scheduler=scheduler, + # scaler=scaler, + # device=dist.device, + # ) + init_epoch = 0 if init_epoch != 0: init_epoch += 1 # Start with the next epoch @@ -529,7 +534,7 @@ def main(cfg: DictConfig) -> None: else: surface_scaling_loss = cfg.model.surf_loss_scaling - model.train(True) + # model.train(True) epoch_start_time = time.perf_counter() avg_loss = train_epoch( dataloader=train_dataloader, diff --git a/physicsnemo/datapipes/cae/__init__.py b/physicsnemo/datapipes/cae/__init__.py index 9af8d88db2..c0d17ff723 100644 --- a/physicsnemo/datapipes/cae/__init__.py +++ b/physicsnemo/datapipes/cae/__init__.py @@ -14,5 +14,5 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .domino_datapipe2 import DoMINODataPipe +from .domino_datapipe import DoMINODataPipe from .mesh_datapipe import MeshDatapipe diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py index 666e4bfad5..5feae7e118 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe.py +++ b/physicsnemo/datapipes/cae/domino_datapipe.py @@ -17,7 +17,7 @@ """ This code provides the datapipe for reading the processed npy files, generating multi-res grids, calculating signed distance fields, -positional encodings, sampling random points in the volume and on surface, +sampling random points in the volume and on surface, normalizing fields and returning the output tensors as a dictionary. This datapipe also non-dimensionalizes the fields, so the order in which the variables should @@ -26,101 +26,46 @@ variable names, domain resolution, sampling size etc. are configurable in config.yaml. """ -import os -import time -from concurrent.futures import ThreadPoolExecutor -from contextlib import nullcontext from dataclasses import dataclass from pathlib import Path -from typing import Literal, Optional, Protocol, Sequence, Union +from typing import Iterable, Literal, Optional, Protocol, Sequence, Union -import cuml -import cupy as cp import numpy as np import torch import torch.cuda.nvtx as nvtx -import zarr from omegaconf import DictConfig -from scipy.spatial import KDTree -from torch import Tensor -from torch.utils.data import Dataset, default_collate +from torch.distributed.tensor.placement_types import Replicate +from torch.utils.data import Dataset +from physicsnemo.datapipes.cae.drivaer_ml_dataset import ( + DrivaerMLDataset, + compute_mean_std_min_max, +) from physicsnemo.distributed import DistributedManager +from physicsnemo.distributed.shard_tensor import ShardTensor, scatter_tensor from physicsnemo.utils.domino.utils import ( - ArrayType, - area_weighted_shuffle_array, calculate_center_of_mass, create_grid, get_filenames, - mean_std_sampling, normalize, pad, - # sample_array, shuffle_array, - solution_weighted_shuffle_array, standardize, + unnormalize, + unstandardize, ) +from physicsnemo.utils.neighbors import knn from physicsnemo.utils.profiling import profile from physicsnemo.utils.sdf import signed_distance_field -""" -These functions, below, are to handle the SDF calculation which only -accepts torch tensors. The entire pipeline is moving to torch, so -these aren't necessary after that. -""" - - -def _convert_array_to_torch(array: cp.ndarray | np.ndarray) -> torch.Tensor: - """ - TEMPORARY function to convert cupy and numpy arrays to torch tensors. - """ - if isinstance(array, cp.ndarray): - return torch.utils.dlpack.from_dlpack(array) - elif isinstance(array, np.ndarray): - return torch.from_numpy(array) - else: - raise ValueError(f"Unsupported array type: {type(array)}") - - -def _convert_torch_to_array(array: torch.Tensor, provider) -> cp.ndarray | np.ndarray: - """ - TEMPORARY function to convert torch tensors to cupy arrays. - """ - return provider.from_dlpack(array) - - -def domino_collate_fn(batch): - """ - This function is a custom collation function to move cupy data to torch tensors on the device. - - For things that aren't cupy arrays, fall back to torch.data.default_convert. Data, here, - is a dictionary of numpy arrays or cupy arrays. - - """ - - def convert(obj): - if isinstance(obj, cp.ndarray): - return torch.utils.dlpack.from_dlpack(obj.toDlpack()) - elif isinstance(obj, list): - return [convert(x) for x in obj] - elif isinstance(obj, tuple): - return tuple(convert(x) for x in obj) - elif isinstance(obj, dict): - return {k: convert(v) for k, v in obj.items()} - else: - return obj - - batch = [convert(sample) for sample in batch] - return default_collate(batch) - class BoundingBox(Protocol): """ Type definition for the required format of bounding box dimensions. """ - min: ArrayType - max: ArrayType + min: Sequence + max: Sequence @dataclass @@ -179,7 +124,7 @@ class DoMINODataConfig: You might choose gpu_preprocessing=True and gpu_output=False if caching. """ - data_path: Path + data_path: Path | None phase: Literal["train", "val", "test"] # Surface-specific variables: @@ -196,7 +141,7 @@ class DoMINODataConfig: volume_factors: Optional[Sequence] = None bounding_box_dims: Optional[Union[BoundingBox, Sequence]] = None - grid_resolution: Union[Sequence, ArrayType] = (256, 96, 64) + grid_resolution: Sequence = (256, 96, 64) normalize_coordinates: bool = False sample_in_bbox: bool = False sampling: bool = False @@ -209,16 +154,17 @@ class DoMINODataConfig: gpu_output: bool = True def __post_init__(self): - # Ensure data_path is a Path object: - if isinstance(self.data_path, str): - self.data_path = Path(self.data_path) - self.data_path = self.data_path.expanduser() + if self.data_path is not None: + # Ensure data_path is a Path object: + if isinstance(self.data_path, str): + self.data_path = Path(self.data_path) + self.data_path = self.data_path.expanduser() - if not self.data_path.exists(): - raise ValueError(f"Path {self.data_path} does not exist") + if not self.data_path.exists(): + raise ValueError(f"Path {self.data_path} does not exist") - if not self.data_path.is_dir(): - raise ValueError(f"Path {self.data_path} is not a directory") + if not self.data_path.is_dir(): + raise ValueError(f"Path {self.data_path} is not a directory") # Object if caching settings are impossible: if self.caching: @@ -246,328 +192,179 @@ def __post_init__(self): ##### TODO -# - put model type in config or leave in __init__ -# - check the bounding box protocol works +# - The SDF normalization here is based on using a normalized mesh and +# a normalized coordinate. The alternate method is to normalize to the min/max of the grid. class DoMINODataPipe(Dataset): """ Datapipe for DoMINO + Leverages a dataset for the actual reading of the data, and this + object is responsible for preprocessing the data. + """ def __init__( self, input_path, model_type: Literal["surface", "volume", "combined"], + pin_memory: bool = False, **data_config_overrides, ): # Perform config packaging and validation self.config = DoMINODataConfig(data_path=input_path, **data_config_overrides) + # Set up the distributed manager: if not DistributedManager.is_initialized(): DistributedManager.initialize() dist = DistributedManager() - if self.config.gpu_preprocessing or self.config.gpu_output: - # Make sure we move data to the right device: - target_device = dist.device.index - self.device_context = cp.cuda.Device(target_device) - self.device_context.use() - else: - self.device_context = nullcontext() - self.device = dist.device - - if self.config.deterministic: - np.random.seed(42) - cp.random.seed(42) - else: - np.random.seed(seed=int(time.time())) - cp.random.seed(seed=int(time.time())) + # Set devices for the preprocessing and IO target + self.preproc_device = ( + dist.device if self.config.gpu_preprocessing else torch.device("cpu") + ) + # The drivaer_ml_dataset will automatically target this device + # In an async transfer. + self.output_device = ( + dist.device if self.config.gpu_output else torch.device("cpu") + ) + # Model type determines whether we process surface, volume, or both. self.model_type = model_type - self.filenames = get_filenames(self.config.data_path, exclude_dirs=True) - total_files = len(self.filenames) - - self.indices = np.array(range(total_files)) - - # Why shuffle the indices here if only using random access below? - - np.random.shuffle(self.indices) - - # Determine the array provider based on what device - # will do preprocessing: - self.array_provider = cp if self.config.gpu_preprocessing else np # Update the arrays for bounding boxes: - if hasattr(self.config.bounding_box_dims, "max") and hasattr( self.config.bounding_box_dims, "min" ): self.config.bounding_box_dims = [ - self.array_provider.asarray(self.config.bounding_box_dims.max).astype( - "float32" + torch.tensor( + self.config.bounding_box_dims.max, + device=self.preproc_device, + dtype=torch.float32, ), - self.array_provider.asarray(self.config.bounding_box_dims.min).astype( - "float32" + torch.tensor( + self.config.bounding_box_dims.min, + device=self.preproc_device, + dtype=torch.float32, ), ] + self.default_volume_grid = create_grid( + self.config.bounding_box_dims[0], + self.config.bounding_box_dims[1], + self.config.grid_resolution, + ) + + # And, do the surface bounding box if supplied: if hasattr(self.config.bounding_box_dims_surf, "max") and hasattr( self.config.bounding_box_dims_surf, "min" ): self.config.bounding_box_dims_surf = [ - self.array_provider.asarray( - self.config.bounding_box_dims_surf.max - ).astype("float32"), - self.array_provider.asarray( - self.config.bounding_box_dims_surf.min - ).astype("float32"), + torch.tensor( + self.config.bounding_box_dims_surf.max, + device=self.preproc_device, + dtype=torch.float32, + ), + torch.tensor( + self.config.bounding_box_dims_surf.min, + device=self.preproc_device, + dtype=torch.float32, + ), ] - # Used if threaded data is enabled: - self.max_workers = 24 - # Create a single thread pool for the class - self.executor = ThreadPoolExecutor(max_workers=self.max_workers) - - # Define here the keys to read for each __getitem__ call - - # Always read these keys - self.keys_to_read = ["stl_coordinates", "stl_centers", "stl_faces", "stl_areas"] - with self.device_context: - xp = self.array_provider - self.keys_to_read_if_available = { - "global_params_values": xp.asarray([30.0, 1.226]), - "global_params_reference": xp.asarray([30.0, 1.226]), - } - self.volume_keys = ["volume_mesh_centers", "volume_fields"] - self.surface_keys = [ - "surface_mesh_centers", - "surface_normals", - "surface_areas", - "surface_fields", - ] - - if self.model_type == "volume" or self.model_type == "combined": - self.keys_to_read.extend(self.volume_keys) - if self.model_type == "surface" or self.model_type == "combined": - self.keys_to_read.extend(self.surface_keys) - - def __del__(self): - # Clean up the executor when the instance is being destroyed - if hasattr(self, "executor"): - self.executor.shutdown() + self.default_surface_grid = create_grid( + self.config.bounding_box_dims_surf[0], + self.config.bounding_box_dims_surf[1], + self.config.grid_resolution, + ) - @profile - def read_data_zarr(self, filepath): - # def create_pinned_streaming_space(shape, dtype): - # # TODO - this function could boost performance a little, but - # # the pinned memory pool seems too small. - # if self.array_provider == cp: - # nbytes = np.prod(shape) * dtype.itemsize - # ptr = cp.cuda.alloc_pinned_memory(nbytes) - # arr = np.frombuffer(ptr, dtype) - # return arr.reshape(shape) - # else: - # return np.empty(shape, dtype=dtype) - - def read_chunk_into_array(ram_array, fs_zarr_array, slice): - ram_array[slice] = fs_zarr_array[slice] - - @profile - def chunked_aligned_read(zarr_group, key, futures): - zarr_array = zarr_group[key] - - shape = zarr_array.shape - chunk_size = zarr_array.chunks[0] - - # Pre-allocate the full result array - result_shape = zarr_array.shape - result_dtype = zarr_array.dtype - - result = np.empty(result_shape, dtype=result_dtype) - - for start in range(0, shape[0], chunk_size): - end = min(start + chunk_size, shape[0]) - read_slice = np.s_[start:end] - futures.append( - self.executor.submit( - read_chunk_into_array, result, zarr_array, read_slice - ) - ) + # Ensure the volume and surface scaling factors are torch tensors + # and on the right device: + if self.config.volume_factors is not None: + self.config.volume_factors = torch.tensor( + self.config.volume_factors, + device=self.preproc_device, + dtype=torch.float32, + ) + if self.config.surface_factors is not None: + self.config.surface_factors = torch.tensor( + self.config.surface_factors, + device=self.preproc_device, + dtype=torch.float32, + ) - return result + self.dataset = None - with zarr.open_group(filepath, mode="r") as z: - data = {} - futures = [] - if "volume_fields" in z.keys(): - data["volume_fields"] = chunked_aligned_read( - z, "volume_fields", futures - ) - if "volume_mesh_centers" in z.keys(): - data["volume_mesh_centers"] = chunked_aligned_read( - z, "volume_mesh_centers", futures - ) - - for key in self.keys_to_read: - if z[key].shape == (): - data[key] = z[key] - elif key in ["volume_fields", "volume_mesh_centers"]: - continue - else: - data[key] = np.empty(z[key].shape, dtype=z[key].dtype) - slice = np.s_[:] - futures.append( - self.executor.submit( - read_chunk_into_array, data[key], z[key], slice - ) - ) - - # Now wait for all the futures to complete - for future in futures: - result = future.result() - if isinstance(result, tuple) and len(result) == 2: - key, value = result - data[key] = value - - # Move big data to GPU - for key in data.keys(): - data[key] = self.array_provider.asarray(data[key]) - - # Optional, maybe-present keys - for key in self.keys_to_read_if_available: - if key not in data.keys(): - data[key] = self.keys_to_read_if_available[key] - - return data + def compute_stl_scaling_and_surface_grids( + self, + stl_vertices: torch.Tensor, + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """ + Compute the min and max for the defining mesh. - @profile - def read_data_npy(self, filepath): - with open(filepath, "rb") as f: - data = np.load(f, allow_pickle=True).item() + If the user supplies a bounding box, we use that. Otherwise, + it's created dynamically from the min/max of the stl vertices. - for key in self.keys_to_read_if_available: - if key not in data.keys(): - data[key] = self.keys_to_read_if_available[key] + The returned min/max and grid are used for surface data. + """ - if "filename" in data.keys(): - data.pop("filename", None) + # Check the bounding box is not unit length - if not (isinstance(data["stl_coordinates"], np.ndarray)): - data["stl_coordinates"] = np.asarray(data["stl_coordinates"]) + if self.config.bounding_box_dims_surf is not None: + s_max = self.config.bounding_box_dims_surf[0] + s_min = self.config.bounding_box_dims_surf[1] + surf_grid = self.default_surface_grid + else: + # Create the grid dynamically + s_min = torch.amin(stl_vertices, 0) + s_max = torch.amax(stl_vertices, 0) + surf_grid = create_grid(s_max, s_min, self.config.grid_resolution) - # Maybe move to GPU: - with self.device_context: - for key in data.keys(): - if data[key] is not None: - data[key] = self.array_provider.asarray(data[key]) - return data + return s_min, s_max, surf_grid - @profile - def read_data_npz( - self, - filepath, - max_workers=None, + def compute_volume_scaling_and_grids( + self, s_min: torch.Tensor, s_max: torch.Tensor ): - if max_workers is not None: - self.max_workers = max_workers - - def load_one(key): - with np.load(filepath) as data: - return key, data[key] - - def check_optional_keys(): - with np.load(filepath) as data: - optional_results = {} - for key in self.keys_to_read_if_available: - if key in data.keys(): - optional_results[key] = data[key] - else: - optional_results[key] = self.keys_to_read_if_available[key] - with self.device_context: - optional_results = { - key: self.array_provider.asarray(value) - for key, value in optional_results.items() - } - return optional_results - - # Use the class-level executor instead of creating a new one - results = dict(self.executor.map(load_one, self.keys_to_read)) - - # Move the results to the GPU: - with self.device_context: - for key in results.keys(): - results[key] = self.array_provider.asarray(results[key]) - - # Check the optional ones: - optional_results = check_optional_keys() - results.update(optional_results) - - return results + """ + Compute the min and max and grid for volume data. - def __len__(self): - return len(self.indices) + If the user supplies a bounding box, we use that. Otherwise, + it's created dynamically from the surface min/max. - @profile - def preprocess_combined(self, data_dict): - # Pull these out and force to fp32: - with self.device_context: - global_params_values = data_dict["global_params_values"].astype( - self.array_provider.float32 - ) - global_params_reference = data_dict["global_params_reference"].astype( - self.array_provider.float32 - ) + This will be 2x longer in x and y and the same in z as the surface bounding box. + """ - # Pull these pieces out of the data_dict for manipulation - stl_vertices = data_dict["stl_coordinates"] - stl_centers = data_dict["stl_centers"] - mesh_indices_flattened = data_dict["stl_faces"] - stl_sizes = data_dict["stl_areas"] - idx = np.where(stl_sizes > 0.0) - stl_sizes = stl_sizes[idx] - stl_centers = stl_centers[idx] + # Determine the volume min / max locations + if self.config.bounding_box_dims is not None: + c_max = self.config.bounding_box_dims[0] + c_min = self.config.bounding_box_dims[1] + volume_grid = self.default_volume_grid - xp = self.array_provider + else: + # Create the grid based on the surface grid + c_max = s_max + (s_max - s_min) / 2 + c_min = s_min - (s_max - s_min) / 2 + c_min[2] = s_min[2] + volume_grid = create_grid(c_max, c_min, self.config.grid_resolution) - # Make sure the mesh_indices_flattened is an integer array: - if mesh_indices_flattened.dtype != xp.int32: - mesh_indices_flattened = mesh_indices_flattened.astype(xp.int32) + return c_min, c_max, volume_grid - if self.config.bounding_box_dims_surf is None: - s_max = xp.amax(stl_vertices, 0) - s_min = xp.amin(stl_vertices, 0) - else: - s_max = xp.asarray(self.config.bounding_box_dims_surf[0]) - s_min = xp.asarray(self.config.bounding_box_dims_surf[1]) - - center_of_mass = calculate_center_of_mass(stl_centers, stl_sizes) - - # SDF calculation on the grid using WARP - if not self.config.compute_scaling_factors: - nx, ny, nz = self.config.grid_resolution - surf_grid = create_grid(s_max, s_min, [nx, ny, nz]) - surf_grid_reshaped = surf_grid.reshape(nx * ny * nz, 3) - - sdf_surf_grid, _ = signed_distance_field( - _convert_array_to_torch(stl_vertices), - _convert_array_to_torch(mesh_indices_flattened), - _convert_array_to_torch(surf_grid_reshaped), - use_sign_winding_number=True, - ) - sdf_surf_grid = sdf_surf_grid.reshape(nx, ny, nz) - sdf_surf_grid = _convert_torch_to_array(sdf_surf_grid, self.array_provider) - if self.config.normalize_coordinates: - sdf_surf_grid = normalize(sdf_surf_grid, xp.amax(surf_grid), xp.amin(surf_grid)) + @profile + def downsample_geometry( + self, + stl_vertices, + ) -> torch.Tensor: + """ + Downsample the geometry to the desired number of points. - else: - surf_grid = None - sdf_surf_grid = None + Args: + stl_vertices: The vertices of the surface. + """ if self.config.sampling: - # nvtx.range_push("Geometry Sampling") geometry_points = self.config.geom_points_sample + geometry_coordinates_sampled, idx_geometry = shuffle_array( stl_vertices, geometry_points ) @@ -576,208 +373,133 @@ def preprocess_combined(self, data_dict): geometry_coordinates_sampled, geometry_points, pad_value=-100.0 ) geom_centers = geometry_coordinates_sampled - # nvtx.range_pop() else: geom_centers = stl_vertices - # geom_centers = self.array_provider.float32(geom_centers) - - surf_grid_max_min = xp.stack([s_min, s_max]) + return geom_centers - return_dict = { - "surf_grid": surf_grid, - "sdf_surf_grid": sdf_surf_grid, - "surface_min_max": surf_grid_max_min, - "global_params_values": xp.expand_dims( - xp.array(global_params_values, dtype=xp.float32), -1 - ), - "global_params_reference": xp.expand_dims( - xp.array(global_params_reference, dtype=xp.float32), -1 - ), - "geometry_coordinates": geom_centers, - } - - return ( - return_dict, - s_min, - s_max, - mesh_indices_flattened, - stl_vertices, - center_of_mass, - ) - - @profile - def preprocess_surface(self, data_dict, core_dict, center_of_mass, s_min, s_max): + def process_surface( + self, + s_min: torch.Tensor, + s_max: torch.Tensor, + c_min: torch.Tensor, + c_max: torch.Tensor, + *, # Forcing the rest by keyword only since it's a long list ... + center_of_mass: torch.Tensor, + surf_grid: torch.Tensor, + surface_coordinates: torch.Tensor, + surface_normals: torch.Tensor, + surface_sizes: torch.Tensor, + stl_vertices: torch.Tensor, + stl_indices: torch.Tensor, + surface_fields: torch.Tensor | None, + ) -> dict[str, torch.Tensor]: nx, ny, nz = self.config.grid_resolution return_dict = {} - surface_coordinates = data_dict["surface_mesh_centers"] - surface_normals = data_dict["surface_normals"] - surface_sizes = data_dict["surface_areas"] - surface_fields = data_dict["surface_fields"] - idx = np.where(surface_sizes > 0) + ######################################################################## + # Remove any sizes <= 0: + ######################################################################## + idx = surface_sizes > 0 surface_sizes = surface_sizes[idx] - surface_fields = surface_fields[idx] surface_normals = surface_normals[idx] surface_coordinates = surface_coordinates[idx] + if surface_fields is not None: + surface_fields = surface_fields[idx] + + ######################################################################## + # Reject surface points outside of the Bounding Box + # NOTE - this is using the VOLUME bounding box! + ######################################################################## + if self.config.sample_in_bbox: + ids_min = surface_coordinates[:] > c_min + ids_max = surface_coordinates[:] < c_max + + ids_in_bbox = ids_min & ids_max + ids_in_bbox = ids_in_bbox.all(dim=-1) + + surface_coordinates = surface_coordinates[ids_in_bbox] + surface_normals = surface_normals[ids_in_bbox] + surface_sizes = surface_sizes[ids_in_bbox] + if surface_fields is not None: + surface_fields = surface_fields[ids_in_bbox] - xp = self.array_provider + ######################################################################## + # Perform Down sampling of the surface fields. + # Note that we snapshot the full surface coordinates for + # use in the kNN in the next step. + ######################################################################## - if not self.config.compute_scaling_factors: - c_max = self.config.bounding_box_dims[0] - c_min = self.config.bounding_box_dims[1] + full_surface_coordinates = surface_coordinates + full_surface_normals = surface_normals + full_surface_sizes = surface_sizes + + if self.config.sampling: + # Perform the down sampling: + if self.config.surface_sampling_algorithm == "area_weighted": + weights = surface_sizes + else: + weights = None + + surface_coordinates_sampled, idx_surface = shuffle_array( + surface_coordinates, + self.config.surface_points_sample, + weights=weights, + ) - if self.config.sample_in_bbox: - # TODO - clean this up with vectorization? - # TODO - the xp.where is likely a useless op. Need to check. - ids_in_bbox = xp.where( - (surface_coordinates[:, 0] > c_min[0]) - & (surface_coordinates[:, 0] < c_max[0]) - & (surface_coordinates[:, 1] > c_min[1]) - & (surface_coordinates[:, 1] < c_max[1]) - & (surface_coordinates[:, 2] > c_min[2]) - & (surface_coordinates[:, 2] < c_max[2]) + if surface_coordinates_sampled.shape[0] < self.config.surface_points_sample: + surface_coordinates_sampled = pad( + surface_coordinates_sampled, + self.config.surface_points_sample, + pad_value=-10.0, ) - surface_coordinates = surface_coordinates[ids_in_bbox] - surface_normals = surface_normals[ids_in_bbox] - surface_sizes = surface_sizes[ids_in_bbox] - surface_fields = surface_fields[ids_in_bbox] - - # Have to normalize neighbors after the kNN and sampling - if self.config.normalize_coordinates: - core_dict["surf_grid"] = normalize(core_dict["surf_grid"], s_max, s_min) - surface_coordinates = normalize(surface_coordinates, s_max, s_min) - center_of_mass_normalized = normalize(xp.asarray(center_of_mass), s_max, s_min) - else: - center_of_mass_normalized = xp.asarray(center_of_mass) - - pos_normals_com_surface = surface_coordinates - center_of_mass_normalized - - # Fit the kNN (or KDTree, if CPU) on ALL points: - if self.config.num_surface_neighbors > 1: - if self.array_provider == cp: - knn = cuml.neighbors.NearestNeighbors( - n_neighbors=self.config.num_surface_neighbors, - algorithm="rbc", - ) - knn.fit(surface_coordinates) - else: - # Under the hood this is instantiating a KDTree. - # aka here knn is a type, not a class, technically. - interp_func = KDTree(surface_coordinates) - - if self.config.sampling: - # Perform the down sampling: - if self.config.surface_sampling_algorithm == "area_weighted": - ( - surface_coordinates_sampled, - idx_surface, - ) = area_weighted_shuffle_array( - surface_coordinates, - self.config.surface_points_sample, - surface_sizes, - ) - elif self.config.surface_sampling_algorithm == "solution_weighted": - ( - surface_coordinates_sampled, - idx_surface, - ) = solution_weighted_shuffle_array( - surface_coordinates, - self.config.surface_points_sample, - surface_fields[:, 0], - scaling_factor=0.5, - ) - else: - surface_coordinates_sampled, idx_surface = shuffle_array( - surface_coordinates, self.config.surface_points_sample - ) - - if ( - surface_coordinates_sampled.shape[0] - < self.config.surface_points_sample - ): - surface_coordinates_sampled = pad( - surface_coordinates_sampled, - self.config.surface_points_sample, - pad_value=-10.0, - ) - - # Select out the sampled points for non-neighbor arrays: + # Select out the sampled points for non-neighbor arrays: + if surface_fields is not None: surface_fields = surface_fields[idx_surface] - pos_normals_com_surface = pos_normals_com_surface[idx_surface] - - # Now, perform the kNN on the sampled points: - if self.config.num_surface_neighbors > 1: - if self.array_provider == cp: - ii = knn.kneighbors( - surface_coordinates_sampled, return_distance=False - ) - else: - _, ii = interp_func.query( - surface_coordinates_sampled, - k=self.config.num_surface_neighbors, - ) - - # Pull out the neighbor elements. Note that ii is the index into the original - # points - but only exists for the sampled points - # In other words, a point from `surface_coordinates_sampled` has neighbors - # from the full `surface_coordinates` array. - surface_neighbors = surface_coordinates[ii][:, 1:] - surface_neighbors_normals = surface_normals[ii][:, 1:] - surface_neighbors_sizes = surface_sizes[ii][:, 1:] - else: - surface_neighbors = surface_coordinates - surface_neighbors_normals = surface_normals - surface_neighbors_sizes = surface_sizes - - # We could index into these above the knn step too; they aren't dependent on that. - surface_normals = surface_normals[idx_surface] - surface_sizes = surface_sizes[idx_surface] - - # Update the coordinates to the sampled points: - surface_coordinates = surface_coordinates_sampled - else: - # We are *not* sampling, kNN on ALL points: - if self.array_provider == cp: - ii = knn.kneighbors(surface_coordinates, return_distance=False) - else: - _, ii = interp_func.query( - surface_coordinates, - k=self.config.num_surface_neighbors, - ) - - # Construct the neighbors arrays: - surface_neighbors = surface_coordinates[ii][:, 1:] - surface_neighbors_normals = surface_normals[ii][:, 1:] - surface_neighbors_sizes = surface_sizes[ii][:, 1:] - - if self.config.scaling_type is not None: - if self.config.surface_factors is not None: - if self.config.scaling_type == "mean_std_scaling": - surf_mean = self.config.surface_factors[0] - surf_std = self.config.surface_factors[1] - # TODO - Are these array calls needed? - surface_fields = standardize( - surface_fields, xp.asarray(surf_mean), xp.asarray(surf_std) - ) - elif self.config.scaling_type == "min_max_scaling": - surf_min = self.config.surface_factors[1] - surf_max = self.config.surface_factors[0] - # TODO - Are these array calls needed? - surface_fields = normalize( - surface_fields, xp.asarray(surf_max), xp.asarray(surf_min) - ) + # Subsample the normals and sizes: + surface_normals = surface_normals[idx_surface] + surface_sizes = surface_sizes[idx_surface] + # Update the coordinates to the sampled points: + surface_coordinates = surface_coordinates_sampled + + ######################################################################## + # Perform a kNN on the surface to find the neighbor information + ######################################################################## + if self.config.num_surface_neighbors > 1: + # Perform the kNN: + neighbor_indices, neighbor_distances = knn( + points=full_surface_coordinates, + queries=surface_coordinates, + k=self.config.num_surface_neighbors, + ) - else: - surface_sizes = None - surface_normals = None - surface_neighbors = None - surface_neighbors_normals = None - surface_neighbors_sizes = None - pos_normals_com_surface = None + # Pull out the neighbor elements. + # Note that `neighbor_indices` is the index into the original, + # full sized tensors (full_surface_coordinates, etc). + surface_neighbors = full_surface_coordinates[neighbor_indices][:, 1:] + surface_neighbors_normals = full_surface_normals[neighbor_indices][:, 1:] + surface_neighbors_sizes = full_surface_sizes[neighbor_indices][:, 1:] + + # Better to normalize everything after the kNN and sampling + if self.config.normalize_coordinates: + surf_grid = normalize(surf_grid, s_max, s_min) + surface_coordinates = normalize(surface_coordinates, s_max, s_min) + surface_neighbors = normalize(surface_neighbors, s_max, s_min) + # Make sure to normalize the center of mass for the normals_com_surface calc + center_of_mass = normalize(center_of_mass, s_max, s_min) + + pos_normals_com_surface = surface_coordinates - center_of_mass + + ######################################################################## + # Apply scaling to the targets, if desired: + ######################################################################## + if self.config.scaling_type is not None and surface_fields is not None: + surface_fields = self.scale_model_targets( + surface_fields, self.config.surface_factors + ) return_dict.update( { @@ -788,465 +510,450 @@ def preprocess_surface(self, data_dict, core_dict, center_of_mass, s_min, s_max) "surface_neighbors_normals": surface_neighbors_normals, "surface_areas": surface_sizes, "surface_neighbors_areas": surface_neighbors_sizes, - "surface_fields": surface_fields, } ) + if surface_fields is not None: + return_dict["surface_fields"] = surface_fields return return_dict - @profile - def preprocess_volume( + def process_volume( self, - data_dict, - core_dict, - s_min, - s_max, - mesh_indices_flattened, - stl_vertices, - center_of_mass, - ): - return_dict = {} - - nx, ny, nz = self.config.grid_resolution + c_min: torch.Tensor, + c_max: torch.Tensor, + volume_coordinates: torch.Tensor, + volume_grid: torch.Tensor, + center_of_mass: torch.Tensor, + stl_vertices: torch.Tensor, + stl_indices: torch.Tensor, + volume_fields: torch.Tensor | None, + ) -> dict[str, torch.Tensor]: + """ + Preprocess the volume data. - xp = self.array_provider + First, if configured, we reject points not in the volume bounding box. - # # Temporary: convert to cupy here: - volume_coordinates = data_dict["volume_mesh_centers"] - volume_fields = data_dict["volume_fields"] + Next, if sampling is enabled, we sample the volume points and apply that + sampling to the ground truth too, if it's present. - if not self.config.compute_scaling_factors: - if self.config.bounding_box_dims is None: - c_max = s_max + (s_max - s_min) / 2 - c_min = s_min - (s_max - s_min) / 2 - c_min[2] = s_min[2] - else: - c_max = xp.asarray(self.config.bounding_box_dims[0]) - c_min = xp.asarray(self.config.bounding_box_dims[1]) - - if self.config.sample_in_bbox: - # TODO - xp.where can probably be removed. - ids_in_bbox = self.array_provider.where( - (volume_coordinates[:, 0] > c_min[0]) - & (volume_coordinates[:, 0] < c_max[0]) - & (volume_coordinates[:, 1] > c_min[1]) - & (volume_coordinates[:, 1] < c_max[1]) - & (volume_coordinates[:, 2] > c_min[2]) - & (volume_coordinates[:, 2] < c_max[2]) - ) - volume_coordinates = volume_coordinates[ids_in_bbox] + """ + ######################################################################## + # Reject points outside the volumetric BBox + ######################################################################## + if self.config.sample_in_bbox: + # Remove points in the volume that are outside + # of the bbox area. + min_check = volume_coordinates[:] > c_min + max_check = volume_coordinates[:] < c_max + + ids_in_bbox = min_check & max_check + ids_in_bbox = ids_in_bbox.all(dim=1) + + volume_coordinates = volume_coordinates[ids_in_bbox] + if volume_fields is not None: volume_fields = volume_fields[ids_in_bbox] - # Generate a grid of specified resolution to map the bounding box - # The grid is used for capturing structured geometry features and SDF representation of geometry - grid = create_grid(c_max, c_min, [nx, ny, nz]) - grid_reshaped = grid.reshape(nx * ny * nz, 3) - - # SDF calculation on the grid using WARP - sdf_grid, _ = signed_distance_field( - _convert_array_to_torch(stl_vertices), - _convert_array_to_torch(mesh_indices_flattened), - _convert_array_to_torch(grid_reshaped), - use_sign_winding_number=True, + ######################################################################## + # Apply sampling to the volume coordinates and fields + ######################################################################## + + if self.config.sampling: + # Generate a series of idx to sample the volume + # without replacement + volume_coordinates_sampled, idx_volume = shuffle_array( + volume_coordinates, self.config.volume_points_sample ) - sdf_grid = sdf_grid.reshape((nx, ny, nz)) - sdf_grid = _convert_torch_to_array(sdf_grid, self.array_provider) + volume_coordinates_sampled = volume_coordinates[idx_volume] + # In case too few points are in the sampled data (because the + # inputs were too few), pad the outputs: + if volume_coordinates_sampled.shape[0] < self.config.volume_points_sample: + padding_size = ( + self.config.volume_points_sample + - volume_coordinates_sampled.shape[0] + ) - if self.config.sampling: - volume_coordinates_sampled, idx_volume = shuffle_array( - volume_coordinates, self.config.volume_points_sample + volume_coordinates_sampled = torch.nn.functional.pad( + volume_coordinates_sampled, + (0, 0, 0, 0, 0, padding_size), + mode="constant", + value=-10.0, ) - if ( - volume_coordinates_sampled.shape[0] - < self.config.volume_points_sample - ): - volume_coordinates_sampled = pad( - volume_coordinates_sampled, - self.config.volume_points_sample, - pad_value=-10.0, - ) + + # Apply the same sampling to the targets, too: + if volume_fields is not None: volume_fields = volume_fields[idx_volume] - volume_coordinates = volume_coordinates_sampled - sdf_nodes, sdf_node_closest_point = signed_distance_field( - _convert_array_to_torch(stl_vertices), - _convert_array_to_torch(mesh_indices_flattened), - _convert_array_to_torch(volume_coordinates), - use_sign_winding_number=True, - ) - sdf_nodes = _convert_torch_to_array(sdf_nodes, self.array_provider) - sdf_node_closest_point = _convert_torch_to_array( - sdf_node_closest_point, self.array_provider + volume_coordinates = volume_coordinates_sampled + + ######################################################################## + # Apply normalization to the coordinates, if desired: + ######################################################################## + if self.config.normalize_coordinates: + volume_coordinates = normalize(volume_coordinates, c_max, c_min) + grid = normalize(volume_grid, c_max, c_min) + # This is used later in the SDF, apply the same scaling to the mesh + # coordinates: + normed_vertices = normalize(stl_vertices, c_max, c_min) + else: + grid = volume_grid + normed_vertices = stl_vertices + + ######################################################################## + # Apply scaling to the targets, if desired: + ######################################################################## + if self.config.scaling_type is not None and volume_fields is not None: + volume_fields = self.scale_model_targets( + volume_fields, self.config.volume_factors ) - # TODO - is this needed? - sdf_nodes = xp.asarray(sdf_nodes) - sdf_node_closest_point = xp.asarray(sdf_node_closest_point) + ######################################################################## + # Compute Signed Distance Function for volumetric quantities + # Note - the SDF happens here, after volume data processing finishes, + # because we need to use the (maybe) normalized volume coordinates and grid + ######################################################################## + + # SDF calculation on the volume grid using WARP + sdf_grid, _ = signed_distance_field( + normed_vertices, + stl_indices, + grid, + use_sign_winding_number=True, + ) - sdf_nodes = sdf_nodes.reshape((-1, 1)) + # Get the SDF of all the selected volume coordinates, + # And keep the closest point to each one. + sdf_nodes, sdf_node_closest_point = signed_distance_field( + normed_vertices, + stl_indices, + volume_coordinates, + use_sign_winding_number=True, + ) + sdf_nodes = sdf_nodes.reshape((-1, 1)) - if self.config.normalize_coordinates: - volume_coordinates = normalize(volume_coordinates, c_max, c_min) - grid = normalize(grid, c_max, c_min) - sdf_grid = normalize(sdf_grid, xp.amax(grid), xp.amin(grid)) - sdf_nodes = normalize(sdf_nodes, xp.amax(grid), xp.amin(grid)) - sdf_node_closest_point = normalize(sdf_node_closest_point, c_max, c_min) - center_of_mass_normalized = normalize(xp.asarray(center_of_mass), c_max, c_min) - else: - center_of_mass_normalized = xp.asarray(center_of_mass) - - pos_normals_closest_vol = volume_coordinates - sdf_node_closest_point - pos_normals_com_vol = volume_coordinates - center_of_mass_normalized - - if self.config.scaling_type is not None: - if self.config.volume_factors is not None: - if self.config.scaling_type == "mean_std_scaling": - vol_mean = self.config.volume_factors[0] - vol_std = self.config.volume_factors[1] - volume_fields = standardize(volume_fields, vol_mean, vol_std) - elif self.config.scaling_type == "min_max_scaling": - vol_min = xp.asarray(self.config.volume_factors[1]) - vol_max = xp.asarray(self.config.volume_factors[0]) - volume_fields = normalize(volume_fields, vol_max, vol_min) - - vol_grid_max_min = xp.stack([c_min, c_max]) + # Use the closest point from the mesh to compute the volume encodings: + pos_normals_closest_vol, pos_normals_com_vol = self.calculate_volume_encoding( + c_min, c_max, volume_coordinates, sdf_node_closest_point, center_of_mass + ) + + return_dict = { + "volume_mesh_centers": volume_coordinates, + "sdf_nodes": sdf_nodes, + "grid": grid, + "sdf_grid": sdf_grid, + "pos_volume_closest": pos_normals_closest_vol, + "pos_volume_center_of_mass": pos_normals_com_vol, + } + + if volume_fields is not None: + return_dict["volume_fields"] = volume_fields + + return return_dict + + def calculate_volume_encoding( + self, + c_min: torch.Tensor, + c_max: torch.Tensor, + volume_coordinates: torch.Tensor, + sdf_node_closest_point: torch.Tensor, + center_of_mass: torch.Tensor, + ): + if self.config.normalize_coordinates: + volume_coordinates = normalize(volume_coordinates, c_max, c_min) + sdf_node_closest_point = normalize(sdf_node_closest_point, c_max, c_min) + center_of_mass = normalize(center_of_mass, c_max, c_min) + + pos_normals_closest_vol = volume_coordinates - sdf_node_closest_point + pos_normals_com_vol = volume_coordinates - center_of_mass + + return pos_normals_closest_vol, pos_normals_com_vol + + @torch.no_grad() + def process_data(self, data_dict): + # Start building the preprocessed return dict: + return_dict = { + "global_params_values": data_dict["global_params_values"], + "global_params_reference": data_dict["global_params_reference"], + } + ######################################################################## + # Process the core STL information + ######################################################################## + + # This function gets information about the surface scale, + # and decides what the surface grid will be: + + stl_coordinates = data_dict["stl_coordinates"] + + s_min, s_max, surf_grid = self.compute_stl_scaling_and_surface_grids( + stl_coordinates + ) + + if isinstance(stl_coordinates, ShardTensor): + mesh = stl_coordinates._spec.mesh + # Then, replicate the bounding box along the mesh if present. + s_max = scatter_tensor( + s_max, + 0, + mesh=mesh, + placements=[ + Replicate(), + ], + global_shape=s_max.shape, + dtype=s_max.dtype, + requires_grad=False, + ) + s_min = scatter_tensor( + s_min, + 0, + mesh=mesh, + placements=[ + Replicate(), + ], + global_shape=s_min.shape, + dtype=s_min.dtype, + requires_grad=False, + ) + surf_grid = scatter_tensor( + surf_grid, + 0, + mesh=mesh, + placements=[ + Replicate(), + ], + global_shape=surf_grid.shape, + dtype=surf_grid.dtype, + requires_grad=False, + ) + + return_dict["surf_grid"] = surf_grid + + # We always need to calculate the SDF on the surface grid: + # This is for the SDF Later: + if self.config.normalize_coordinates: + normed_vertices = normalize(data_dict["stl_coordinates"], s_max, s_min) else: - pos_normals_closest_vol = None - pos_normals_com_vol = None - sdf_nodes = None - sdf_grid = None - grid = None - vol_grid_max_min = None + normed_vertices = data_dict["stl_coordinates"] - return_dict.update( - { - "pos_volume_closest": pos_normals_closest_vol, - "pos_volume_center_of_mass": pos_normals_com_vol, - "grid": grid, - "sdf_grid": sdf_grid, - "sdf_nodes": sdf_nodes, - "volume_fields": volume_fields, - "volume_mesh_centers": volume_coordinates, - "volume_min_max": vol_grid_max_min, - } + # For SDF calculations, make sure the mesh_indices_flattened is an integer array: + mesh_indices_flattened = data_dict["stl_faces"].to(torch.int32) + + # Compute signed distance function for the surface grid: + sdf_surf_grid, _ = signed_distance_field( + mesh_vertices=normed_vertices, + mesh_indices=mesh_indices_flattened, + input_points=surf_grid, + use_sign_winding_number=True, ) + return_dict["sdf_surf_grid"] = sdf_surf_grid - return return_dict + # Store this only if normalization is active: + if self.config.normalize_coordinates: + return_dict["surface_min_max"] = torch.stack([s_min, s_max]) - @profile - def preprocess_data(self, data_dict): - ( - return_dict, - s_min, - s_max, - mesh_indices_flattened, - stl_vertices, - center_of_mass, - ) = self.preprocess_combined(data_dict) + # This is a center of mass computation for the stl surface, + # using the size of each mesh point as weight. + center_of_mass = calculate_center_of_mass( + data_dict["stl_centers"], data_dict["stl_areas"] + ) + + # This will apply downsampling if needed to the geometry coordinates + geom_centers = self.downsample_geometry( + stl_vertices=data_dict["stl_coordinates"], + ) + return_dict["geometry_coordinates"] = geom_centers + ######################################################################## + # Determine the volumetric bounds of the data: + ######################################################################## + # Compute the min/max for volume an the unnomralized grid: + c_min, c_max, volume_grid = self.compute_volume_scaling_and_grids(s_min, s_max) + + # For volume data, we store this only if normalizing coordinates: if self.model_type == "volume" or self.model_type == "combined": - volume_dict = self.preprocess_volume( - data_dict, - return_dict, - s_min, - s_max, - mesh_indices_flattened, - stl_vertices, - center_of_mass, + if self.config.normalize_coordinates: + return_dict["volume_min_max"] = torch.stack([c_min, c_max]) + + if self.model_type == "volume" or self.model_type == "combined": + volume_fields_raw = ( + data_dict["volume_fields"] if "volume_fields" in data_dict else None + ) + volume_dict = self.process_volume( + c_min, + c_max, + volume_coordinates=data_dict["volume_mesh_centers"], + volume_grid=volume_grid, + center_of_mass=center_of_mass, + stl_vertices=data_dict["stl_coordinates"], + stl_indices=mesh_indices_flattened, + volume_fields=volume_fields_raw, ) return_dict.update(volume_dict) if self.model_type == "surface" or self.model_type == "combined": - surface_dict = self.preprocess_surface( - data_dict, return_dict, center_of_mass, s_min, s_max + surface_fields_raw = ( + data_dict["surface_fields"] if "surface_fields" in data_dict else None ) + surface_dict = self.process_surface( + s_min, + s_max, + c_min, + c_max, + center_of_mass=center_of_mass, + surf_grid=surf_grid, + surface_coordinates=data_dict["surface_mesh_centers"], + surface_normals=data_dict["surface_normals"], + surface_sizes=data_dict["surface_areas"], + stl_vertices=data_dict["stl_coordinates"], + stl_indices=mesh_indices_flattened, + surface_fields=surface_fields_raw, + ) + return_dict.update(surface_dict) return return_dict - @profile + def scale_model_targets( + self, fields: torch.Tensor, factors: torch.Tensor + ) -> torch.Tensor: + """ + Scale the model targets based on the configured scaling factors. + """ + if self.config.scaling_type == "mean_std_scaling": + field_mean = self.config.volume_factors[0] + field_std = self.config.volume_factors[1] + return standardize(fields, field_mean, field_std) + elif self.config.scaling_type == "min_max_scaling": + field_min = self.config.volume_factors[1] + field_max = self.config.volume_factors[0] + return normalize(fields, field_max, field_min) + + def unscale_model_outputs( + self, volume_fields: torch.Tensor | None, surface_fields: torch.Tensor | None + ): + """ + Unscale the model outputs based on the configured scaling factors. + + The unscaling is included here to make it a consistent interface regardless + of the scaling factors and type used. + + """ + + if volume_fields is not None: + if self.config.scaling_type == "mean_std_scaling": + vol_mean = self.config.volume_factors[0] + vol_std = self.config.volume_factors[1] + volume_fields = unstandardize(volume_fields, vol_mean, vol_std) + elif self.config.scaling_type == "min_max_scaling": + vol_min = self.config.volume_factors[1] + vol_max = self.config.volume_factors[0] + volume_fields = unnormalize(volume_fields, vol_max, vol_min) + if surface_fields is not None: + if self.config.scaling_type == "mean_std_scaling": + surf_mean = self.config.surface_factors[0] + surf_std = self.config.surface_factors[1] + surface_fields = unstandardize(surface_fields, surf_mean, surf_std) + elif self.config.scaling_type == "min_max_scaling": + surf_min = self.config.surface_factors[1] + surf_max = self.config.surface_factors[0] + surface_fields = unnormalize(surface_fields, surf_max, surf_min) + + return volume_fields, surface_fields + + def set_dataset(self, dataset: Iterable) -> None: + """ + Pass a dataset to the datapipe to enable iterating over both in one pass. + """ + self.dataset = dataset + + def __len__(self): + if self.dataset is not None: + return len(self.dataset) + else: + return 0 + def __getitem__(self, idx): """ Function for fetching and processing a single file's data. Domino, in general, expects one example per file and the files are relatively large due to the mesh size. + + Requires the user to have set a dataset via `set_dataset`. """ + if self.dataset is None: + raise ValueError("Dataset is not present") - if self.config.deterministic: - self.array_provider.random.seed(idx) - # But also always set numpy: - np.random.seed(idx) + # Get the data from the dataset. + # Under the hood, this may be fetching preloaded data. + data_dict = self.dataset[idx] - index = self.indices[idx] - cfd_filename = self.filenames[index] + return self.__call__(data_dict) - # Get all of the data: - filepath = self.config.data_path / cfd_filename + def __call__(self, data_dict: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]: + """ + Process the incoming data dictionary. + - Processes the data + - moves it to GPU + - adds a batch dimension - if filepath.suffix == ".zarr": - data_dict = self.read_data_zarr(filepath) - elif filepath.suffix == ".npz": - data_dict = self.read_data_npz(filepath) - elif filepath.suffix == ".npy": - data_dict = self.read_data_npy(filepath) - else: - raise ValueError(f"Unsupported file extension: {filepath.suffix}") - - return_dict = self.preprocess_data(data_dict) - - # return only pytorch tensor objects. - # If returning on CPU (but processed on GPU), convert below. - # This assumes we keep the data on the device it's on. - for key, value in return_dict.items(): - if isinstance(value, np.ndarray): - return_dict[key] = torch.from_numpy(value) - elif isinstance(value, cp.ndarray): - return_dict[key] = torch.utils.dlpack.from_dlpack(value.toDlpack()) - - if self.config.gpu_output: - # Make sure this is all on the GPU. - # Everything here should be a torch tensor now. - for key, value in return_dict.items(): - if isinstance(value, torch.Tensor) and not value.is_cuda: - return_dict[key] = value.pin_memory().to(self.device) - else: - # Make sure everything is on the CPU. - for key, value in return_dict.items(): - if isinstance(value, torch.Tensor) and value.is_cuda: - return_dict[key] = value.cpu() + Args: + data_dict: Dictionary containing the data to process as torch.Tensors. - return return_dict + Returns: + Dictionary containing the processed data as torch.Tensors. + """ + data_dict = self.process_data(data_dict) -@profile -def compute_scaling_factors(cfg: DictConfig, input_path: str, use_cache: bool) -> None: - model_type = cfg.model.model_type - max_scaling_factor_files = 20 - - if model_type == "volume" or model_type == "combined": - vol_save_path = os.path.join(cfg.project_dir, "volume_scaling_factors.npy") - if not os.path.exists(vol_save_path): - print("Computing volume scaling factors") - volume_variable_names = list(cfg.variables.volume.solution.keys()) - - fm_dict = DoMINODataPipe( - input_path, - phase="train", - grid_resolution=cfg.model.interp_res, - volume_variables=volume_variable_names, - surface_variables=None, - normalize_coordinates=True, - sampling=False, - sample_in_bbox=True, - volume_points_sample=cfg.model.volume_points_sample, - geom_points_sample=cfg.model.geom_points_sample, - model_type=cfg.model.model_type, - bounding_box_dims=cfg.data.bounding_box, - bounding_box_dims_surf=cfg.data.bounding_box_surface, - compute_scaling_factors=True, - gpu_preprocessing=True, - gpu_output=True, - ) + # If the data is not on the target device, put it there: + for key, value in data_dict.items(): + if value.device != self.output_device: + data_dict[key] = value.to(self.output_device) + + # Add a batch dimension to the data_dict + data_dict = {k: v.unsqueeze(0) for k, v in data_dict.items()} - # Calculate mean - if cfg.model.normalization == "mean_std_scaling": - for j in range(len(fm_dict)): - print("On iteration {j}") - d_dict = fm_dict[j] - vol_fields = d_dict["volume_fields"] - - if vol_fields is not None: - if j == 0: - vol_fields_sum = np.mean(vol_fields, 0) - else: - vol_fields_sum += np.mean(vol_fields, 0) - else: - vol_fields_sum = 0.0 - - vol_fields_mean = vol_fields_sum / len(fm_dict) - - for j in range(len(fm_dict)): - print("On iteration {j} again") - d_dict = fm_dict[j] - vol_fields = d_dict["volume_fields"] - - if vol_fields is not None: - if j == 0: - vol_fields_sum_square = np.mean( - (vol_fields - vol_fields_mean) ** 2.0, 0 - ) - else: - vol_fields_sum_square += np.mean( - (vol_fields - vol_fields_mean) ** 2.0, 0 - ) - else: - vol_fields_sum_square = 0.0 - - vol_fields_std = np.sqrt(vol_fields_sum_square / len(fm_dict)) - - vol_scaling_factors = [vol_fields_mean, vol_fields_std] - - if cfg.model.normalization == "min_max_scaling": - for j in range(len(fm_dict)): - print(f"Min max scaling on iteration {j}") - d_dict = fm_dict[j] - vol_fields = d_dict["volume_fields"] - - if vol_fields.device.type == "cuda": - xp = cp - vol_fields = vol_fields.cuda() - vol_fields = cp.from_dlpack(vol_fields) - else: - xp = np - vol_fields = vol_fields.cpu().numpy() - - if vol_fields is not None: - vol_mean = xp.mean(vol_fields, 0) - vol_std = xp.std(vol_fields, 0) - vol_idx = mean_std_sampling( - vol_fields, vol_mean, vol_std, tolerance=12.0 - ) - vol_fields_sampled = xp.delete(vol_fields, vol_idx, axis=0) - if j == 0: - vol_fields_max = xp.amax(vol_fields_sampled, 0) - vol_fields_min = xp.amin(vol_fields_sampled, 0) - else: - vol_fields_max1 = xp.amax(vol_fields_sampled, 0) - vol_fields_min1 = xp.amin(vol_fields_sampled, 0) - - for k in range(vol_fields.shape[-1]): - if vol_fields_max1[k] > vol_fields_max[k]: - vol_fields_max[k] = vol_fields_max1[k] - - if vol_fields_min1[k] < vol_fields_min[k]: - vol_fields_min[k] = vol_fields_min1[k] - else: - vol_fields_max = 0.0 - vol_fields_min = 0.0 - - if j > max_scaling_factor_files: - break - vol_scaling_factors = [vol_fields_max, vol_fields_min] - - for i, item in enumerate(vol_scaling_factors): - if isinstance(item, cp.ndarray): - vol_scaling_factors[i] = item.get() - - np.save(vol_save_path, vol_scaling_factors) - - if model_type == "surface" or model_type == "combined": - surf_save_path = os.path.join(cfg.project_dir, "surface_scaling_factors.npy") - - if not os.path.exists(surf_save_path): - print("Computing surface scaling factors") - volume_variable_names = list(cfg.variables.volume.solution.keys()) - surface_variable_names = list(cfg.variables.surface.solution.keys()) - - fm_dict = DoMINODataPipe( - input_path, - phase="train", - grid_resolution=cfg.model.interp_res, - volume_variables=None, - surface_variables=surface_variable_names, - normalize_coordinates=True, - sampling=False, - sample_in_bbox=True, - volume_points_sample=cfg.model.volume_points_sample, - geom_points_sample=cfg.model.geom_points_sample, - model_type=cfg.model.model_type, - bounding_box_dims=cfg.data.bounding_box, - bounding_box_dims_surf=cfg.data.bounding_box_surface, - compute_scaling_factors=True, + return data_dict + + def __iter__(self): + if self.dataset is None: + raise ValueError( + "Dataset is not present, can not use the datapipe as an iterator." ) - # Calculate mean - if cfg.model.normalization == "mean_std_scaling": - for j in range(len(fm_dict)): - print(f"Mean std scaling on iteration {j}") - d_dict = fm_dict[j] - surf_fields = d_dict["surface_fields"].cpu().numpy() - - if surf_fields is not None: - if j == 0: - surf_fields_sum = np.mean(surf_fields, 0) - else: - surf_fields_sum += np.mean(surf_fields, 0) - else: - surf_fields_sum = 0.0 - - surf_fields_mean = surf_fields_sum / len(fm_dict) - - for j in range(len(fm_dict)): - print(f"Mean std scaling on iteration {j} again") - d_dict = fm_dict[j] - surf_fields = d_dict["surface_fields"] - - if surf_fields is not None: - if j == 0: - surf_fields_sum_square = np.mean( - (surf_fields - surf_fields_mean) ** 2.0, 0 - ) - else: - surf_fields_sum_square += np.mean( - (surf_fields - surf_fields_mean) ** 2.0, 0 - ) - else: - surf_fields_sum_square = 0.0 - - surf_fields_std = np.sqrt(surf_fields_sum_square / len(fm_dict)) - - surf_scaling_factors = [surf_fields_mean, surf_fields_std] - - if cfg.model.normalization == "min_max_scaling": - for j in range(len(fm_dict)): - print(f"Min max scaling on iteration {j}") - d_dict = fm_dict[j] - surf_fields = d_dict["surface_fields"] - if surf_fields.device.type == "cuda": - xp = cp - surf_fields = surf_fields.cuda() - surf_fields = cp.from_dlpack(surf_fields) - else: - xp = np - surf_fields = surf_fields.cpu().numpy() - - if surf_fields is not None: - surf_mean = xp.mean(surf_fields, 0) - surf_std = xp.std(surf_fields, 0) - surf_idx = mean_std_sampling( - surf_fields, surf_mean, surf_std, tolerance=12.0 - ) - surf_fields_sampled = xp.delete(surf_fields, surf_idx, axis=0) - if j == 0: - surf_fields_max = xp.amax(surf_fields_sampled, 0) - surf_fields_min = xp.amin(surf_fields_sampled, 0) - else: - surf_fields_max1 = xp.amax(surf_fields_sampled, 0) - surf_fields_min1 = xp.amin(surf_fields_sampled, 0) - - for k in range(surf_fields.shape[-1]): - if surf_fields_max1[k] > surf_fields_max[k]: - surf_fields_max[k] = surf_fields_max1[k] - - if surf_fields_min1[k] < surf_fields_min[k]: - surf_fields_min[k] = surf_fields_min1[k] - else: - surf_fields_max = 0.0 - surf_fields_min = 0.0 - - if j > max_scaling_factor_files: - break - - surf_scaling_factors = [surf_fields_max, surf_fields_min] - - for i, item in enumerate(surf_scaling_factors): - if isinstance(item, cp.ndarray): - surf_scaling_factors[i] = item.get() - - np.save(surf_save_path, surf_scaling_factors) + for i, batch in enumerate(self.dataset): + yield self.__call__(batch) + + +def compute_scaling_factors( + cfg: DictConfig, input_path: str, target_keys: list[str], use_cache=None +) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + """ + Using the dataset at the path, compute the mean, std, min, and max of the target keys. + + Args: + cfg: Hydra configuration object containing all parameters + input_path: Path to the dataset to load. + target_keys: List of keys to compute the mean, std, min, and max of. + use_cache: (deprecated) This argument has no effect. + """ + + device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") + + dataset = DrivaerMLDataset( + data_dir=input_path, + keys_to_read=target_keys, + keys_to_read_if_available={}, + output_device=device, + ) + + mean, std, min_val, max_val = compute_mean_std_min_max( + dataset, + field_keys=target_keys, + ) + + return mean, std, min_val, max_val class CachedDoMINODataset(Dataset): @@ -1317,7 +1024,8 @@ def __getitem__(self, idx): filepath = self.data_path / cfd_filename result = np.load(filepath, allow_pickle=True).item() result = { - k: v.numpy() if isinstance(v, Tensor) else v for k, v in result.items() + k: torch.from_numpy(v) if isinstance(v, np.ndarray) else v + for k, v in result.items() } nvtx.range_pop() @@ -1349,10 +1057,10 @@ def __getitem__(self, idx): # Sample surface points if present if "surface_mesh_centers" in result and self.surface_points: if self.surface_sampling_algorithm == "area_weighted": - coords_sampled, idx_surface = area_weighted_shuffle_array( - result["surface_mesh_centers"], - self.surface_points, - result["surface_areas"], + coords_sampled, idx_surface = shuffle_array( + points=result["surface_mesh_centers"], + n_points=self.surface_points, + weights=result["surface_areas"], ) else: coords_sampled, idx_surface = shuffle_array( @@ -1399,12 +1107,28 @@ def __getitem__(self, idx): def create_domino_dataset( - cfg, phase, volume_variable_names, surface_variable_names, vol_factors, surf_factors + cfg: DictConfig, + phase: Literal["train", "val", "test"], + keys_to_read: list[str], + keys_to_read_if_available: dict[str, torch.Tensor], + vol_factors: list[float], + surf_factors: list[float], + normalize_coordinates: bool = True, + sample_in_bbox: bool = True, + sampling: bool = True, + device_mesh: torch.distributed.DeviceMesh | None = None, + placements: dict[str, torch.distributed.tensor.Placement] | None = None, ): + model_type = cfg.model.model_type if phase == "train": input_path = cfg.data.input_dir + dataloader_cfg = cfg.train.dataloader elif phase == "val": input_path = cfg.data.input_dir_val + dataloader_cfg = cfg.val.dataloader + elif phase == "test": + input_path = cfg.eval.test_path + dataloader_cfg = None else: raise ValueError(f"Invalid phase {phase}") @@ -1412,7 +1136,7 @@ def create_domino_dataset( return CachedDoMINODataset( input_path, phase=phase, - sampling=True, + sampling=sampling, volume_points_sample=cfg.model.volume_points_sample, surface_points_sample=cfg.model.surface_points_sample, geom_points_sample=cfg.model.geom_points_sample, @@ -1420,6 +1144,15 @@ def create_domino_dataset( surface_sampling_algorithm=cfg.model.surface_sampling_algorithm, ) else: + # The dataset path works in two pieces: + # There is a core "dataset" which is loading data and moving to GPU + # And there is the preprocess step, here. + + # Optionally, and for backwards compatibility, the preprocess + # object can accept a dataset which will enable it as an iterator. + # The iteration function will loop over the dataset, preprocess the + # output, and return it. + overrides = {} if hasattr(cfg.data, "gpu_preprocessing"): overrides["gpu_preprocessing"] = cfg.data.gpu_preprocessing @@ -1427,22 +1160,48 @@ def create_domino_dataset( if hasattr(cfg.data, "gpu_output"): overrides["gpu_output"] = cfg.data.gpu_output - return DoMINODataPipe( + dm = DistributedManager() + + if cfg.data.gpu_preprocessing: + device = dm.device + consumer_stream = torch.cuda.default_stream() + else: + device = torch.device("cpu") + consumer_stream = None + + if dataloader_cfg is not None: + preload_depth = dataloader_cfg.preload_depth + pin_memory = dataloader_cfg.pin_memory + else: + preload_depth = 2 + pin_memory = False + + dataset = DrivaerMLDataset( + data_dir=input_path, + keys_to_read=keys_to_read, + keys_to_read_if_available=keys_to_read_if_available, + output_device=device, + preload_depth=preload_depth, + pin_memory=pin_memory, + device_mesh=device_mesh, + placements=placements, + consumer_stream=consumer_stream, + ) + + datapipe = DoMINODataPipe( input_path, phase=phase, grid_resolution=cfg.model.interp_res, - volume_variables=volume_variable_names, - surface_variables=surface_variable_names, - normalize_coordinates=True, - sampling=True, - sample_in_bbox=True, + normalize_coordinates=normalize_coordinates, + sampling=sampling, + sample_in_bbox=sample_in_bbox, volume_points_sample=cfg.model.volume_points_sample, surface_points_sample=cfg.model.surface_points_sample, geom_points_sample=cfg.model.geom_points_sample, volume_factors=vol_factors, surface_factors=surf_factors, scaling_type=cfg.model.normalization, - model_type=cfg.model.model_type, + model_type=model_type, bounding_box_dims=cfg.data.bounding_box, bounding_box_dims_surf=cfg.data.bounding_box_surface, num_surface_neighbors=cfg.model.num_neighbors_surface, @@ -1450,6 +1209,10 @@ def create_domino_dataset( **overrides, ) + datapipe.set_dataset(dataset) + + return datapipe + if __name__ == "__main__": fm_data = DoMINODataPipe( diff --git a/physicsnemo/datapipes/cae/domino_datapipe2.py b/physicsnemo/datapipes/cae/domino_datapipe2.py deleted file mode 100644 index d953e1c9df..0000000000 --- a/physicsnemo/datapipes/cae/domino_datapipe2.py +++ /dev/null @@ -1,1222 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. -# SPDX-FileCopyrightText: All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This code provides the datapipe for reading the processed npy files, -generating multi-res grids, calculating signed distance fields, -sampling random points in the volume and on surface, -normalizing fields and returning the output tensors as a dictionary. - -This datapipe also non-dimensionalizes the fields, so the order in which the variables should -be fixed: velocity, pressure, turbulent viscosity for volume variables and -pressure, wall-shear-stress for surface variables. The different parameters such as -variable names, domain resolution, sampling size etc. are configurable in config.yaml. -""" - -from dataclasses import dataclass -from pathlib import Path -from typing import Iterable, Literal, Optional, Protocol, Sequence, Union - -import numpy as np -import torch -import torch.cuda.nvtx as nvtx -from omegaconf import DictConfig -from torch.distributed.tensor.placement_types import Replicate -from torch.utils.data import Dataset - -from physicsnemo.datapipes.cae.drivaer_ml_dataset import ( - DrivaerMLDataset, - compute_mean_std_min_max, -) -from physicsnemo.distributed import DistributedManager -from physicsnemo.distributed.shard_tensor import ShardTensor, scatter_tensor -from physicsnemo.utils.domino.utils import ( - calculate_center_of_mass, - create_grid, - get_filenames, - normalize, - pad, - shuffle_array, - standardize, - unnormalize, - unstandardize, -) -from physicsnemo.utils.neighbors import knn -from physicsnemo.utils.profiling import profile -from physicsnemo.utils.sdf import signed_distance_field - - -class BoundingBox(Protocol): - """ - Type definition for the required format of bounding box dimensions. - """ - - min: Sequence - max: Sequence - - -@dataclass -class DoMINODataConfig: - """Configuration for DoMINO dataset processing pipeline. - - Attributes: - data_path: Path to the dataset to load. - phase: Which phase of data to load ("train", "val", or "test"). - surface_variables: (Surface specific) Names of surface variables. - surface_points_sample: (Surface specific) Number of surface points to sample per batch. - num_surface_neighbors: (Surface specific) Number of surface neighbors to consider for nearest neighbors approach. - surface_sampling_algorithm: (Surface specific) Algorithm to use for surface sampling ("area_weighted" or "random"). - surface_factors: (Surface specific) Non-dimensionalization factors for surface variables. - If set, and scaling_type is: - - min_max_scaling -> rescale surface_fields to the min/max set here - - mean_std_scaling -> rescale surface_fields to the mean and std set here. - bounding_box_dims_surf: (Surface specific) Dimensions of bounding box. Must be an object with min/max - attributes that are arraylike. - volume_variables: (Volume specific) Names of volume variables. - volume_points_sample: (Volume specific) Number of volume points to sample per batch. - volume_factors: (Volume specific) Non-dimensionalization factors for volume variables scaling. - If set, and scaling_type is: - - min_max_scaling -> rescale volume_fields to the min/max set here - - mean_std_scaling -> rescale volume_fields to the mean and std set here. - bounding_box_dims: (Volume specific) Dimensions of bounding box. Must be an object with min/max - attributes that are arraylike. - grid_resolution: Resolution of the latent grid. - normalize_coordinates: Whether to normalize coordinates based on min/max values. - For surfaces: uses s_min/s_max, defined from: - - Surface bounding box, if defined. - - Min/max of the stl_vertices - For volumes: uses c_min/c_max, defined from: - - Volume bounding_box if defined, - - 1.5x s_min/max otherwise, except c_min[2] = s_min[2] in this case - sample_in_bbox: Whether to sample points in a specified bounding box. - Uses the same min/max points as coordinate normalization. - Only performed if compute_scaling_factors is false. - sampling: Whether to downsample the full resolution mesh to fit in GPU memory. - Surface and volume sampling points are configured separately as: - - surface.points_sample - - volume.points_sample - geom_points_sample: Number of STL points sampled per batch. - Independent of volume.points_sample and surface.points_sample. - scaling_type: Scaling type for volume variables. - If used, will rescale the volume_fields and surface fields outputs. - Requires volume.factor and surface.factor to be set. - compute_scaling_factors: Whether to compute scaling factors. - Not available if caching. - Many preprocessing pieces are disabled if computing scaling factors. - caching: Whether this is for caching or serving. - deterministic: Whether to use a deterministic seed for sampling and random numbers. - gpu_preprocessing: Whether to do preprocessing on the GPU (False for CPU). - gpu_output: Whether to return output on the GPU as cupy arrays. - If False, returns numpy arrays. - You might choose gpu_preprocessing=True and gpu_output=False if caching. - """ - - data_path: Path | None - phase: Literal["train", "val", "test"] - - # Surface-specific variables: - surface_variables: Optional[Sequence] = ("pMean", "wallShearStress") - surface_points_sample: int = 1024 - num_surface_neighbors: int = 11 - surface_sampling_algorithm: str = Literal["area_weighted", "random"] - surface_factors: Optional[Sequence] = None - bounding_box_dims_surf: Optional[Union[BoundingBox, Sequence]] = None - - # Volume specific variables: - volume_variables: Optional[Sequence] = ("UMean", "pMean") - volume_points_sample: int = 1024 - volume_factors: Optional[Sequence] = None - bounding_box_dims: Optional[Union[BoundingBox, Sequence]] = None - - grid_resolution: Sequence = (256, 96, 64) - normalize_coordinates: bool = False - sample_in_bbox: bool = False - sampling: bool = False - geom_points_sample: int = 300000 - scaling_type: Optional[Literal["min_max_scaling", "mean_std_scaling"]] = None - compute_scaling_factors: bool = False - caching: bool = False - deterministic: bool = False - gpu_preprocessing: bool = True - gpu_output: bool = True - - def __post_init__(self): - if self.data_path is not None: - # Ensure data_path is a Path object: - if isinstance(self.data_path, str): - self.data_path = Path(self.data_path) - self.data_path = self.data_path.expanduser() - - if not self.data_path.exists(): - raise ValueError(f"Path {self.data_path} does not exist") - - if not self.data_path.is_dir(): - raise ValueError(f"Path {self.data_path} is not a directory") - - # Object if caching settings are impossible: - if self.caching: - if self.sampling: - raise ValueError("Sampling should be False for caching") - if self.compute_scaling_factors: - raise ValueError("Compute scaling factors should be False for caching") - - if self.phase not in [ - "train", - "val", - "test", - ]: - raise ValueError( - f"phase should be one of ['train', 'val', 'test'], got {self.phase}" - ) - if self.scaling_type is not None: - if self.scaling_type not in [ - "min_max_scaling", - "mean_std_scaling", - ]: - raise ValueError( - f"scaling_type should be one of ['min_max_scaling', 'mean_std_scaling'], got {self.scaling_type}" - ) - - -##### TODO -# - The SDF normalization here is based on using a normalized mesh and -# a normalized coordinate. The alternate method is to normalize to the min/max of the grid. - - -class DoMINODataPipe(Dataset): - """ - Datapipe for DoMINO - - Leverages a dataset for the actual reading of the data, and this - object is responsible for preprocessing the data. - - """ - - def __init__( - self, - input_path, - model_type: Literal["surface", "volume", "combined"], - pin_memory: bool = False, - **data_config_overrides, - ): - # Perform config packaging and validation - self.config = DoMINODataConfig(data_path=input_path, **data_config_overrides) - - # Set up the distributed manager: - if not DistributedManager.is_initialized(): - DistributedManager.initialize() - - dist = DistributedManager() - - # Set devices for the preprocessing and IO target - self.preproc_device = ( - dist.device if self.config.gpu_preprocessing else torch.device("cpu") - ) - # The drivaer_ml_dataset will automatically target this device - # In an async transfer. - self.output_device = ( - dist.device if self.config.gpu_output else torch.device("cpu") - ) - - # Model type determines whether we process surface, volume, or both. - self.model_type = model_type - - # Update the arrays for bounding boxes: - if hasattr(self.config.bounding_box_dims, "max") and hasattr( - self.config.bounding_box_dims, "min" - ): - self.config.bounding_box_dims = [ - torch.tensor( - self.config.bounding_box_dims.max, - device=self.preproc_device, - dtype=torch.float32, - ), - torch.tensor( - self.config.bounding_box_dims.min, - device=self.preproc_device, - dtype=torch.float32, - ), - ] - self.default_volume_grid = create_grid( - self.config.bounding_box_dims[0], - self.config.bounding_box_dims[1], - self.config.grid_resolution, - ) - - # And, do the surface bounding box if supplied: - if hasattr(self.config.bounding_box_dims_surf, "max") and hasattr( - self.config.bounding_box_dims_surf, "min" - ): - self.config.bounding_box_dims_surf = [ - torch.tensor( - self.config.bounding_box_dims_surf.max, - device=self.preproc_device, - dtype=torch.float32, - ), - torch.tensor( - self.config.bounding_box_dims_surf.min, - device=self.preproc_device, - dtype=torch.float32, - ), - ] - - self.default_surface_grid = create_grid( - self.config.bounding_box_dims_surf[0], - self.config.bounding_box_dims_surf[1], - self.config.grid_resolution, - ) - - # Ensure the volume and surface scaling factors are torch tensors - # and on the right device: - if self.config.volume_factors is not None: - self.config.volume_factors = torch.tensor( - self.config.volume_factors, - device=self.preproc_device, - dtype=torch.float32, - ) - if self.config.surface_factors is not None: - self.config.surface_factors = torch.tensor( - self.config.surface_factors, - device=self.preproc_device, - dtype=torch.float32, - ) - - self.dataset = None - - def compute_stl_scaling_and_surface_grids( - self, - stl_vertices: torch.Tensor, - ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - """ - Compute the min and max for the defining mesh. - - If the user supplies a bounding box, we use that. Otherwise, - it's created dynamically from the min/max of the stl vertices. - - The returned min/max and grid are used for surface data. - """ - - # Check the bounding box is not unit length - - if self.config.bounding_box_dims_surf is not None: - s_max = self.config.bounding_box_dims_surf[0] - s_min = self.config.bounding_box_dims_surf[1] - surf_grid = self.default_surface_grid - else: - # Create the grid dynamically - s_min = torch.amin(stl_vertices, 0) - s_max = torch.amax(stl_vertices, 0) - surf_grid = create_grid(s_max, s_min, self.config.grid_resolution) - - return s_min, s_max, surf_grid - - def compute_volume_scaling_and_grids( - self, s_min: torch.Tensor, s_max: torch.Tensor - ): - """ - Compute the min and max and grid for volume data. - - If the user supplies a bounding box, we use that. Otherwise, - it's created dynamically from the surface min/max. - - This will be 2x longer in x and y and the same in z as the surface bounding box. - """ - - # Determine the volume min / max locations - if self.config.bounding_box_dims is not None: - c_max = self.config.bounding_box_dims[0] - c_min = self.config.bounding_box_dims[1] - volume_grid = self.default_volume_grid - - else: - # Create the grid based on the surface grid - c_max = s_max + (s_max - s_min) / 2 - c_min = s_min - (s_max - s_min) / 2 - c_min[2] = s_min[2] - volume_grid = create_grid(c_max, c_min, self.config.grid_resolution) - - return c_min, c_max, volume_grid - - @profile - def downsample_geometry( - self, - stl_vertices, - ) -> torch.Tensor: - """ - Downsample the geometry to the desired number of points. - - Args: - stl_vertices: The vertices of the surface. - """ - - if self.config.sampling: - geometry_points = self.config.geom_points_sample - - geometry_coordinates_sampled, idx_geometry = shuffle_array( - stl_vertices, geometry_points - ) - if geometry_coordinates_sampled.shape[0] < geometry_points: - geometry_coordinates_sampled = pad( - geometry_coordinates_sampled, geometry_points, pad_value=-100.0 - ) - geom_centers = geometry_coordinates_sampled - else: - geom_centers = stl_vertices - - return geom_centers - - def process_surface( - self, - s_min: torch.Tensor, - s_max: torch.Tensor, - c_min: torch.Tensor, - c_max: torch.Tensor, - *, # Forcing the rest by keyword only since it's a long list ... - center_of_mass: torch.Tensor, - surf_grid: torch.Tensor, - surface_coordinates: torch.Tensor, - surface_normals: torch.Tensor, - surface_sizes: torch.Tensor, - stl_vertices: torch.Tensor, - stl_indices: torch.Tensor, - surface_fields: torch.Tensor | None, - ) -> dict[str, torch.Tensor]: - nx, ny, nz = self.config.grid_resolution - - return_dict = {} - - ######################################################################## - # Remove any sizes <= 0: - ######################################################################## - idx = surface_sizes > 0 - surface_sizes = surface_sizes[idx] - surface_normals = surface_normals[idx] - surface_coordinates = surface_coordinates[idx] - if surface_fields is not None: - surface_fields = surface_fields[idx] - - ######################################################################## - # Reject surface points outside of the Bounding Box - # NOTE - this is using the VOLUME bounding box! - ######################################################################## - if self.config.sample_in_bbox: - ids_min = surface_coordinates[:] > c_min - ids_max = surface_coordinates[:] < c_max - - ids_in_bbox = ids_min & ids_max - ids_in_bbox = ids_in_bbox.all(dim=-1) - - surface_coordinates = surface_coordinates[ids_in_bbox] - surface_normals = surface_normals[ids_in_bbox] - surface_sizes = surface_sizes[ids_in_bbox] - if surface_fields is not None: - surface_fields = surface_fields[ids_in_bbox] - - ######################################################################## - # Perform Down sampling of the surface fields. - # Note that we snapshot the full surface coordinates for - # use in the kNN in the next step. - ######################################################################## - - full_surface_coordinates = surface_coordinates - full_surface_normals = surface_normals - full_surface_sizes = surface_sizes - - if self.config.sampling: - # Perform the down sampling: - if self.config.surface_sampling_algorithm == "area_weighted": - weights = surface_sizes - else: - weights = None - - surface_coordinates_sampled, idx_surface = shuffle_array( - surface_coordinates, - self.config.surface_points_sample, - weights=weights, - ) - - if surface_coordinates_sampled.shape[0] < self.config.surface_points_sample: - surface_coordinates_sampled = pad( - surface_coordinates_sampled, - self.config.surface_points_sample, - pad_value=-10.0, - ) - - # Select out the sampled points for non-neighbor arrays: - if surface_fields is not None: - surface_fields = surface_fields[idx_surface] - - # Subsample the normals and sizes: - surface_normals = surface_normals[idx_surface] - surface_sizes = surface_sizes[idx_surface] - # Update the coordinates to the sampled points: - surface_coordinates = surface_coordinates_sampled - - ######################################################################## - # Perform a kNN on the surface to find the neighbor information - ######################################################################## - if self.config.num_surface_neighbors > 1: - # Perform the kNN: - neighbor_indices, neighbor_distances = knn( - points=full_surface_coordinates, - queries=surface_coordinates, - k=self.config.num_surface_neighbors, - ) - - # Pull out the neighbor elements. - # Note that `neighbor_indices` is the index into the original, - # full sized tensors (full_surface_coordinates, etc). - surface_neighbors = full_surface_coordinates[neighbor_indices][:, 1:] - surface_neighbors_normals = full_surface_normals[neighbor_indices][:, 1:] - surface_neighbors_sizes = full_surface_sizes[neighbor_indices][:, 1:] - - # Better to normalize everything after the kNN and sampling - if self.config.normalize_coordinates: - surf_grid = normalize(surf_grid, s_max, s_min) - surface_coordinates = normalize(surface_coordinates, s_max, s_min) - surface_neighbors = normalize(surface_neighbors, s_max, s_min) - # Make sure to normalize the center of mass for the normals_com_surface calc - center_of_mass = normalize(center_of_mass, s_max, s_min) - - pos_normals_com_surface = surface_coordinates - center_of_mass - - ######################################################################## - # Apply scaling to the targets, if desired: - ######################################################################## - if self.config.scaling_type is not None and surface_fields is not None: - surface_fields = self.scale_model_targets( - surface_fields, self.config.surface_factors - ) - - return_dict.update( - { - "pos_surface_center_of_mass": pos_normals_com_surface, - "surface_mesh_centers": surface_coordinates, - "surface_mesh_neighbors": surface_neighbors, - "surface_normals": surface_normals, - "surface_neighbors_normals": surface_neighbors_normals, - "surface_areas": surface_sizes, - "surface_neighbors_areas": surface_neighbors_sizes, - } - ) - if surface_fields is not None: - return_dict["surface_fields"] = surface_fields - - return return_dict - - def process_volume( - self, - c_min: torch.Tensor, - c_max: torch.Tensor, - volume_coordinates: torch.Tensor, - volume_grid: torch.Tensor, - center_of_mass: torch.Tensor, - stl_vertices: torch.Tensor, - stl_indices: torch.Tensor, - volume_fields: torch.Tensor | None, - ) -> dict[str, torch.Tensor]: - """ - Preprocess the volume data. - - First, if configured, we reject points not in the volume bounding box. - - Next, if sampling is enabled, we sample the volume points and apply that - sampling to the ground truth too, if it's present. - - """ - ######################################################################## - # Reject points outside the volumetric BBox - ######################################################################## - if self.config.sample_in_bbox: - # Remove points in the volume that are outside - # of the bbox area. - min_check = volume_coordinates[:] > c_min - max_check = volume_coordinates[:] < c_max - - ids_in_bbox = min_check & max_check - ids_in_bbox = ids_in_bbox.all(dim=1) - - volume_coordinates = volume_coordinates[ids_in_bbox] - if volume_fields is not None: - volume_fields = volume_fields[ids_in_bbox] - - ######################################################################## - # Apply sampling to the volume coordinates and fields - ######################################################################## - - if self.config.sampling: - # Generate a series of idx to sample the volume - # without replacement - volume_coordinates_sampled, idx_volume = shuffle_array( - volume_coordinates, self.config.volume_points_sample - ) - volume_coordinates_sampled = volume_coordinates[idx_volume] - # In case too few points are in the sampled data (because the - # inputs were too few), pad the outputs: - if volume_coordinates_sampled.shape[0] < self.config.volume_points_sample: - padding_size = ( - self.config.volume_points_sample - - volume_coordinates_sampled.shape[0] - ) - - volume_coordinates_sampled = torch.nn.functional.pad( - volume_coordinates_sampled, - (0, 0, 0, 0, 0, padding_size), - mode="constant", - value=-10.0, - ) - - # Apply the same sampling to the targets, too: - if volume_fields is not None: - volume_fields = volume_fields[idx_volume] - - volume_coordinates = volume_coordinates_sampled - - ######################################################################## - # Apply normalization to the coordinates, if desired: - ######################################################################## - if self.config.normalize_coordinates: - volume_coordinates = normalize(volume_coordinates, c_max, c_min) - grid = normalize(volume_grid, c_max, c_min) - # This is used later in the SDF, apply the same scaling to the mesh - # coordinates: - normed_vertices = normalize(stl_vertices, c_max, c_min) - else: - grid = volume_grid - normed_vertices = stl_vertices - - ######################################################################## - # Apply scaling to the targets, if desired: - ######################################################################## - if self.config.scaling_type is not None and volume_fields is not None: - volume_fields = self.scale_model_targets( - volume_fields, self.config.volume_factors - ) - - ######################################################################## - # Compute Signed Distance Function for volumetric quantities - # Note - the SDF happens here, after volume data processing finishes, - # because we need to use the (maybe) normalized volume coordinates and grid - ######################################################################## - - # SDF calculation on the volume grid using WARP - sdf_grid, _ = signed_distance_field( - normed_vertices, - stl_indices, - grid, - use_sign_winding_number=True, - ) - - # Get the SDF of all the selected volume coordinates, - # And keep the closest point to each one. - sdf_nodes, sdf_node_closest_point = signed_distance_field( - normed_vertices, - stl_indices, - volume_coordinates, - use_sign_winding_number=True, - ) - sdf_nodes = sdf_nodes.reshape((-1, 1)) - - # Use the closest point from the mesh to compute the volume encodings: - pos_normals_closest_vol, pos_normals_com_vol = self.calculate_volume_encoding( - c_min, c_max, volume_coordinates, sdf_node_closest_point, center_of_mass - ) - - return_dict = { - "volume_mesh_centers": volume_coordinates, - "sdf_nodes": sdf_nodes, - "grid": grid, - "sdf_grid": sdf_grid, - "pos_volume_closest": pos_normals_closest_vol, - "pos_volume_center_of_mass": pos_normals_com_vol, - } - - if volume_fields is not None: - return_dict["volume_fields"] = volume_fields - - return return_dict - - def calculate_volume_encoding( - self, - c_min: torch.Tensor, - c_max: torch.Tensor, - volume_coordinates: torch.Tensor, - sdf_node_closest_point: torch.Tensor, - center_of_mass: torch.Tensor, - ): - if self.config.normalize_coordinates: - volume_coordinates = normalize(volume_coordinates, c_max, c_min) - sdf_node_closest_point = normalize(sdf_node_closest_point, c_max, c_min) - center_of_mass = normalize(center_of_mass, c_max, c_min) - - pos_normals_closest_vol = volume_coordinates - sdf_node_closest_point - pos_normals_com_vol = volume_coordinates - center_of_mass - - return pos_normals_closest_vol, pos_normals_com_vol - - @torch.no_grad() - def process_data(self, data_dict): - # Start building the preprocessed return dict: - return_dict = { - "global_params_values": data_dict["global_params_values"], - "global_params_reference": data_dict["global_params_reference"], - } - - ######################################################################## - # Process the core STL information - ######################################################################## - - # This function gets information about the surface scale, - # and decides what the surface grid will be: - - stl_coordinates = data_dict["stl_coordinates"] - - s_min, s_max, surf_grid = self.compute_stl_scaling_and_surface_grids( - stl_coordinates - ) - - if isinstance(stl_coordinates, ShardTensor): - mesh = stl_coordinates._spec.mesh - # Then, replicate the bounding box along the mesh if present. - s_max = scatter_tensor( - s_max, - 0, - mesh=mesh, - placements=[ - Replicate(), - ], - global_shape=s_max.shape, - dtype=s_max.dtype, - requires_grad=False, - ) - s_min = scatter_tensor( - s_min, - 0, - mesh=mesh, - placements=[ - Replicate(), - ], - global_shape=s_min.shape, - dtype=s_min.dtype, - requires_grad=False, - ) - surf_grid = scatter_tensor( - surf_grid, - 0, - mesh=mesh, - placements=[ - Replicate(), - ], - global_shape=surf_grid.shape, - dtype=surf_grid.dtype, - requires_grad=False, - ) - - return_dict["surf_grid"] = surf_grid - - # We always need to calculate the SDF on the surface grid: - # This is for the SDF Later: - if self.config.normalize_coordinates: - normed_vertices = normalize(data_dict["stl_coordinates"], s_max, s_min) - else: - normed_vertices = data_dict["stl_coordinates"] - - # For SDF calculations, make sure the mesh_indices_flattened is an integer array: - mesh_indices_flattened = data_dict["stl_faces"].to(torch.int32) - - # Compute signed distance function for the surface grid: - sdf_surf_grid, _ = signed_distance_field( - mesh_vertices=normed_vertices, - mesh_indices=mesh_indices_flattened, - input_points=surf_grid, - use_sign_winding_number=True, - ) - return_dict["sdf_surf_grid"] = sdf_surf_grid - - # Store this only if normalization is active: - if self.config.normalize_coordinates: - return_dict["surface_min_max"] = torch.stack([s_min, s_max]) - - # This is a center of mass computation for the stl surface, - # using the size of each mesh point as weight. - center_of_mass = calculate_center_of_mass( - data_dict["stl_centers"], data_dict["stl_areas"] - ) - - # This will apply downsampling if needed to the geometry coordinates - geom_centers = self.downsample_geometry( - stl_vertices=data_dict["stl_coordinates"], - ) - return_dict["geometry_coordinates"] = geom_centers - - ######################################################################## - # Determine the volumetric bounds of the data: - ######################################################################## - # Compute the min/max for volume an the unnomralized grid: - c_min, c_max, volume_grid = self.compute_volume_scaling_and_grids(s_min, s_max) - - # For volume data, we store this only if normalizing coordinates: - if self.model_type == "volume" or self.model_type == "combined": - if self.config.normalize_coordinates: - return_dict["volume_min_max"] = torch.stack([c_min, c_max]) - - if self.model_type == "volume" or self.model_type == "combined": - volume_fields_raw = ( - data_dict["volume_fields"] if "volume_fields" in data_dict else None - ) - volume_dict = self.process_volume( - c_min, - c_max, - volume_coordinates=data_dict["volume_mesh_centers"], - volume_grid=volume_grid, - center_of_mass=center_of_mass, - stl_vertices=data_dict["stl_coordinates"], - stl_indices=mesh_indices_flattened, - volume_fields=volume_fields_raw, - ) - - return_dict.update(volume_dict) - - if self.model_type == "surface" or self.model_type == "combined": - surface_fields_raw = ( - data_dict["surface_fields"] if "surface_fields" in data_dict else None - ) - surface_dict = self.process_surface( - s_min, - s_max, - c_min, - c_max, - center_of_mass=center_of_mass, - surf_grid=surf_grid, - surface_coordinates=data_dict["surface_mesh_centers"], - surface_normals=data_dict["surface_normals"], - surface_sizes=data_dict["surface_areas"], - stl_vertices=data_dict["stl_coordinates"], - stl_indices=mesh_indices_flattened, - surface_fields=surface_fields_raw, - ) - - return_dict.update(surface_dict) - - return return_dict - - def scale_model_targets( - self, fields: torch.Tensor, factors: torch.Tensor - ) -> torch.Tensor: - """ - Scale the model targets based on the configured scaling factors. - """ - if self.config.scaling_type == "mean_std_scaling": - field_mean = self.config.volume_factors[0] - field_std = self.config.volume_factors[1] - return standardize(fields, field_mean, field_std) - elif self.config.scaling_type == "min_max_scaling": - field_min = self.config.volume_factors[1] - field_max = self.config.volume_factors[0] - return normalize(fields, field_max, field_min) - - def unscale_model_outputs( - self, volume_fields: torch.Tensor | None, surface_fields: torch.Tensor | None - ): - """ - Unscale the model outputs based on the configured scaling factors. - - The unscaling is included here to make it a consistent interface regardless - of the scaling factors and type used. - - """ - - if volume_fields is not None: - if self.config.scaling_type == "mean_std_scaling": - vol_mean = self.config.volume_factors[0] - vol_std = self.config.volume_factors[1] - volume_fields = unstandardize(volume_fields, vol_mean, vol_std) - elif self.config.scaling_type == "min_max_scaling": - vol_min = self.config.volume_factors[1] - vol_max = self.config.volume_factors[0] - volume_fields = unnormalize(volume_fields, vol_max, vol_min) - if surface_fields is not None: - if self.config.scaling_type == "mean_std_scaling": - surf_mean = self.config.surface_factors[0] - surf_std = self.config.surface_factors[1] - surface_fields = unstandardize(surface_fields, surf_mean, surf_std) - elif self.config.scaling_type == "min_max_scaling": - surf_min = self.config.surface_factors[1] - surf_max = self.config.surface_factors[0] - surface_fields = unnormalize(surface_fields, surf_max, surf_min) - - return volume_fields, surface_fields - - def set_dataset(self, dataset: Iterable) -> None: - """ - Pass a dataset to the datapipe to enable iterating over both in one pass. - """ - self.dataset = dataset - - def __len__(self): - if self.dataset is not None: - return len(self.dataset) - else: - return 0 - - def __getitem__(self, idx): - """ - Function for fetching and processing a single file's data. - - Domino, in general, expects one example per file and the files - are relatively large due to the mesh size. - - Requires the user to have set a dataset via `set_dataset`. - """ - if self.dataset is None: - raise ValueError("Dataset is not present") - - # Get the data from the dataset. - # Under the hood, this may be fetching preloaded data. - data_dict = self.dataset[idx] - - return self.__call__(data_dict) - - def __call__(self, data_dict: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]: - """ - Process the incoming data dictionary. - - Processes the data - - moves it to GPU - - adds a batch dimension - - Args: - data_dict: Dictionary containing the data to process as torch.Tensors. - - Returns: - Dictionary containing the processed data as torch.Tensors. - - """ - data_dict = self.process_data(data_dict) - - # If the data is not on the target device, put it there: - for key, value in data_dict.items(): - if value.device != self.output_device: - data_dict[key] = value.to(self.output_device) - - # Add a batch dimension to the data_dict - data_dict = {k: v.unsqueeze(0) for k, v in data_dict.items()} - - return data_dict - - def __iter__(self): - if self.dataset is None: - raise ValueError( - "Dataset is not present, can not use the datapipe as an iterator." - ) - - for i, batch in enumerate(self.dataset): - yield self.__call__(batch) - - -def compute_scaling_factors( - cfg: DictConfig, input_path: str, target_keys: list[str], use_cache=None -) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """ - Using the dataset at the path, compute the mean, std, min, and max of the target keys. - - Args: - cfg: Hydra configuration object containing all parameters - input_path: Path to the dataset to load. - target_keys: List of keys to compute the mean, std, min, and max of. - use_cache: (deprecated) This argument has no effect. - """ - - device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") - - dataset = DrivaerMLDataset( - data_dir=input_path, - keys_to_read=target_keys, - output_device=device, - ) - - mean, std, min_val, max_val = compute_mean_std_min_max( - dataset, - field_keys=target_keys, - ) - - return mean, std, min_val, max_val - - -class CachedDoMINODataset(Dataset): - """ - Dataset for reading cached DoMINO data files, with optional resampling. - Acts as a drop-in replacement for DoMINODataPipe. - """ - - # @nvtx_annotate(message="CachedDoMINODataset __init__") - def __init__( - self, - data_path: Union[str, Path], - phase: Literal["train", "val", "test"] = "train", - sampling: bool = False, - volume_points_sample: Optional[int] = None, - surface_points_sample: Optional[int] = None, - geom_points_sample: Optional[int] = None, - model_type=None, # Model_type, surface, volume or combined - deterministic_seed=False, - surface_sampling_algorithm="area_weighted", - ): - super().__init__() - - self.model_type = model_type - if deterministic_seed: - np.random.seed(42) - - if isinstance(data_path, str): - data_path = Path(data_path) - self.data_path = data_path.expanduser() - - if not self.data_path.exists(): - raise AssertionError(f"Path {self.data_path} does not exist") - if not self.data_path.is_dir(): - raise AssertionError(f"Path {self.data_path} is not a directory") - - self.deterministic_seed = deterministic_seed - self.sampling = sampling - self.volume_points = volume_points_sample - self.surface_points = surface_points_sample - self.geom_points = geom_points_sample - self.surface_sampling_algorithm = surface_sampling_algorithm - - self.filenames = get_filenames(self.data_path, exclude_dirs=True) - - total_files = len(self.filenames) - - self.phase = phase - self.indices = np.array(range(total_files)) - - np.random.shuffle(self.indices) - - if not self.filenames: - raise AssertionError(f"No cached files found in {self.data_path}") - - def __len__(self): - return len(self.indices) - - # @nvtx_annotate(message="CachedDoMINODataset __getitem__") - def __getitem__(self, idx): - if self.deterministic_seed: - np.random.seed(idx) - nvtx.range_push("Load cached file") - - index = self.indices[idx] - cfd_filename = self.filenames[index] - - filepath = self.data_path / cfd_filename - result = np.load(filepath, allow_pickle=True).item() - result = { - k: torch.from_numpy(v) if isinstance(v, np.ndarray) else v - for k, v in result.items() - } - - nvtx.range_pop() - if not self.sampling: - return result - - nvtx.range_push("Sample points") - - # Sample volume points if present - if "volume_mesh_centers" in result and self.volume_points: - coords_sampled, idx_volume = shuffle_array( - result["volume_mesh_centers"], self.volume_points - ) - if coords_sampled.shape[0] < self.volume_points: - coords_sampled = pad( - coords_sampled, self.volume_points, pad_value=-10.0 - ) - - result["volume_mesh_centers"] = coords_sampled - for key in [ - "volume_fields", - "pos_volume_closest", - "pos_volume_center_of_mass", - "sdf_nodes", - ]: - if key in result: - result[key] = result[key][idx_volume] - - # Sample surface points if present - if "surface_mesh_centers" in result and self.surface_points: - if self.surface_sampling_algorithm == "area_weighted": - coords_sampled, idx_surface = shuffle_array( - points=result["surface_mesh_centers"], - n_points=self.surface_points, - weights=result["surface_areas"], - ) - else: - coords_sampled, idx_surface = shuffle_array( - result["surface_mesh_centers"], self.surface_points - ) - - if coords_sampled.shape[0] < self.surface_points: - coords_sampled = pad( - coords_sampled, self.surface_points, pad_value=-10.0 - ) - - ii = result["neighbor_indices"] - result["surface_mesh_neighbors"] = result["surface_mesh_centers"][ii] - result["surface_neighbors_normals"] = result["surface_normals"][ii] - result["surface_neighbors_areas"] = result["surface_areas"][ii] - - result["surface_mesh_centers"] = coords_sampled - - for key in [ - "surface_fields", - "surface_areas", - "surface_normals", - "pos_surface_center_of_mass", - "surface_mesh_neighbors", - "surface_neighbors_normals", - "surface_neighbors_areas", - ]: - if key in result: - result[key] = result[key][idx_surface] - - del result["neighbor_indices"] - - # Sample geometry points if present - if "geometry_coordinates" in result and self.geom_points: - coords_sampled, _ = shuffle_array( - result["geometry_coordinates"], self.geom_points - ) - if coords_sampled.shape[0] < self.geom_points: - coords_sampled = pad(coords_sampled, self.geom_points, pad_value=-100.0) - result["geometry_coordinates"] = coords_sampled - - nvtx.range_pop() - return result - - -def create_domino_dataset( - cfg: DictConfig, - phase: Literal["train", "val", "test"], - keys_to_read: list[str], - keys_to_read_if_available: dict[str, torch.Tensor], - vol_factors: list[float], - surf_factors: list[float], - normalize_coordinates: bool = True, - sample_in_bbox: bool = True, - sampling: bool = True, - device_mesh: torch.distributed.DeviceMesh | None = None, - placements: dict[str, torch.distributed.tensor.Placement] | None = None, -): - model_type = cfg.model.model_type - if phase == "train": - input_path = cfg.data.input_dir - dataloader_cfg = cfg.train.dataloader - elif phase == "val": - input_path = cfg.data.input_dir_val - dataloader_cfg = cfg.val.dataloader - elif phase == "test": - input_path = cfg.eval.test_path - dataloader_cfg = None - else: - raise ValueError(f"Invalid phase {phase}") - - if cfg.data_processor.use_cache: - return CachedDoMINODataset( - input_path, - phase=phase, - sampling=sampling, - volume_points_sample=cfg.model.volume_points_sample, - surface_points_sample=cfg.model.surface_points_sample, - geom_points_sample=cfg.model.geom_points_sample, - model_type=cfg.model.model_type, - surface_sampling_algorithm=cfg.model.surface_sampling_algorithm, - ) - else: - # The dataset path works in two pieces: - # There is a core "dataset" which is loading data and moving to GPU - # And there is the preprocess step, here. - - # Optionally, and for backwards compatibility, the preprocess - # object can accept a dataset which will enable it as an iterator. - # The iteration function will loop over the dataset, preprocess the - # output, and return it. - - overrides = {} - if hasattr(cfg.data, "gpu_preprocessing"): - overrides["gpu_preprocessing"] = cfg.data.gpu_preprocessing - - if hasattr(cfg.data, "gpu_output"): - overrides["gpu_output"] = cfg.data.gpu_output - - dm = DistributedManager() - - if cfg.data.gpu_preprocessing: - device = dm.device - consumer_stream = torch.cuda.default_stream() - else: - device = torch.device("cpu") - consumer_stream = None - - if dataloader_cfg is not None: - preload_depth = dataloader_cfg.preload_depth - pin_memory = dataloader_cfg.pin_memory - else: - preload_depth = 2 - pin_memory = False - - dataset = DrivaerMLDataset( - data_dir=input_path, - keys_to_read=keys_to_read, - keys_to_read_if_available=keys_to_read_if_available, - output_device=device, - preload_depth=preload_depth, - pin_memory=pin_memory, - device_mesh=device_mesh, - placements=placements, - consumer_stream=consumer_stream, - ) - - datapipe = DoMINODataPipe( - input_path, - phase=phase, - grid_resolution=cfg.model.interp_res, - normalize_coordinates=normalize_coordinates, - sampling=sampling, - sample_in_bbox=sample_in_bbox, - volume_points_sample=cfg.model.volume_points_sample, - surface_points_sample=cfg.model.surface_points_sample, - geom_points_sample=cfg.model.geom_points_sample, - volume_factors=vol_factors, - surface_factors=surf_factors, - scaling_type=cfg.model.normalization, - model_type=model_type, - bounding_box_dims=cfg.data.bounding_box, - bounding_box_dims_surf=cfg.data.bounding_box_surface, - num_surface_neighbors=cfg.model.num_neighbors_surface, - surface_sampling_algorithm=cfg.model.surface_sampling_algorithm, - **overrides, - ) - - datapipe.set_dataset(dataset) - - return datapipe - - -if __name__ == "__main__": - fm_data = DoMINODataPipe( - data_path="/code/processed_data/new_models_1/", - phase="train", - sampling=False, - sample_in_bbox=False, - ) diff --git a/physicsnemo/datapipes/cae/domino_sharded_datapipe.py b/physicsnemo/datapipes/cae/domino_sharded_datapipe.py deleted file mode 100644 index fe2b0d5fcf..0000000000 --- a/physicsnemo/datapipes/cae/domino_sharded_datapipe.py +++ /dev/null @@ -1,176 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. -# SPDX-FileCopyrightText: All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from dataclasses import asdict - -import torch - -from physicsnemo.utils.version_check import check_module_requirements - -from .domino_datapipe import DoMINODataPipe - -# Prevent importing this module if the minimum version of pytorch is not met. -check_module_requirements("physicsnemo.distributed.shard_tensor") - -from torch.distributed.tensor.placement_types import ( # noqa: E402 - Replicate, - Shard, -) - -from physicsnemo.distributed.shard_tensor import ShardTensor # noqa: E402 - - -class ShardedDoMINODataPipe(DoMINODataPipe): - """ - An extension of the DoMINODataPipe for domain parallel training. - - How this works: - 1. the preprocessing is done in cupy or numpy in the base class, which we - want to keep. - 2. Dataloading is done on one file per idx in __getitem__. For sharded data, - we want to load one file per mesh and shard or replicate the data as needed. - 3. The sharding can be either on the grid or the point clouds. We shard the grids - after loading point data, so data loading only worries about the point clouds. - 4. For numpy files (.npz, .npy), each rank reads the whole file and takes only - the data it needs, in the end. Because data loading is the bulk of the time, - this preprocesses everything independently and then shards. - 5. For Zarr files, each rank can read slices of the data independently. So - infer the chunk size, based on the number of ranks in the mesh and sharding, - and then read the right slice. - 6. For some of the pipeline, we need the full data. So it gets gathered locally. - 7. After preprocessing, the data is chunked into appropriate shards and sent out. - 8. This file provides a wrapper function for the collate function (like a decorator) - that will turn appropriate cupy into tensors and then into shard tensors. - - """ - - def __init__( - self, - input_path, - model_type, - domain_mesh, - shard_point_cloud, - shard_grid, - **config_overrides, - ): - # if 'gpu_output' not in config_overrides: - config_overrides["gpu_output"] = True - - # First, initialize the super class. - super().__init__( - input_path, - model_type, - **config_overrides, - ) - - self.domain_mesh = domain_mesh - - self.shard_point_cloud = shard_point_cloud - self.shard_grid = shard_grid - - # These are keys that are point-like - self.point_cloud_keys = [ - "volume_fields", - "pos_volume_closest", - "pos_volume_center_of_mass", - "pos_surface_center_of_mass", - "geometry_coordinates", - "surface_mesh_centers", - "surface_mesh_neighbors", - "sdf_nodes", - "surface_normals", - "surface_neighbors_normals", - "surface_areas", - "surface_neighbors_areas", - "volume_mesh_centers", - "surface_fields", - ] - - # These keys are grid-like - self.grid_keys = [ - "grid", - "surf_grid", - "sdf_grid", - "sdf_surf_grid", - ] - - # These keys are scalar-like and should never be sharded - self.scalar_keys = [ - "global_params_values", - "global_params_reference", - "surface_min_max", - "volume_min_max", - "length_scale", - ] - - def __getitem__(self, idx): - single_dict = super().__getitem__(idx) - - # Here, we're assuming that the data is already replicated. - # Turn all the pieces of the dict into ShardTensors with that placement. - default_placement = [ - Replicate(), - ] - for key, value in single_dict.items(): - if isinstance(value, torch.Tensor): - single_dict[key] = ShardTensor.from_local( - value, self.domain_mesh, default_placement - ) - - # # Now, shard the data. - sharding = [ - Shard(0), - ] - if self.shard_point_cloud: - for key in self.point_cloud_keys: - if key in single_dict: - single_dict[key] = single_dict[key].redistribute( - placements=sharding - ) - - if self.shard_grid: - for key in self.grid_keys: - if key in single_dict: - single_dict[key] = single_dict[key].redistribute( - placements=sharding - ) - - return single_dict - - -def create_sharded_domino_dataset( - base_dataset, - domain_mesh, - shard_point_cloud, - shard_grid, -): - # Pull off the data path, model type, and config_dict: - data_path = base_dataset.config.data_path - model_type = base_dataset.model_type - config_dict = asdict(base_dataset.config) - - # Make sure the input path is not included in the config_dict: - config_dict.pop("data_path") - - # Use the configuration of the base dataset to create a sharded dataset: - return ShardedDoMINODataPipe( - input_path=data_path, - model_type=model_type, - domain_mesh=domain_mesh, - shard_point_cloud=shard_point_cloud, - shard_grid=shard_grid, - **config_dict, - ) diff --git a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py index 1e6ae62f81..bb9a17041b 100644 --- a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py +++ b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py @@ -793,6 +793,9 @@ def _move_to_gpu( with torch.cuda.stream(self._data_loader_stream): for key in data.keys(): + if data[key].device == self.output_device: + result[key] = data[key] + continue if self.pin_memory: result[key] = ( data[key].pin_memory().to(self.output_device, non_blocking=True) From b9964179a22347a66df07b2bc26cbca7cd182ec5 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Tue, 30 Sep 2025 16:15:07 +0000 Subject: [PATCH 60/98] Remove printouts. --- examples/cfd/external_aerodynamics/domino/src/train.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py index 3763ce68ec..ffadc0403c 100644 --- a/examples/cfd/external_aerodynamics/domino/src/train.py +++ b/examples/cfd/external_aerodynamics/domino/src/train.py @@ -184,10 +184,6 @@ def train_epoch( with Profiler(): io_start_time = time.perf_counter() for i_batch, sampled_batched in enumerate(dataloader): - for key in sampled_batched.keys(): - print( - f"{key} has shape {sampled_batched[key].shape} and autograd fn {sampled_batched[key].autograd_fn if hasattr(sampled_batched[key], 'autograd_fn') else None}" - ) io_end_time = time.perf_counter() if add_physics_loss: autocast_enabled = False From f7aab1272ddc98759011c19c2184c384f0483cf3 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Tue, 30 Sep 2025 17:46:23 +0000 Subject: [PATCH 61/98] Add unified gpu memory interface that correctly places memory pools onto the right device. --- .../domino/src/benchmark_dataloader.py | 10 +- .../domino/src/inference_on_stl.py | 9 +- .../external_aerodynamics/domino/src/train.py | 41 ++----- physicsnemo/datapipes/cae/domino_datapipe.py | 2 +- physicsnemo/utils/memory.py | 114 ++++++++++++++++++ 5 files changed, 129 insertions(+), 47 deletions(-) create mode 100644 physicsnemo/utils/memory.py diff --git a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py index 090fbf361c..345f78fa2c 100644 --- a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py +++ b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py @@ -40,14 +40,8 @@ from hydra.utils import to_absolute_path from omegaconf import DictConfig, OmegaConf -DISABLE_RMM = os.environ.get("DOMINO_DISABLE_RMM", False) -if not DISABLE_RMM: - import rmm - from rmm.allocators.torch import rmm_torch_allocator - import torch - - rmm.reinitialize(pool_allocator=True) - torch.cuda.memory.change_current_allocator(rmm_torch_allocator) +# This will set up the cupy-ecosystem and pytorch to share memory pools +from physicsnemo.utils.memory import unified_gpu_memory import torch.distributed as dist diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py index a55f703d66..bee8c1cd2f 100644 --- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py +++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py @@ -42,13 +42,8 @@ from omegaconf import DictConfig, OmegaConf import torch -DISABLE_RMM = os.environ.get("DISABLE_RMM", False) -if not DISABLE_RMM: - import rmm - from rmm.allocators.torch import rmm_torch_allocator - - rmm.reinitialize(pool_allocator=True) - torch.cuda.memory.change_current_allocator(rmm_torch_allocator) +# This will set up the cupy-ecosystem and pytorch to share memory pools +from physicsnemo.utils.memory import unified_gpu_memory import torchinfo import torch.distributed as dist diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py index ffadc0403c..94e184f3b4 100644 --- a/examples/cfd/external_aerodynamics/domino/src/train.py +++ b/examples/cfd/external_aerodynamics/domino/src/train.py @@ -38,27 +38,8 @@ from hydra.utils import to_absolute_path from omegaconf import DictConfig, OmegaConf - -def srt2bool(val: str): - if isinstance(val, bool): - return val - if val.lower() in ["true", "1", "yes", "y"]: - return True - elif val.lower() in ["false", "0", "no", "n"]: - return False - else: - raise ValueError(f"Invalid boolean value: {val}") - - -DISABLE_RMM = srt2bool(os.environ.get("DOMINO_DISABLE_RMM", False)) - -if not DISABLE_RMM: - import rmm - from rmm.allocators.torch import rmm_torch_allocator - import torch - - rmm.reinitialize(pool_allocator=True) - torch.cuda.memory.change_current_allocator(rmm_torch_allocator) +# This will set up the cupy-ecosystem and pytorch to share memory pools +from physicsnemo.utils.memory import unified_gpu_memory import torchinfo import torch.distributed as dist @@ -477,16 +458,14 @@ def main(cfg: DictConfig) -> None: ###################################################### # Load checkpoint if available ###################################################### - - # init_epoch = load_checkpoint( - # to_absolute_path(cfg.resume_dir), - # models=model, - # optimizer=optimizer, - # scheduler=scheduler, - # scaler=scaler, - # device=dist.device, - # ) - init_epoch = 0 + init_epoch = load_checkpoint( + to_absolute_path(cfg.resume_dir), + models=model, + optimizer=optimizer, + scheduler=scheduler, + scaler=scaler, + device=dist.device, + ) if init_epoch != 0: init_epoch += 1 # Start with the next epoch diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py index 5feae7e118..c9df6bceb0 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe.py +++ b/physicsnemo/datapipes/cae/domino_datapipe.py @@ -1173,7 +1173,7 @@ def create_domino_dataset( preload_depth = dataloader_cfg.preload_depth pin_memory = dataloader_cfg.pin_memory else: - preload_depth = 2 + preload_depth = 1 pin_memory = False dataset = DrivaerMLDataset( diff --git a/physicsnemo/utils/memory.py b/physicsnemo/utils/memory.py new file mode 100644 index 0000000000..54ceb5061c --- /dev/null +++ b/physicsnemo/utils/memory.py @@ -0,0 +1,114 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import torch + +try: + import rmm + + RMM_AVAILABLE = True +except ImportError: + RMM_AVAILABLE = False + +try: + import cupy + + CUPY_AVAILABLE = True +except ImportError: + CUPY_AVAILABLE = False + +""" +Using a unifed gpu memory provider, we consolidate the pool into just a +single allocator for cupy/rapids and torch. Ideally, we add warp to this someday. + +To use this, you need to add the following to your code at or near the top +(before allocating any GPU memory): + +```python +from physicsnemo.utils.memory import unified_gpu_memory +``` + +""" + + +def srt2bool(val: str): + if isinstance(val, bool): + return val + if val.lower() in ["true", "1", "yes", "y"]: + return True + elif val.lower() in ["false", "0", "no", "n"]: + return False + else: + raise ValueError(f"Invalid boolean value: {val}") + + +DISABLE_RMM = srt2bool(os.environ.get("PHYSICSNEMO_DISABLE_RMM", False)) + + +def _setup_unified_gpu_memory(): + # Skip if RMM is disabled + if RMM_AVAILABLE and not DISABLE_RMM: + # First, determine the local rank so that we allocate on the right device. + # These are meant to be tested in the same order as DistributedManager + # We can't actually initialize it, though, since we have to unify mallocs + # before torch init. + PHYSICSNEMO_DISTRIBUTED_INITIALIZATION_METHOD = os.environ.get( + "PHYSICSNEMO_DISTRIBUTED_INITIALIZATION_METHOD", None + ) + if PHYSICSNEMO_DISTRIBUTED_INITIALIZATION_METHOD is None: + for method in ["LOCAL_RANK", "OMPI_COMM_WORLD_LOCAL_RANK", "SLURM_LOCALID"]: + if os.environ.get(method) is not None: + local_rank = int(os.environ.get(method)) + break + else: + if PHYSICSNEMO_DISTRIBUTED_INITIALIZATION_METHOD == "ENV": + local_rank = int(os.environ.get("LOCAL_RANK")) + elif PHYSICSNEMO_DISTRIBUTED_INITIALIZATION_METHOD == "SLURM": + local_rank = int(os.environ.get("SLURM_LOCALID")) + elif PHYSICSNEMO_DISTRIBUTED_INITIALIZATION_METHOD == "OPENMPI": + local_rank = int(os.environ.get("OMPI_COMM_WORLD_LOCAL_RANK")) + else: + raise ValueError( + f"Unknown initialization method: {PHYSICSNEMO_DISTRIBUTED_INITIALIZATION_METHOD}" + ) + + # Initialize RMM + rmm.reinitialize( + pool_allocator=True, devices=local_rank, initial_pool_size="1024MB" + ) + + # Set PyTorch allocator if available + from rmm.allocators.torch import rmm_torch_allocator + + if torch.cuda.is_available(): + torch.cuda.memory.change_current_allocator(rmm_torch_allocator) + + # Set CuPy allocator if available + if CUPY_AVAILABLE: + from rmm.allocators.cupy import rmm_cupy_allocator + + cupy.cuda.set_allocator(rmm_cupy_allocator) + + +# This is what gets executed when someone does "from memory import unified_gpu_memory" + + +def __getattr__(name): + if name == "unified_gpu_memory": + return _setup_unified_gpu_memory() + raise AttributeError(f"module '{__name__}' has no attribute '{name}'") From e36bf9b41da6ec53d461e7a1cd0b814c4404fad0 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 1 Oct 2025 15:48:14 +0000 Subject: [PATCH 62/98] Fix indexing error in the dataset that was leading to GPU memory leaking. --- physicsnemo/datapipes/cae/drivaer_ml_dataset.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py index bb9a17041b..af844b2656 100644 --- a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py +++ b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py @@ -934,15 +934,23 @@ def __iter__(self): def __next__(self): N = len(self.indices) if hasattr(self, "indices") else len(self._filenames) + # Iteration bounds are based on the counter, not the random-access index if self.i >= N: self.i = 0 raise StopIteration - for i in range(self.preload_depth): - if N > i + 1: - self.preload(self.i + i) + # This is the file random access index + target_index = self.idx_to_index(self.i) + + # Before returning, put the next two target indexes into the queue: + for preload_i in range(self.preload_depth): + next_iteration_index = self.i + preload_i + 1 + if N > next_iteration_index: + preload_idx = self.idx_to_index(next_iteration_index) + self.preload(preload_idx) - data = self.__getitem__(self.i) + # Send up the random-access data: + data = self.__getitem__(target_index) self.i += 1 From 5240b33bfc9cb751f5bfc8602cfcf03398601ba8 Mon Sep 17 00:00:00 2001 From: Rishikesh Ranade Date: Wed, 1 Oct 2025 08:53:33 -0700 Subject: [PATCH 63/98] fix in scaling factors calculation --- .../external_aerodynamics/domino/src/conf/config.yaml | 10 +++++----- examples/cfd/external_aerodynamics/domino/src/train.py | 9 ++++----- physicsnemo/datapipes/cae/domino_datapipe.py | 8 ++++---- physicsnemo/models/domino/model.py | 8 -------- 4 files changed, 13 insertions(+), 22 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml index aa17995327..26870b461f 100644 --- a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml +++ b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml @@ -71,8 +71,8 @@ variables: # │ Data Configs │ # └───────────────────────────────────────────┘ data: # Input directory for training and validation data - input_dir: /user/data/aws_data_all/ - input_dir_val: /user/data/aws_data_all_val/ + input_dir: /lustre/rranade/modulus_dev/data/aws_data_all/ + input_dir_val: /lustre/rranade/modulus_dev/data/aws_data_all_val/ bounding_box: # Bounding box dimensions for computational domain min: [-3.5, -2.25, -0.32] max: [8.5, 2.25, 3.00] @@ -84,7 +84,7 @@ data: # Input directory for training and validation data normalize_coordinates: true sample_in_bbox: true sampling: true - scaling_factors: outputs/AWS_Dataset/1/scaling_factors/scaling_factors.pkl + scaling_factors: ${project_dir}/scaling_factors/scaling_factors.pkl # ┌───────────────────────────────────────────┐ # │ Domain Parallelism Settings │ @@ -174,10 +174,10 @@ model: # └───────────────────────────────────────────┘ train: # Training configurable parameters epochs: 1000 - checkpoint_interval: 1 + checkpoint_interval: 50 dataloader: batch_size: 1 - preload_depth: 2 + preload_depth: 1 pin_memory: True # if the preprocessing is outputing GPU data, set this to false sampler: shuffle: true diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py index 94e184f3b4..4bf52bfb2e 100644 --- a/examples/cfd/external_aerodynamics/domino/src/train.py +++ b/examples/cfd/external_aerodynamics/domino/src/train.py @@ -152,6 +152,7 @@ def train_epoch( eqn: Any = None, bounding_box: torch.Tensor | None = None, vol_factors: torch.Tensor | None = None, + surf_factors: torch.Tensor | None = None, add_physics_loss=False, ): dist = DistributedManager() @@ -284,8 +285,8 @@ def main(cfg: DictConfig) -> None: f"Scaling factors not found at: {pickle_path}; please run compute_statistics.py to compute them." ) - vol_factors = scaling_factors.mean["volume_fields"] - surf_factors = scaling_factors.mean["surface_fields"] + vol_factors = np.asarray([scaling_factors.max_val["volume_fields"], scaling_factors.min_val["volume_fields"]]) + surf_factors = np.asarray([scaling_factors.max_val["surface_fields"], scaling_factors.min_val["surface_fields"]]) vol_factors_tensor = torch.from_numpy(vol_factors).to(dist.device) ###################################################### @@ -509,7 +510,7 @@ def main(cfg: DictConfig) -> None: else: surface_scaling_loss = cfg.model.surf_loss_scaling - # model.train(True) + model.train(True) epoch_start_time = time.perf_counter() avg_loss = train_epoch( dataloader=train_dataloader, @@ -537,8 +538,6 @@ def main(cfg: DictConfig) -> None: ) epoch_end_time = time.perf_counter() - return - model.eval() avg_vloss = validation_step( dataloader=val_dataloader, diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py index c9df6bceb0..e4fd314695 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe.py +++ b/physicsnemo/datapipes/cae/domino_datapipe.py @@ -820,12 +820,12 @@ def scale_model_targets( Scale the model targets based on the configured scaling factors. """ if self.config.scaling_type == "mean_std_scaling": - field_mean = self.config.volume_factors[0] - field_std = self.config.volume_factors[1] + field_mean = factors[0] + field_std = factors[1] return standardize(fields, field_mean, field_std) elif self.config.scaling_type == "min_max_scaling": - field_min = self.config.volume_factors[1] - field_max = self.config.volume_factors[0] + field_min = factors[1] + field_max = factors[0] return normalize(fields, field_max, field_min) def unscale_model_outputs( diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py index 94f6adaaba..e0ec7b772c 100644 --- a/physicsnemo/models/domino/model.py +++ b/physicsnemo/models/domino/model.py @@ -308,14 +308,6 @@ def __init__( hops=model_parameters.geometry_rep.geo_conv.surface_hops, model_parameters=model_parameters, ) - - self.geo_rep_surface1 = GeometryRep( - input_features=input_features, - radii=model_parameters.geometry_rep.geo_conv.volume_radii, - neighbors_in_radius=model_parameters.geometry_rep.geo_conv.volume_neighbors_in_radius, - model_parameters=model_parameters, - ) - # Basis functions for surface and volume base_layer_nn = model_parameters.nn_basis_functions.base_layer if self.output_features_surf is not None: From fec26d5e8ebfd25f71714810150b04912a45affe Mon Sep 17 00:00:00 2001 From: Rishikesh Ranade Date: Thu, 2 Oct 2025 09:40:36 -0700 Subject: [PATCH 64/98] small fixes in datapipe and model --- physicsnemo/datapipes/cae/domino_datapipe.py | 26 ++++++-------------- physicsnemo/models/domino/model.py | 1 + 2 files changed, 8 insertions(+), 19 deletions(-) diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py index e4fd314695..8a0aa5e035 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe.py +++ b/physicsnemo/datapipes/cae/domino_datapipe.py @@ -449,11 +449,7 @@ def process_surface( ) if surface_coordinates_sampled.shape[0] < self.config.surface_points_sample: - surface_coordinates_sampled = pad( - surface_coordinates_sampled, - self.config.surface_points_sample, - pad_value=-10.0, - ) + raise ValueError("Sampled points is more than points in the surface mesh") # Select out the sampled points for non-neighbor arrays: if surface_fields is not None: @@ -485,7 +481,7 @@ def process_surface( # Better to normalize everything after the kNN and sampling if self.config.normalize_coordinates: - surf_grid = normalize(surf_grid, s_max, s_min) + # surf_grid = normalize(surf_grid, s_max, s_min) surface_coordinates = normalize(surface_coordinates, s_max, s_min) surface_neighbors = normalize(surface_neighbors, s_max, s_min) # Make sure to normalize the center of mass for the normals_com_surface calc @@ -567,17 +563,7 @@ def process_volume( # In case too few points are in the sampled data (because the # inputs were too few), pad the outputs: if volume_coordinates_sampled.shape[0] < self.config.volume_points_sample: - padding_size = ( - self.config.volume_points_sample - - volume_coordinates_sampled.shape[0] - ) - - volume_coordinates_sampled = torch.nn.functional.pad( - volume_coordinates_sampled, - (0, 0, 0, 0, 0, padding_size), - mode="constant", - value=-10.0, - ) + raise ValueError("Sampled points is more than points in the volume mesh") # Apply the same sampling to the targets, too: if volume_fields is not None: @@ -594,6 +580,7 @@ def process_volume( # This is used later in the SDF, apply the same scaling to the mesh # coordinates: normed_vertices = normalize(stl_vertices, c_max, c_min) + center_of_mass = normalize(center_of_mass, c_max, c_min) else: grid = volume_grid normed_vertices = stl_vertices @@ -658,9 +645,9 @@ def calculate_volume_encoding( center_of_mass: torch.Tensor, ): if self.config.normalize_coordinates: - volume_coordinates = normalize(volume_coordinates, c_max, c_min) + # volume_coordinates = normalize(volume_coordinates, c_max, c_min) sdf_node_closest_point = normalize(sdf_node_closest_point, c_max, c_min) - center_of_mass = normalize(center_of_mass, c_max, c_min) + # center_of_mass = normalize(center_of_mass, c_max, c_min) pos_normals_closest_vol = volume_coordinates - sdf_node_closest_point pos_normals_com_vol = volume_coordinates - center_of_mass @@ -731,6 +718,7 @@ def process_data(self, data_dict): # This is for the SDF Later: if self.config.normalize_coordinates: normed_vertices = normalize(data_dict["stl_coordinates"], s_max, s_min) + surf_grid = normalize(surf_grid, s_max, s_min) else: normed_vertices = data_dict["stl_coordinates"] diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py index e0ec7b772c..aea2e91ad4 100644 --- a/physicsnemo/models/domino/model.py +++ b/physicsnemo/models/domino/model.py @@ -308,6 +308,7 @@ def __init__( hops=model_parameters.geometry_rep.geo_conv.surface_hops, model_parameters=model_parameters, ) + # Basis functions for surface and volume base_layer_nn = model_parameters.nn_basis_functions.base_layer if self.output_features_surf is not None: From 073a3f97a41fc5d4101badbd3b7986d91b645734 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Tue, 7 Oct 2025 13:42:52 +0000 Subject: [PATCH 65/98] Fix factor calculations --- .../external_aerodynamics/domino/src/utils.py | 11 ++++++++--- physicsnemo/datapipes/cae/domino_datapipe.py | 16 +++++++++++----- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/src/utils.py b/examples/cfd/external_aerodynamics/domino/src/utils.py index 6d05c90bfc..12259641e3 100644 --- a/examples/cfd/external_aerodynamics/domino/src/utils.py +++ b/examples/cfd/external_aerodynamics/domino/src/utils.py @@ -107,10 +107,15 @@ def get_keys_to_read( # If these keys are in the config, use them, else provide defaults in # case they aren't in the dataset: - # TODO + cfg_params_vec = [] + for key in cfg.variables.global_parameters: + if cfg.variables.global_parameters[key].type == "vector": + cfg_params_vec.append(*cfg.variables.global_parameters[key].reference) + else: + cfg_params_vec.append(cfg.variables.global_parameters[key].reference) keys_to_read_if_available = { - "global_params_values": torch.tensor([[30.0], [1.226]]), - "global_params_reference": torch.tensor([[30.0], [1.226]]), + "global_params_values": torch.tensor(cfg_params_vec).reshape(-1, 1), + "global_params_reference": torch.tensor(cfg_params_vec).reshape(-1, 1), } # Volume keys: diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py index 8a0aa5e035..cbd6296495 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe.py +++ b/physicsnemo/datapipes/cae/domino_datapipe.py @@ -478,6 +478,10 @@ def process_surface( surface_neighbors = full_surface_coordinates[neighbor_indices][:, 1:] surface_neighbors_normals = full_surface_normals[neighbor_indices][:, 1:] surface_neighbors_sizes = full_surface_sizes[neighbor_indices][:, 1:] + else: + surface_neighbors = surface_coordinates + surface_neighbors_normals = surface_normals + surface_neighbors_sizes = surface_sizes # Better to normalize everything after the kNN and sampling if self.config.normalize_coordinates: @@ -817,7 +821,9 @@ def scale_model_targets( return normalize(fields, field_max, field_min) def unscale_model_outputs( - self, volume_fields: torch.Tensor | None, surface_fields: torch.Tensor | None + self, + volume_fields: torch.Tensor | None = None, + surface_fields: torch.Tensor | None = None, ): """ Unscale the model outputs based on the configured scaling factors. @@ -833,8 +839,8 @@ def unscale_model_outputs( vol_std = self.config.volume_factors[1] volume_fields = unstandardize(volume_fields, vol_mean, vol_std) elif self.config.scaling_type == "min_max_scaling": - vol_min = self.config.volume_factors[1] - vol_max = self.config.volume_factors[0] + vol_min = self.config.volume_factors[0] + vol_max = self.config.volume_factors[1] volume_fields = unnormalize(volume_fields, vol_max, vol_min) if surface_fields is not None: if self.config.scaling_type == "mean_std_scaling": @@ -842,8 +848,8 @@ def unscale_model_outputs( surf_std = self.config.surface_factors[1] surface_fields = unstandardize(surface_fields, surf_mean, surf_std) elif self.config.scaling_type == "min_max_scaling": - surf_min = self.config.surface_factors[1] - surf_max = self.config.surface_factors[0] + surf_min = self.config.surface_factors[0] + surf_max = self.config.surface_factors[1] surface_fields = unnormalize(surface_fields, surf_max, surf_min) return volume_fields, surface_fields From 594b9edf9f980c672d5ff6e1b13be1c374b988d6 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Tue, 7 Oct 2025 19:09:23 +0000 Subject: [PATCH 66/98] Enable sliced reading of volumetric data. --- physicsnemo/datapipes/cae/domino_datapipe.py | 39 ++-- .../datapipes/cae/drivaer_ml_dataset.py | 168 ++++++++++++++---- physicsnemo/utils/domino/utils.py | 2 +- 3 files changed, 162 insertions(+), 47 deletions(-) diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py index cbd6296495..9ee479223d 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe.py +++ b/physicsnemo/datapipes/cae/domino_datapipe.py @@ -87,6 +87,9 @@ class DoMINODataConfig: attributes that are arraylike. volume_variables: (Volume specific) Names of volume variables. volume_points_sample: (Volume specific) Number of volume points to sample per batch. + volume_sample_from_disk: (Volume specific) If the volume data is in a shuffled state on disk, + read contiguous chunks of the data rather than the entire volume data. This greatly + accelerates IO in bandwidth limited systems or when the volumetric data is very large. volume_factors: (Volume specific) Non-dimensionalization factors for volume variables scaling. If set, and scaling_type is: - min_max_scaling -> rescale volume_fields to the min/max set here @@ -138,6 +141,7 @@ class DoMINODataConfig: # Volume specific variables: volume_variables: Optional[Sequence] = ("UMean", "pMean") volume_points_sample: int = 1024 + volume_sample_from_disk: bool = False volume_factors: Optional[Sequence] = None bounding_box_dims: Optional[Union[BoundingBox, Sequence]] = None @@ -282,16 +286,20 @@ def __init__( # Ensure the volume and surface scaling factors are torch tensors # and on the right device: if self.config.volume_factors is not None: - self.config.volume_factors = torch.tensor( - self.config.volume_factors, - device=self.preproc_device, - dtype=torch.float32, + if not isinstance(self.config.volume_factors, torch.Tensor): + self.config.volume_factors = torch.from_numpy( + self.config.volume_factors + ) + self.config.volume_factors = self.config.volume_factors.to( + self.preproc_device, dtype=torch.float32 ) if self.config.surface_factors is not None: - self.config.surface_factors = torch.tensor( - self.config.surface_factors, - device=self.preproc_device, - dtype=torch.float32, + if not isinstance(self.config.surface_factors, torch.Tensor): + self.config.surface_factors = torch.from_numpy( + self.config.surface_factors + ) + self.config.surface_factors = self.config.surface_factors.to( + self.preproc_device, dtype=torch.float32 ) self.dataset = None @@ -449,7 +457,9 @@ def process_surface( ) if surface_coordinates_sampled.shape[0] < self.config.surface_points_sample: - raise ValueError("Sampled points is more than points in the surface mesh") + raise ValueError( + "Sampled points is more than points in the surface mesh" + ) # Select out the sampled points for non-neighbor arrays: if surface_fields is not None: @@ -557,6 +567,8 @@ def process_volume( # Apply sampling to the volume coordinates and fields ######################################################################## + # If the volume data has been sampled from disk, directly, then + # still apply sampling. We over-pull from disk deliberately. if self.config.sampling: # Generate a series of idx to sample the volume # without replacement @@ -567,7 +579,9 @@ def process_volume( # In case too few points are in the sampled data (because the # inputs were too few), pad the outputs: if volume_coordinates_sampled.shape[0] < self.config.volume_points_sample: - raise ValueError("Sampled points is more than points in the volume mesh") + raise ValueError( + "Sampled points is more than points in the volume mesh" + ) # Apply the same sampling to the targets, too: if volume_fields is not None: @@ -860,6 +874,10 @@ def set_dataset(self, dataset: Iterable) -> None: """ self.dataset = dataset + if self.config.volume_sample_from_disk: + # We deliberately double the data to read compared to the sampling size: + self.dataset.set_volume_sampling_size(2 * self.config.volume_points_sample) + def __len__(self): if self.dataset is not None: return len(self.dataset) @@ -1198,6 +1216,7 @@ def create_domino_dataset( model_type=model_type, bounding_box_dims=cfg.data.bounding_box, bounding_box_dims_surf=cfg.data.bounding_box_surface, + volume_sample_from_disk=cfg.data.volume_sample_from_disk, num_surface_neighbors=cfg.model.num_neighbors_surface, surface_sampling_algorithm=cfg.model.surface_sampling_algorithm, **overrides, diff --git a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py index af844b2656..faa62f6ed6 100644 --- a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py +++ b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py @@ -78,6 +78,8 @@ def __init__( self.keys_to_read = keys_to_read self.keys_to_read_if_available = keys_to_read_if_available + self.volume_sampling_size = None + @abstractmethod def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]: """ @@ -145,6 +147,45 @@ def _get_slice_boundaries( return global_chunk_start, global_chunk_stop, chunk_sizes + def set_volume_sampling_size(self, volume_sampling_size: int): + """ + Set the volume sampling size. When set, the readers will + assume the volumetric data is shuffled on disk and read only + contiguous chunks of the data up to the sampling size. + + + Args: + volume_sampling_size: The total size of the volume sampling. + + """ + self.volume_sampling_size = volume_sampling_size + + def select_random_sections_from_slice( + self, + slice_start: int, + slice_stop: int, + n_points: int, + ) -> slice: + """ + + select the contiguous chunks of the volume data to read. + + Args: + n_volume_points: The number of points to sample from the volume. + + Returns: + A tuple of the start and stop indices of the contiguous chunks. + """ + + if slice_stop - slice_start < n_points: + raise ValueError( + f"Slice size {slice_stop - slice_start} is less than the number of points {n_points}" + ) + + # Choose a random start point that will fit the entire n_points region: + start = np.random.randint(slice_start, slice_stop - n_points) + return slice(start, start + n_points) + class NpyFileReader(BackendReader): """ @@ -178,6 +219,14 @@ def read_file_sharded( ) -> dict[str, ShardTensor]: pass + def set_volume_sampling_size(self, volume_sampling_size: int): + """ + This is not supported for npy files. + """ + raise NotImplementedError( + "volume sampling directly from disk is not supported for npy files." + ) + class NpzFileReader(BackendReader): """ @@ -202,7 +251,25 @@ def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]: if len(keys_missing) > 0: raise ValueError(f"Keys {keys_missing} not found in file {filename}") - data = {key: torch.from_numpy(in_data[key][:]) for key in self.keys_to_read} + # Make sure to select the slice outside of the loop. + if self.volume_sampling_size is not None: + volume_slice = self.select_random_sections_from_slice( + 0, + in_data["volume_mesh_centers"].shape[0], + self.volume_sampling_size, + ) + else: + volume_slice = slice(0, in_data["volume_mesh_centers"].shape[0]) + + # This is a slower basic way to do this, to be improved: + data = {} + for key in self.keys_to_read: + if "volume" not in key: + data[key] = torch.from_numpy(in_data[key][:]) + else: + data[key] = torch.from_numpy(in_data[key][volume_slice]) + + # data = {key: torch.from_numpy(in_data[key][:]) for key in self.keys_to_read} return self.fill_optional_keys(data) @@ -211,6 +278,14 @@ def read_file_sharded( ) -> dict[str, ShardTensor]: pass + def set_volume_sampling_size(self, volume_sampling_size: int): + """ + This is not supported for npz files. + """ + raise NotImplementedError( + "volume sampling directly from disk is not supported for npz files." + ) + class ZarrFileReader(BackendReader): """ @@ -235,8 +310,23 @@ def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]: if len(missing_keys) > 0: raise ValueError(f"Keys {missing_keys} not found in file {filename}") + # Make sure to select the slice outside of the loop. + if self.volume_sampling_size is not None: + volume_slice = self.select_random_sections_from_slice( + 0, + group["volume_mesh_centers"].shape[0], + self.volume_sampling_size, + ) + else: + volume_slice = slice(0, group["volume_mesh_centers"].shape[0]) + # This is a slower basic way to do this, to be improved: - data = {key: torch.from_numpy(group[key][:]) for key in self.keys_to_read} + data = {} + for key in self.keys_to_read: + if "volume" not in key: + data[key] = torch.from_numpy(group[key][:]) + else: + data[key] = torch.from_numpy(group[key][volume_slice]) return self.fill_optional_keys(data) @@ -436,6 +526,14 @@ def read_data_from_vtp(self, vtp_path: str) -> dict: raise NotImplementedError("Not implemented yet.") + def set_volume_sampling_size(self, volume_sampling_size: int): + """ + This is not supported for vtk files. + """ + raise NotImplementedError( + "volume sampling directly from disk is not supported for vtk files." + ) + if TENSORSTORE_AVAILABLE: @@ -452,7 +550,7 @@ def __init__( super().__init__(keys_to_read, keys_to_read_if_available) self.spec_template = { - "driver": "zarr2", + "driver": "auto", "kvstore": { "driver": "file", "path": None, @@ -463,6 +561,7 @@ def __init__( { "cache_pool": {"total_bytes_limit": 10_000_000}, "data_copy_concurrency": {"limit": 72}, + "file_io_concurrency": {"limit": 72}, } ) @@ -486,16 +585,31 @@ def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]: key: read_futures[key].result() for key in read_futures.keys() } + # Make sure to select the slice outside of the loop. + # We need + if self.volume_sampling_size is not None: + volume_slice = self.select_random_sections_from_slice( + 0, + read_futures["volume_mesh_centers"].shape[0], + self.volume_sampling_size, + ) + else: + volume_slice = slice(0, read_futures["volume_mesh_centers"].shape[0]) + # Trigger an async read of each data item: # (Each item will be a numpy ndarray after this:) - read_futures = { - key: read_futures[key].read() for key in read_futures.keys() - } + tensor_futures = {} + for key in self.keys_to_read: + if "volume" not in key: + tensor_futures[key] = read_futures[key].read() + # For the volume data, read the slice: + else: + tensor_futures[key] = read_futures[key][volume_slice].read() # Convert them to torch tensors: # (make sure to block for the result) data = { - key: torch.as_tensor(read_futures[key].result(), dtype=torch.float32) + key: torch.as_tensor(tensor_futures[key].result(), dtype=torch.float32) for key in self.keys_to_read } @@ -844,35 +958,6 @@ def _convert_to_shard_tensors( return result - # result = {} - - # for key, tensor in tensors.items(): - # # Create a ShardTensor with whatever layout the data is actually in: - # st = ShardTensor.__new__( - # ShardTensor, - # local_tensor=tensor, - # spec=self.tensor_specs[key], - # requires_grad=False, # By default, the data pipe output doesn't need a grad. - # ) - - # # Find out the desired placement: - # if tensor.numel() > 1: - # if isinstance(self.placements, dict): - # target_placement = self.placements[key] - # else: - # target_placement = self.placements - # else: - # target_placement = (Replicate(),) - - # # Redistribute if necessary: - # # (Recall that this is one dimensional mesh only) - # if st._spec.placements[0] != target_placement[0]: - # st = st.redistribute(placements=target_placement) - - # result[key] = st - - # return result - def preload(self, idx: int) -> None: """ Asynchronously preload the data for the given index (up to CPU, not GPU). @@ -1013,6 +1098,17 @@ def __getitem__(self, idx: int) -> dict[str, torch.Tensor | ShardTensor]: return data + def set_volume_sampling_size(self, volume_sampling_size: int): + """ + Set the volume sampling size. When set, the readers will + assume the volumetric data is shuffled on disk and read only + contiguous chunks of the data up to the sampling size. + + Args: + volume_sampling_size: The total size of the volume sampling. + """ + self.file_reader.set_volume_sampling_size(volume_sampling_size) + def compute_mean_std_min_max( dataset: DrivaerMLDataset, field_keys: list[str], max_samples: int = 20 diff --git a/physicsnemo/utils/domino/utils.py b/physicsnemo/utils/domino/utils.py index fc3af36334..8b7a982142 100644 --- a/physicsnemo/utils/domino/utils.py +++ b/physicsnemo/utils/domino/utils.py @@ -679,7 +679,7 @@ def create_grid( ] # Combine them with meshgrid: - xv, yv, zv = torch.meshgrid(*dd) + xv, yv, zv = torch.meshgrid(*dd, indexing="ij") xv = xv.unsqueeze(-1) yv = yv.unsqueeze(-1) From 4c67de6d6a88bed33bbb44d5bb3d30d0c861c4e3 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Tue, 7 Oct 2025 19:19:46 +0000 Subject: [PATCH 67/98] Update scaling factor calculation and loading ... much simpler now. --- .../domino/src/benchmark_dataloader.py | 34 +++-------- .../domino/src/conf/config.yaml | 3 +- .../domino/src/inference_on_stl.py | 13 +--- .../external_aerodynamics/domino/src/train.py | 22 ++----- .../external_aerodynamics/domino/src/utils.py | 59 ++++++++++++++++++- 5 files changed, 75 insertions(+), 56 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py index 345f78fa2c..e3f24a5fff 100644 --- a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py +++ b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py @@ -70,10 +70,12 @@ from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo import time -from utils import ScalingFactors, get_keys_to_read, coordinate_distributed_environment - -# Initialize NVML -nvmlInit() +from utils import ( + ScalingFactors, + get_keys_to_read, + coordinate_distributed_environment, + load_scaling_factors, +) from physicsnemo.utils.profiling import profile, Profiler @@ -96,7 +98,7 @@ def benchmark_io_epoch( start_time = time.perf_counter() for i_batch, sample_batched in enumerate(dataloader): # for key in sample_batched.keys(): - # print(f"{key}: {sample_batched[key].shape}") + # print(f"Key {key} shape: {sample_batched[key].shape} with mean {sample_batched[key].mean()} and std {sample_batched[key].std()} ") # Gather data and report elapsed_time = time.perf_counter() - start_time @@ -136,19 +138,7 @@ def main(cfg: DictConfig) -> None: ################################ # Get scaling factors ################################ - pickle_path = os.path.join(cfg.output) + "/scaling_factors/scaling_factors.pkl" - - try: - scaling_factors = ScalingFactors.load(pickle_path) - logger.info(f"Scaling factors loaded from: {pickle_path}") - except FileNotFoundError: - raise FileNotFoundError( - f"Scaling factors not found at: {pickle_path}; please run compute_statistics.py to compute them." - ) - - vol_factors = scaling_factors.mean["volume_fields"] - surf_factors = scaling_factors.mean["surface_fields"] - vol_factors_tensor = torch.from_numpy(vol_factors).to(dist.device) + vol_factors, surf_factors = load_scaling_factors(cfg) keys_to_read, keys_to_read_if_available = get_keys_to_read( cfg, model_type, get_ground_truth=True @@ -170,18 +160,12 @@ def main(cfg: DictConfig) -> None: train_dataset, num_replicas=data_mesh.size(), rank=data_mesh.get_local_rank() ) - # train_dataloader = DataLoader( - # train_dataset, - # sampler=train_sampler, - # **cfg.train.dataloader, - # ) - for epoch in range(0, cfg.train.epochs): start_time = time.perf_counter() logger.info(f"Device {dist.device}, epoch {epoch}:") train_sampler.set_epoch(epoch) - print(f"indices: {list(train_sampler)}") + train_dataset.dataset.set_indices(list(train_sampler)) epoch_start_time = time.perf_counter() diff --git a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml index 26870b461f..9a404ff240 100644 --- a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml +++ b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml @@ -82,9 +82,10 @@ data: # Input directory for training and validation data gpu_preprocessing: true gpu_output: true normalize_coordinates: true - sample_in_bbox: true + sample_in_bbox: True sampling: true scaling_factors: ${project_dir}/scaling_factors/scaling_factors.pkl + volume_sample_from_disk: true # ┌───────────────────────────────────────────┐ # │ Domain Parallelism Settings │ diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py index bee8c1cd2f..9707a7e6d1 100644 --- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py +++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py @@ -486,18 +486,7 @@ def main(cfg: DictConfig) -> None: # Get scaling factors # Likely, you want to reuse the scaling factors from training. ###################################################### - pickle_path = os.path.join(cfg.data.scaling_factors) - - try: - scaling_factors = ScalingFactors.load(pickle_path) - logger.info(f"Scaling factors loaded from: {pickle_path}") - except FileNotFoundError: - raise FileNotFoundError( - f"Scaling factors not found at: {pickle_path}; please run compute_statistics.py to compute them." - ) - - vol_factors = scaling_factors.mean["volume_fields"] - surf_factors = scaling_factors.mean["surface_fields"] + vol_factors, surf_factors = load_scaling_factors(cfg) ###################################################### # Configure the model diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py index 4bf52bfb2e..954114ae46 100644 --- a/examples/cfd/external_aerodynamics/domino/src/train.py +++ b/examples/cfd/external_aerodynamics/domino/src/train.py @@ -78,7 +78,7 @@ from loss import compute_loss_dict -from utils import get_num_vars +from utils import get_num_vars, load_scaling_factors def validation_step( @@ -275,19 +275,7 @@ def main(cfg: DictConfig) -> None: ###################################################### # Get scaling factors - precompute them if this fails! ###################################################### - pickle_path = os.path.join(cfg.data.scaling_factors) - - try: - scaling_factors = ScalingFactors.load(pickle_path) - logger.info(f"Scaling factors loaded from: {pickle_path}") - except FileNotFoundError: - raise FileNotFoundError( - f"Scaling factors not found at: {pickle_path}; please run compute_statistics.py to compute them." - ) - - vol_factors = np.asarray([scaling_factors.max_val["volume_fields"], scaling_factors.min_val["volume_fields"]]) - surf_factors = np.asarray([scaling_factors.max_val["surface_fields"], scaling_factors.min_val["surface_fields"]]) - vol_factors_tensor = torch.from_numpy(vol_factors).to(dist.device) + vol_factors, surf_factors = load_scaling_factors(cfg) ###################################################### # Configure the model @@ -334,7 +322,7 @@ def main(cfg: DictConfig) -> None: torch.from_numpy( np.stack([bounding_box["max"], bounding_box["min"]], axis=0) ) - .to(vol_factors_tensor.dtype) + .to(vol_factors.dtype) .to(dist.device) ) @@ -529,7 +517,7 @@ def main(cfg: DictConfig) -> None: first_deriv=first_deriv, eqn=eqn, bounding_box=bounding_box, - vol_factors=vol_factors_tensor, + vol_factors=vol_factors, add_physics_loss=add_physics_loss, ) epoch_end_time = time.perf_counter() @@ -553,7 +541,7 @@ def main(cfg: DictConfig) -> None: first_deriv=first_deriv, eqn=eqn, bounding_box=bounding_box, - vol_factors=vol_factors_tensor, + vol_factors=vol_factors, add_physics_loss=add_physics_loss, ) diff --git a/examples/cfd/external_aerodynamics/domino/src/utils.py b/examples/cfd/external_aerodynamics/domino/src/utils.py index 12259641e3..4cc04d0d3c 100644 --- a/examples/cfd/external_aerodynamics/domino/src/utils.py +++ b/examples/cfd/external_aerodynamics/domino/src/utils.py @@ -14,6 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os + from dataclasses import dataclass from typing import Dict, Optional, Any import numpy as np @@ -160,7 +162,8 @@ def coordinate_distributed_environment(cfg: DictConfig): placements: dict[str, torch.distributed.tensor.Placement]: The placements for the data set """ - DistributedManager.initialize() + if not DistributedManager.is_initialized(): + DistributedManager.initialize() dist = DistributedManager() # Default to no domain parallelism: @@ -287,3 +290,57 @@ def summary(self) -> str: summary.append(f" Max: {max_val}") return "\n".join(summary) + + +def load_scaling_factors( + cfg: DictConfig, logger=None +) -> tuple[torch.Tensor, torch.Tensor]: + """Load scaling factors from the configuration.""" + pickle_path = os.path.join(cfg.data.scaling_factors) + + try: + scaling_factors = ScalingFactors.load(pickle_path) + if logger is not None: + logger.info(f"Scaling factors loaded from: {pickle_path}") + except FileNotFoundError: + raise FileNotFoundError( + f"Scaling factors not found at: {pickle_path}; please run compute_statistics.py to compute them." + ) + + if cfg.model.normalization == "min_max_scaling": + vol_factors = np.asarray( + [ + scaling_factors.max_val["volume_fields"], + scaling_factors.min_val["volume_fields"], + ] + ) + surf_factors = np.asarray( + [ + scaling_factors.max_val["surface_fields"], + scaling_factors.min_val["surface_fields"], + ] + ) + elif cfg.model.normalization == "mean_std_scaling": + vol_factors = np.asarray( + [ + scaling_factors.mean["volume_fields"], + scaling_factors.std["volume_fields"], + ] + ) + surf_factors = np.asarray( + [ + scaling_factors.mean["surface_fields"], + scaling_factors.std["surface_fields"], + ] + ) + else: + raise ValueError(f"Invalid normalization mode: {cfg.model.normalization}") + + vol_factors_tensor = torch.from_numpy(vol_factors) + surf_factors_tensor = torch.from_numpy(surf_factors) + + dm = DistributedManager() + vol_factors_tensor = vol_factors_tensor.to(dm.device, dtype=torch.float32) + surf_factors_tensor = surf_factors_tensor.to(dm.device, dtype=torch.float32) + + return vol_factors_tensor, surf_factors_tensor From 316dfe61175b5f06605d9a1143c7d8f6190e5a74 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 8 Oct 2025 18:16:03 +0000 Subject: [PATCH 68/98] Fix volume encoding calculation. Make sure surface grid is normalized --- physicsnemo/datapipes/cae/domino_datapipe.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py index 9ee479223d..d72c0193f3 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe.py +++ b/physicsnemo/datapipes/cae/domino_datapipe.py @@ -637,7 +637,7 @@ def process_volume( # Use the closest point from the mesh to compute the volume encodings: pos_normals_closest_vol, pos_normals_com_vol = self.calculate_volume_encoding( - c_min, c_max, volume_coordinates, sdf_node_closest_point, center_of_mass + volume_coordinates, sdf_node_closest_point, center_of_mass ) return_dict = { @@ -656,17 +656,10 @@ def process_volume( def calculate_volume_encoding( self, - c_min: torch.Tensor, - c_max: torch.Tensor, volume_coordinates: torch.Tensor, sdf_node_closest_point: torch.Tensor, center_of_mass: torch.Tensor, ): - if self.config.normalize_coordinates: - # volume_coordinates = normalize(volume_coordinates, c_max, c_min) - sdf_node_closest_point = normalize(sdf_node_closest_point, c_max, c_min) - # center_of_mass = normalize(center_of_mass, c_max, c_min) - pos_normals_closest_vol = volume_coordinates - sdf_node_closest_point pos_normals_com_vol = volume_coordinates - center_of_mass @@ -730,8 +723,6 @@ def process_data(self, data_dict): requires_grad=False, ) - return_dict["surf_grid"] = surf_grid - # We always need to calculate the SDF on the surface grid: # This is for the SDF Later: if self.config.normalize_coordinates: @@ -740,6 +731,8 @@ def process_data(self, data_dict): else: normed_vertices = data_dict["stl_coordinates"] + return_dict["surf_grid"] = surf_grid + # For SDF calculations, make sure the mesh_indices_flattened is an integer array: mesh_indices_flattened = data_dict["stl_faces"].to(torch.int32) From cc9a56630dc68061aa768ecd6c368a63040b0f70 Mon Sep 17 00:00:00 2001 From: Rishikesh Ranade Date: Wed, 8 Oct 2025 11:13:42 -0700 Subject: [PATCH 69/98] fixing bugs and refactoring test --- .../domino/src/compute_statistics.py | 1 + .../domino/src/conf/config.yaml | 9 +- .../external_aerodynamics/domino/src/test.py | 298 +++++++++--------- .../external_aerodynamics/domino/src/train.py | 4 + physicsnemo/datapipes/cae/domino_datapipe.py | 16 +- .../datapipes/cae/drivaer_ml_dataset.py | 4 +- 6 files changed, 175 insertions(+), 157 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py b/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py index d3516dff0f..991105492e 100644 --- a/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py +++ b/examples/cfd/external_aerodynamics/domino/src/compute_statistics.py @@ -104,6 +104,7 @@ def main(cfg: DictConfig) -> None: cfg=cfg, input_path=cfg.data.input_dir, target_keys=target_keys, + max_samples=cfg.data.max_samples_for_statistics, ) mean = {k: m.cpu().numpy() for k, m in mean.items()} std = {k: s.cpu().numpy() for k, s in std.items()} diff --git a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml index 9a404ff240..13c00823b8 100644 --- a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml +++ b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml @@ -62,10 +62,10 @@ variables: global_parameters: inlet_velocity: type: vector - reference: [38.89] # vector [30, 0, 0] should be specified as [30], while [30, 30, 0] should be [30, 30]. + reference: [30.00] # vector [30, 0, 0] should be specified as [30], while [30, 30, 0] should be [30, 30]. air_density: type: scalar - reference: 1.0 + reference: 1.205 # ┌───────────────────────────────────────────┐ # │ Data Configs │ @@ -86,6 +86,7 @@ data: # Input directory for training and validation data sampling: true scaling_factors: ${project_dir}/scaling_factors/scaling_factors.pkl volume_sample_from_disk: true + max_samples_for_statistics: 200 # ┌───────────────────────────────────────────┐ # │ Domain Parallelism Settings │ @@ -175,7 +176,7 @@ model: # └───────────────────────────────────────────┘ train: # Training configurable parameters epochs: 1000 - checkpoint_interval: 50 + checkpoint_interval: 2 dataloader: batch_size: 1 preload_depth: 1 @@ -206,7 +207,7 @@ eval: # Testing configurable parameters test_path: /user/testing_data # Dir for testing data in raw format (vtp, vtu ,stls) save_path: /user/predicted_data # Dir to save predicted results in raw format (vtp, vtu) checkpoint_name: DoMINO.0.455.pt # Name of checkpoint to select from saved checkpoints - scaling_param_path: /user/scaling_params + scaling_param_path: /lustre/rranade/modulus_dev/corey_fork/physicsnemo/examples/cfd/external_aerodynamics/domino/outputs/DrivAerML_Dataset/ refine_stl: False # Automatically refine STL during inference #TODO - This was hardcoded anyways, remove it. # stencil_size: 7 # Stencil size for evaluating surface and volume model diff --git a/examples/cfd/external_aerodynamics/domino/src/test.py b/examples/cfd/external_aerodynamics/domino/src/test.py index c799e83f64..e5423207a6 100644 --- a/examples/cfd/external_aerodynamics/domino/src/test.py +++ b/examples/cfd/external_aerodynamics/domino/src/test.py @@ -54,7 +54,10 @@ from physicsnemo.datapipes.cae.domino_datapipe import DoMINODataPipe from physicsnemo.models.domino.model import DoMINO from physicsnemo.utils.domino.utils import * +from physicsnemo.utils.domino.vtk_file_utils import * from physicsnemo.utils.sdf import signed_distance_field +from physicsnemo.utils.neighbors import knn +from utils import ScalingFactors # AIR_DENSITY = 1.205 # STREAM_VELOCITY = 30.00 @@ -84,7 +87,7 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors): with torch.no_grad(): point_batch_size = 256000 - data_dict = dict_to_device(data_dict, device) + # data_dict = dict_to_device(data_dict, device) # Non-dimensionalization factors length_scale = data_dict["length_scale"] @@ -110,11 +113,14 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors): p_grid = data_dict["grid"] sdf_grid = data_dict["sdf_grid"] # Scaling factors - vol_max = data_dict["volume_min_max"][:, 1] - vol_min = data_dict["volume_min_max"][:, 0] + if "volume_min_max" in data_dict.keys(): + vol_max = data_dict["volume_min_max"][:, 1] + vol_min = data_dict["volume_min_max"][:, 0] + geo_centers_vol = 2.0 * (geo_centers - vol_min) / (vol_max - vol_min) - 1 + else: + geo_centers_vol = geo_centers # Normalize based on computational domain - geo_centers_vol = 2.0 * (geo_centers - vol_min) / (vol_max - vol_min) - 1 encoding_g_vol = model.geo_rep_volume(geo_centers_vol, p_grid, sdf_grid) if output_features_surf is not None: @@ -147,7 +153,7 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors): pos_volume_center_of_mass = data_dict["pos_volume_center_of_mass"] p_grid = data_dict["grid"] - prediction_vol = np.zeros_like(target_vol.cpu().numpy()) + prediction_vol = torch.zeros_like(target_vol) num_points = volume_mesh_centers.shape[1] subdomain_points = int(np.floor(num_points / point_batch_size)) @@ -166,14 +172,13 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors): pos_normals_com_batch = pos_volume_center_of_mass[ :, start_idx:end_idx ] - geo_encoding_local = model.geo_encoding_local( + geo_encoding_local = model.volume_local_geo_encodings( 0.5 * encoding_g_vol, volume_mesh_centers_batch, p_grid, - mode="volume", ) if cfg.model.use_sdf_in_basis_func: - pos_encoding = torch.cat( + pos_encoding_all = torch.cat( ( sdf_nodes_batch, pos_volume_closest_batch, @@ -182,38 +187,36 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors): axis=-1, ) else: - pos_encoding = pos_normals_com_batch - pos_encoding = model.position_encoder( - pos_encoding, eval_mode="volume" + pos_encoding_all = pos_normals_com_batch + + pos_encoding = model.fc_p_vol( + pos_encoding_all ) - tpredictions_batch = model.calculate_solution( + tpredictions_batch = model.solution_calculator_vol( volume_mesh_centers_batch, geo_encoding_local, pos_encoding, global_params_values, global_params_reference, - num_sample_points=cfg.model.num_neighbors_volume, - eval_mode="volume", ) running_tloss_vol += loss_fn(tpredictions_batch, target_batch) - prediction_vol[:, start_idx:end_idx] = ( - tpredictions_batch.cpu().numpy() - ) + prediction_vol[:, start_idx:end_idx] = tpredictions_batch prediction_vol = unnormalize(prediction_vol, vol_factors[0], vol_factors[1]) + # print(np.amax(prediction_vol, axis=(0, 1)), np.amin(prediction_vol, axis=(0, 1))) prediction_vol[:, :, :3] = ( - prediction_vol[:, :, :3] * stream_velocity[0, 0].cpu().numpy() + prediction_vol[:, :, :3] * stream_velocity[0, 0] ) prediction_vol[:, :, 3] = ( prediction_vol[:, :, 3] - * stream_velocity[0, 0].cpu().numpy() ** 2.0 - * air_density[0, 0].cpu().numpy() + * stream_velocity[0, 0] ** 2.0 + * air_density[0, 0] ) prediction_vol[:, :, 4] = ( prediction_vol[:, :, 4] - * stream_velocity[0, 0].cpu().numpy() - * length_scale[0].cpu().numpy() + * stream_velocity[0, 0] + * length_scale[0] ) else: prediction_vol = None @@ -236,7 +239,7 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors): subdomain_points = int(np.floor(num_points / point_batch_size)) target_surf = data_dict["surface_fields"] - prediction_surf = np.zeros_like(target_surf.cpu().numpy()) + prediction_surf = torch.zeros_like(target_surf) start_time = time.time() @@ -262,18 +265,16 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors): pos_surface_center_of_mass_batch = pos_surface_center_of_mass[ :, start_idx:end_idx ] - geo_encoding_local = model.geo_encoding_local( + geo_encoding_local = model.surface_local_geo_encodings( 0.5 * encoding_g_surf, surface_mesh_centers_batch, s_grid, - mode="surface", ) - pos_encoding = pos_surface_center_of_mass_batch - pos_encoding = model.position_encoder( - pos_encoding, eval_mode="surface" + pos_encoding = model.fc_p_surf( + pos_surface_center_of_mass_batch ) - tpredictions_batch = model.calculate_solution_with_neighbors( + tpredictions_batch = model.solution_calculator_surf( surface_mesh_centers_batch, geo_encoding_local, pos_encoding, @@ -284,20 +285,16 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors): surface_neighbors_areas_batch, global_params_values, global_params_reference, - num_sample_points=cfg.model.num_neighbors_surface, ) running_tloss_surf += loss_fn(tpredictions_batch, target_batch) - prediction_surf[:, start_idx:end_idx] = ( - tpredictions_batch.cpu().numpy() - ) + prediction_surf[:, start_idx:end_idx] = tpredictions_batch prediction_surf = ( unnormalize(prediction_surf, surf_factors[0], surf_factors[1]) - * stream_velocity[0, 0].cpu().numpy() ** 2.0 - * air_density[0, 0].cpu().numpy() + * stream_velocity[0, 0] ** 2.0 + * air_density[0, 0] ) - else: prediction_surf = None @@ -346,22 +343,26 @@ def main(cfg: DictConfig): else: global_features += 1 - vol_save_path = os.path.join( - cfg.eval.scaling_param_path, "volume_scaling_factors.npy" - ) - surf_save_path = os.path.join( - cfg.eval.scaling_param_path, "surface_scaling_factors.npy" - ) - if os.path.exists(vol_save_path): - vol_factors = np.load(vol_save_path) - else: - vol_factors = None + ###################################################### + # Get scaling factors - precompute them if this fails! + ###################################################### + pickle_path = os.path.join(cfg.data.scaling_factors) - if os.path.exists(surf_save_path): - surf_factors = np.load(surf_save_path) - else: - surf_factors = None + try: + scaling_factors = ScalingFactors.load(pickle_path) + except FileNotFoundError: + raise FileNotFoundError( + f"Scaling factors not found at: {pickle_path}; please run compute_statistics.py to compute them." + ) + # vol_factors = np.asarray([scaling_factors.max_val["volume_fields"], scaling_factors.min_val["volume_fields"]]) + # surf_factors = np.asarray([scaling_factors.max_val["surface_fields"], scaling_factors.min_val["surface_fields"]]) + + vol_factors = np.asarray([[ 2.9064691e+00, 1.3743978e+00,1.2992665e+00, 1.0714761e+00, 3.2597079e-03], [-2.9988267e+00, -1.3753892e+00, -1.2892706e+00, -1.1400493e+00, 1.0002602e-11]]) + surf_factors = np.asarray([[ 1.8464564, 0.09996139, 0.07988136, 0.05437989], [-2.0476909, -0.10289095, -0.07811281, -0.05411612]]) + + vol_factors = torch.from_numpy(vol_factors).to(dist.device) + surf_factors = torch.from_numpy(surf_factors).to(dist.device) print("Vol factors:", vol_factors) print("Surf factors:", surf_factors) @@ -434,6 +435,12 @@ def main(cfg: DictConfig): stl_sizes = np.array(stl_sizes.cell_data["Area"], dtype=np.float32) stl_centers = np.array(mesh_stl.cell_centers().points, dtype=np.float32) + # Convert to torch tensors and load on device + stl_vertices = torch.from_numpy(stl_vertices).to(torch.float32).to(dist.device) + stl_sizes = torch.from_numpy(stl_sizes).to(torch.float32).to(dist.device) + stl_centers = torch.from_numpy(stl_centers).to(torch.float32).to(dist.device) + mesh_indices_flattened = torch.from_numpy(mesh_indices_flattened).to(torch.int32).to(dist.device) + # Center of mass calculation center_of_mass = calculate_center_of_mass(stl_centers, stl_sizes) @@ -446,27 +453,28 @@ def main(cfg: DictConfig): bounding_box_dims_surf.append(np.asarray(cfg.data.bounding_box_surface.min)) s_max = np.float32(bounding_box_dims_surf[0]) s_min = np.float32(bounding_box_dims_surf[1]) + s_max = torch.from_numpy(s_max).to(torch.float32).to(dist.device) + s_min = torch.from_numpy(s_min).to(torch.float32).to(dist.device) nx, ny, nz = cfg.model.interp_res - surf_grid = create_grid(s_max, s_min, [nx, ny, nz]) - surf_grid_reshaped = surf_grid.reshape(nx * ny * nz, 3) + surf_grid = create_grid(s_max, s_min, torch.from_numpy(np.asarray([nx, ny, nz])).to(dist.device)) + + normed_stl_vertices_cp = normalize(stl_vertices, s_max, s_min) + surf_grid_normed = normalize(surf_grid, s_max, s_min) # SDF calculation on the grid using WARP + time_start = time.time() sdf_surf_grid = signed_distance_field( - cp.asarray(stl_vertices).astype(cp.float32), - cp.asarray(mesh_indices_flattened).astype(cp.int32), - cp.asarray(surf_grid_reshaped).astype(cp.float32), + normed_stl_vertices_cp, + mesh_indices_flattened, + surf_grid_normed, use_sign_winding_number=True, - return_cupy=False, - ).reshape(nx, ny, nz) - - surf_grid = np.float32(surf_grid) - sdf_surf_grid = np.float32(sdf_surf_grid) - surf_grid_max_min = np.float32(np.asarray([s_min, s_max])) - if cfg.model.normalize_coordinates: - sdf_surf_grid = normalize(sdf_surf_grid, xp.amax(surf_grid), xp.amin(surf_grid)) - + ) + sdf_surf_grid = sdf_surf_grid[0] + + surf_grid_max_min = torch.stack([s_min, s_max]) + # Get global parameters and global parameters scaling from config.yaml global_params_names = list(cfg.variables.global_parameters.keys()) global_params_reference = { @@ -494,6 +502,7 @@ def main(cfg: DictConfig): global_params_reference = np.array( global_params_reference_list, dtype=np.float32 ) + global_params_reference = torch.from_numpy(global_params_reference).to(dist.device) # Define the list of global parameter values for each simulation. # Note: The user must ensure that the values provided here correspond to the @@ -509,7 +518,10 @@ def main(cfg: DictConfig): raise ValueError( f"Global parameter {key} not supported for this dataset" ) - global_params_values = np.array(global_params_values_list, dtype=np.float32) + global_params_values_list = np.array( + global_params_values_list, dtype=np.float32 + ) + global_params_values = torch.from_numpy(global_params_values_list).to(dist.device) # Read VTP if model_type == "surface" or model_type == "combined": @@ -537,18 +549,17 @@ def main(cfg: DictConfig): surface_normals = ( surface_normals / np.linalg.norm(surface_normals, axis=1)[:, np.newaxis] ) - - if cfg.model.normalize_coordinates: - surface_coordinates = normalize(surface_coordinates, s_max, s_min) - surf_grid = normalize(surf_grid, s_max, s_min) - center_of_mass_normalized = normalize(center_of_mass, s_max, s_min) - else: - center_of_mass_normalized = center_of_mass + surface_coordinates = torch.from_numpy(surface_coordinates).to(torch.float32).to(dist.device) + surface_normals = torch.from_numpy(surface_normals).to(torch.float32).to(dist.device) + surface_sizes = torch.from_numpy(surface_sizes).to(torch.float32).to(dist.device) if cfg.model.num_neighbors_surface > 1: - interp_func = KDTree(surface_coordinates) - dd, ii = interp_func.query( - surface_coordinates, k=cfg.model.num_neighbors_surface + + time_start = time.time() + ii, dd = knn( + points=surface_coordinates, + queries=surface_coordinates, + k=cfg.model.num_neighbors_surface, ) surface_neighbors = surface_coordinates[ii] @@ -563,12 +574,15 @@ def main(cfg: DictConfig): surface_neighbors_normals = surface_normals surface_neighbors_sizes = surface_sizes - + if cfg.data.normalize_coordinates: + surface_coordinates = normalize(surface_coordinates, s_max, s_min) + surf_grid = normalize(surf_grid, s_max, s_min) + center_of_mass_normalized = normalize(center_of_mass, s_max, s_min) + surface_neighbors = normalize(surface_neighbors, s_max, s_min) + else: + center_of_mass_normalized = center_of_mass pos_surface_center_of_mass = surface_coordinates - center_of_mass_normalized - # surface_coordinates = normalize(surface_coordinates, s_max, s_min) - # surface_neighbors = normalize(surface_neighbors, s_max, s_min) - else: surface_coordinates = None surface_fields = None @@ -589,13 +603,13 @@ def main(cfg: DictConfig): polydata_vol, volume_variable_names ) volume_fields = np.concatenate(volume_fields, axis=-1) + volume_coordinates = torch.from_numpy(volume_coordinates).to(torch.float32).to(dist.device) + volume_fields = torch.from_numpy(volume_fields).to(torch.float32).to(dist.device) bounding_box_dims = [] bounding_box_dims.append(np.asarray(cfg.data.bounding_box.max)) bounding_box_dims.append(np.asarray(cfg.data.bounding_box.min)) - v_max = np.amax(volume_coordinates, 0) - v_min = np.amin(volume_coordinates, 0) if bounding_box_dims is None: c_max = s_max + (s_max - s_min) / 2 c_min = s_min - (s_max - s_min) / 2 @@ -603,46 +617,50 @@ def main(cfg: DictConfig): else: c_max = np.float32(bounding_box_dims[0]) c_min = np.float32(bounding_box_dims[1]) + c_max = torch.from_numpy(c_max).to(dist.device) + c_min = torch.from_numpy(c_min).to(dist.device) # Generate a grid of specified resolution to map the bounding box # The grid is used for capturing structured geometry features and SDF representation of geometry - grid = create_grid(c_max, c_min, [nx, ny, nz]) - grid_reshaped = grid.reshape(nx * ny * nz, 3) + grid = create_grid(c_max, c_min, torch.from_numpy(np.asarray([nx, ny, nz])).to(dist.device)) + + if cfg.data.normalize_coordinates: + volume_coordinates = normalize(volume_coordinates, c_max, c_min) + grid = normalize(grid, c_max, c_min) + center_of_mass_normalized = normalize(center_of_mass, c_max, c_min) + normed_stl_vertices_vol = normalize(stl_vertices, c_max, c_min) + else: + center_of_mass_normalized = center_of_mass # SDF calculation on the grid using WARP + time_start = time.time() sdf_grid = signed_distance_field( - cp.asarray(stl_vertices).astype(cp.float32), - cp.asarray(mesh_indices_flattened).astype(cp.int32), - cp.asarray(grid_reshaped).astype(cp.float32), + normed_stl_vertices_vol, + mesh_indices_flattened, + grid, use_sign_winding_number=True, - return_cupy=False, - ).reshape(nx, ny, nz) - + ) + sdf_grid = sdf_grid[0] + # SDF calculation + time_start = time.time() sdf_nodes, sdf_node_closest_point = signed_distance_field( - cp.asarray(stl_vertices).astype(cp.float32), - cp.asarray(mesh_indices_flattened).astype(cp.int32), - cp.asarray(volume_coordinates).astype(cp.float32), - include_hit_points=True, + normed_stl_vertices_vol, + mesh_indices_flattened, + volume_coordinates, use_sign_winding_number=True, - return_cupy=False, ) sdf_nodes = sdf_nodes.reshape(-1, 1) vol_grid_max_min = np.asarray([c_min, c_max]) - if cfg.model.normalize_coordinates: - volume_coordinates = normalize(volume_coordinates, c_max, c_min) - grid = normalize(grid, c_max, c_min) - center_of_mass_normalized = normalize(center_of_mass, c_max, c_min) - sdf_grid = normalize(sdf_grid, xp.amax(grid), xp.amin(grid)) - sdf_nodes = normalize(sdf_nodes, xp.amax(grid), xp.amin(grid)) + if cfg.data.normalize_coordinates: sdf_node_closest_point = normalize(sdf_node_closest_point, c_max, c_min) else: - center_of_mass_normalized = center_of_mass + sdf_node_closest_point = sdf_node_closest_point pos_volume_closest = volume_coordinates - sdf_node_closest_point pos_volume_center_of_mass = volume_coordinates - center_of_mass_normalized - + else: volume_coordinates = None volume_fields = None @@ -651,7 +669,8 @@ def main(cfg: DictConfig): # print(f"Processed sdf and normalized") - geom_centers = np.float32(stl_vertices) + geom_centers = stl_vertices + # print(f"Geom centers max: {np.amax(geom_centers, axis=0)}, min: {np.amin(geom_centers, axis=0)}") if model_type == "combined": # Add the parameters to the dictionary @@ -676,35 +695,27 @@ def main(cfg: DictConfig): "surface_fields": surface_fields, "volume_min_max": vol_grid_max_min, "surface_min_max": surf_grid_max_min, - "length_scale": np.array(length_scale, dtype=np.float32), - "global_params_values": np.expand_dims( - np.array(global_params_values, dtype=np.float32), -1 - ), - "global_params_reference": np.expand_dims( - np.array(global_params_reference, dtype=np.float32), -1 - ), + "length_scale": length_scale, + "global_params_values": torch.unsqueeze(global_params_values, -1), + "global_params_reference": torch.unsqueeze(global_params_reference, -1), } elif model_type == "surface": data_dict = { - "pos_surface_center_of_mass": np.float32(pos_surface_center_of_mass), - "geometry_coordinates": np.float32(geom_centers), - "surf_grid": np.float32(surf_grid), - "sdf_surf_grid": np.float32(sdf_surf_grid), - "surface_mesh_centers": np.float32(surface_coordinates), - "surface_mesh_neighbors": np.float32(surface_neighbors), - "surface_normals": np.float32(surface_normals), - "surface_neighbors_normals": np.float32(surface_neighbors_normals), - "surface_areas": np.float32(surface_sizes), - "surface_neighbors_areas": np.float32(surface_neighbors_sizes), - "surface_fields": np.float32(surface_fields), - "surface_min_max": np.float32(surf_grid_max_min), - "length_scale": np.array(length_scale, dtype=np.float32), - "global_params_values": np.expand_dims( - np.array(global_params_values, dtype=np.float32), -1 - ), - "global_params_reference": np.expand_dims( - np.array(global_params_reference, dtype=np.float32), -1 - ), + "pos_surface_center_of_mass": pos_surface_center_of_mass, + "geometry_coordinates": geom_centers, + "surf_grid": surf_grid, + "sdf_surf_grid": sdf_surf_grid, + "surface_mesh_centers": surface_coordinates, + "surface_mesh_neighbors": surface_neighbors, + "surface_normals": surface_normals, + "surface_neighbors_normals": surface_neighbors_normals, + "surface_areas": surface_sizes, + "surface_neighbors_areas": surface_neighbors_sizes, + "surface_fields": surface_fields, + "surface_min_max": surf_grid_max_min, + "length_scale": length_scale, + "global_params_values": torch.unsqueeze(global_params_values, -1), + "global_params_reference": torch.unsqueeze(global_params_reference, -1), } elif model_type == "volume": data_dict = { @@ -720,17 +731,13 @@ def main(cfg: DictConfig): "volume_mesh_centers": volume_coordinates, "volume_min_max": vol_grid_max_min, "surface_min_max": surf_grid_max_min, - "length_scale": np.array(length_scale, dtype=np.float32), - "global_params_values": np.expand_dims( - np.array(global_params_values, dtype=np.float32), -1 - ), - "global_params_reference": np.expand_dims( - np.array(global_params_reference, dtype=np.float32), -1 - ), + "length_scale": length_scale, + "global_params_values": torch.unsqueeze(global_params_values, -1), + "global_params_reference": torch.unsqueeze(global_params_reference, -1), } data_dict = { - key: torch.from_numpy(np.expand_dims(np.float32(value), 0)) + key: torch.unsqueeze(value, 0) for key, value in data_dict.items() } @@ -807,7 +814,7 @@ def main(cfg: DictConfig): prediction_vol = prediction_vol[0] c_min = vol_grid_max_min[0] c_max = vol_grid_max_min[1] - volume_coordinates = unnormalize(volume_coordinates, c_max, c_min) + volume_coordinates = unnormalize_np(volume_coordinates, c_max, c_min) ids_in_bbox = np.where( (volume_coordinates[:, 0] < c_min[0]) | (volume_coordinates[:, 0] > c_max[0]) @@ -827,31 +834,32 @@ def main(cfg: DictConfig): ) l2_volume_all.append(np.sqrt(l2_error) / np.sqrt(l2_gt)) + # import pdb; pdb.set_trace() if prediction_surf is not None: - surfParam_vtk = numpy_support.numpy_to_vtk(prediction_surf[0, :, 0:1]) + surfParam_vtk = numpy_support.numpy_to_vtk(prediction_surf[0, :, 0:1].cpu().numpy()) surfParam_vtk.SetName(f"{surface_variable_names[0]}Pred") celldata_all.GetCellData().AddArray(surfParam_vtk) - surfParam_vtk = numpy_support.numpy_to_vtk(prediction_surf[0, :, 1:]) + surfParam_vtk = numpy_support.numpy_to_vtk(prediction_surf[0, :, 1:].cpu().numpy()) surfParam_vtk.SetName(f"{surface_variable_names[1]}Pred") celldata_all.GetCellData().AddArray(surfParam_vtk) - write_to_vtp(celldata_all, vtp_pred_save_path) + # write_to_vtp(celldata_all, vtp_pred_save_path) if prediction_vol is not None: - volParam_vtk = numpy_support.numpy_to_vtk(prediction_vol[:, 0:3]) + volParam_vtk = numpy_support.numpy_to_vtk(prediction_vol[:, 0:3].cpu().numpy()) volParam_vtk.SetName(f"{volume_variable_names[0]}Pred") polydata_vol.GetPointData().AddArray(volParam_vtk) - volParam_vtk = numpy_support.numpy_to_vtk(prediction_vol[:, 3:4]) + volParam_vtk = numpy_support.numpy_to_vtk(prediction_vol[:, 3:4].cpu().numpy()) volParam_vtk.SetName(f"{volume_variable_names[1]}Pred") polydata_vol.GetPointData().AddArray(volParam_vtk) - volParam_vtk = numpy_support.numpy_to_vtk(prediction_vol[:, 4:5]) + volParam_vtk = numpy_support.numpy_to_vtk(prediction_vol[:, 4:5].cpu().numpy()) volParam_vtk.SetName(f"{volume_variable_names[2]}Pred") polydata_vol.GetPointData().AddArray(volParam_vtk) - write_to_vtu(polydata_vol, vtu_pred_save_path) + # write_to_vtu(polydata_vol, vtu_pred_save_path) l2_surface_all = np.asarray(l2_surface_all) # num_files, 4 l2_volume_all = np.asarray(l2_volume_all) # num_files, 4 diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py index 954114ae46..eb6331331e 100644 --- a/examples/cfd/external_aerodynamics/domino/src/train.py +++ b/examples/cfd/external_aerodynamics/domino/src/train.py @@ -276,6 +276,10 @@ def main(cfg: DictConfig) -> None: # Get scaling factors - precompute them if this fails! ###################################################### vol_factors, surf_factors = load_scaling_factors(cfg) + + vol_factors = np.asarray([[ 2.9064691e+00, 1.3743978e+00,1.2992665e+00, 1.0714761e+00, 3.2597079e-03], [-2.9988267e+00, -1.3753892e+00, -1.2892706e+00, -1.1400493e+00, 1.0002602e-11]]) + surf_factors = np.asarray([[ 1.8464564, 0.09996139, 0.07988136, 0.05437989], [-2.0476909, -0.10289095, -0.07811281, -0.05411612]]) + vol_factors_tensor = torch.from_numpy(vol_factors).to(dist.device) ###################################################### # Configure the model diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py index d72c0193f3..5902b9d857 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe.py +++ b/physicsnemo/datapipes/cae/domino_datapipe.py @@ -481,7 +481,7 @@ def process_surface( queries=surface_coordinates, k=self.config.num_surface_neighbors, ) - + # print(f"Full surface coordinates shape: {full_surface_coordinates.shape}") # Pull out the neighbor elements. # Note that `neighbor_indices` is the index into the original, # full sized tensors (full_surface_coordinates, etc). @@ -495,10 +495,8 @@ def process_surface( # Better to normalize everything after the kNN and sampling if self.config.normalize_coordinates: - # surf_grid = normalize(surf_grid, s_max, s_min) surface_coordinates = normalize(surface_coordinates, s_max, s_min) surface_neighbors = normalize(surface_neighbors, s_max, s_min) - # Make sure to normalize the center of mass for the normals_com_surface calc center_of_mass = normalize(center_of_mass, s_max, s_min) pos_normals_com_surface = surface_coordinates - center_of_mass @@ -595,13 +593,12 @@ def process_volume( if self.config.normalize_coordinates: volume_coordinates = normalize(volume_coordinates, c_max, c_min) grid = normalize(volume_grid, c_max, c_min) - # This is used later in the SDF, apply the same scaling to the mesh - # coordinates: normed_vertices = normalize(stl_vertices, c_max, c_min) center_of_mass = normalize(center_of_mass, c_max, c_min) else: grid = volume_grid normed_vertices = stl_vertices + center_of_mass = center_of_mass ######################################################################## # Apply scaling to the targets, if desired: @@ -744,7 +741,8 @@ def process_data(self, data_dict): use_sign_winding_number=True, ) return_dict["sdf_surf_grid"] = sdf_surf_grid - + return_dict["surf_grid"] = surf_grid + # Store this only if normalization is active: if self.config.normalize_coordinates: return_dict["surface_min_max"] = torch.stack([s_min, s_max]) @@ -810,6 +808,9 @@ def process_data(self, data_dict): return_dict.update(surface_dict) + for key, value in return_dict.items(): + print(f"Key: {key}, Max: {torch.amax(value)}, Min: {torch.amin(value)}") + exit() return return_dict def scale_model_targets( @@ -932,7 +933,7 @@ def __iter__(self): def compute_scaling_factors( - cfg: DictConfig, input_path: str, target_keys: list[str], use_cache=None + cfg: DictConfig, input_path: str, target_keys: list[str], use_cache=None, max_samples=20, ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: """ Using the dataset at the path, compute the mean, std, min, and max of the target keys. @@ -956,6 +957,7 @@ def compute_scaling_factors( mean, std, min_val, max_val = compute_mean_std_min_max( dataset, field_keys=target_keys, + max_samples=max_samples, ) return mean, std, min_val, max_val diff --git a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py index faa62f6ed6..c4bd4b8590 100644 --- a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py +++ b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py @@ -1177,9 +1177,11 @@ def compute_mean_std_min_max( # Update min/max batch_min = field_data.amin(dim=(0)) batch_max = field_data.amax(dim=(0)) + min_val[field_key] = torch.minimum(min_val[field_key], batch_min) - max_val[field_key] = torch.maximum(max_val[field_key], batch_max) + max_val[field_key] = torch.maximum(max_val[field_key], batch_max) + # Update running mean and M2 (Welford's algorithm) delta = batch_mean - mean[field_key] N[field_key] += batch_n # batch_n should also be torch.int64 From 961d4ba8d83d66af079c95c556848c2f445be650 Mon Sep 17 00:00:00 2001 From: Rishikesh Ranade Date: Wed, 8 Oct 2025 11:37:38 -0700 Subject: [PATCH 70/98] remove print command --- physicsnemo/datapipes/cae/domino_datapipe.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py index 5902b9d857..1a8e7e6164 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe.py +++ b/physicsnemo/datapipes/cae/domino_datapipe.py @@ -808,9 +808,6 @@ def process_data(self, data_dict): return_dict.update(surface_dict) - for key, value in return_dict.items(): - print(f"Key: {key}, Max: {torch.amax(value)}, Min: {torch.amin(value)}") - exit() return return_dict def scale_model_targets( From 14be02f71f5dd7ea0f05cbf19986e63d6a1fa895 Mon Sep 17 00:00:00 2001 From: Rishikesh Ranade Date: Wed, 8 Oct 2025 12:24:31 -0700 Subject: [PATCH 71/98] fixing issues in test --- examples/cfd/external_aerodynamics/domino/src/test.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/src/test.py b/examples/cfd/external_aerodynamics/domino/src/test.py index e5423207a6..cbdd6ca4ba 100644 --- a/examples/cfd/external_aerodynamics/domino/src/test.py +++ b/examples/cfd/external_aerodynamics/domino/src/test.py @@ -430,7 +430,8 @@ def main(cfg: DictConfig): :, 1: ] # Assuming triangular elements mesh_indices_flattened = stl_faces.flatten() - length_scale = np.amax(np.amax(stl_vertices, 0) - np.amin(stl_vertices, 0)) + length_scale = np.array(np.amax(np.amax(stl_vertices, 0) - np.amin(stl_vertices, 0)), dtype=np.float32) + length_scale = torch.from_numpy(length_scale).to(torch.float32).to(dist.device) stl_sizes = mesh_stl.compute_cell_sizes(length=False, area=True, volume=False) stl_sizes = np.array(stl_sizes.cell_data["Area"], dtype=np.float32) stl_centers = np.array(mesh_stl.cell_centers().points, dtype=np.float32) @@ -552,6 +553,7 @@ def main(cfg: DictConfig): surface_coordinates = torch.from_numpy(surface_coordinates).to(torch.float32).to(dist.device) surface_normals = torch.from_numpy(surface_normals).to(torch.float32).to(dist.device) surface_sizes = torch.from_numpy(surface_sizes).to(torch.float32).to(dist.device) + surface_fields = torch.from_numpy(surface_fields).to(torch.float32).to(dist.device) if cfg.model.num_neighbors_surface > 1: @@ -651,7 +653,7 @@ def main(cfg: DictConfig): use_sign_winding_number=True, ) sdf_nodes = sdf_nodes.reshape(-1, 1) - vol_grid_max_min = np.asarray([c_min, c_max]) + vol_grid_max_min = torch.stack([c_min, c_max]) if cfg.data.normalize_coordinates: sdf_node_closest_point = normalize(sdf_node_closest_point, c_max, c_min) @@ -844,7 +846,7 @@ def main(cfg: DictConfig): surfParam_vtk.SetName(f"{surface_variable_names[1]}Pred") celldata_all.GetCellData().AddArray(surfParam_vtk) - # write_to_vtp(celldata_all, vtp_pred_save_path) + write_to_vtp(celldata_all, vtp_pred_save_path) if prediction_vol is not None: volParam_vtk = numpy_support.numpy_to_vtk(prediction_vol[:, 0:3].cpu().numpy()) @@ -859,7 +861,7 @@ def main(cfg: DictConfig): volParam_vtk.SetName(f"{volume_variable_names[2]}Pred") polydata_vol.GetPointData().AddArray(volParam_vtk) - # write_to_vtu(polydata_vol, vtu_pred_save_path) + write_to_vtu(polydata_vol, vtu_pred_save_path) l2_surface_all = np.asarray(l2_surface_all) # num_files, 4 l2_volume_all = np.asarray(l2_volume_all) # num_files, 4 From eb62dce25d3a79c2ad6323f139d0f6256d0e61af Mon Sep 17 00:00:00 2001 From: Rishikesh Ranade Date: Wed, 8 Oct 2025 12:52:41 -0700 Subject: [PATCH 72/98] fixing errors in test.py --- .../external_aerodynamics/domino/src/test.py | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/src/test.py b/examples/cfd/external_aerodynamics/domino/src/test.py index cbdd6ca4ba..93fd16c25c 100644 --- a/examples/cfd/external_aerodynamics/domino/src/test.py +++ b/examples/cfd/external_aerodynamics/domino/src/test.py @@ -748,41 +748,41 @@ def main(cfg: DictConfig): ) if prediction_surf is not None: - surface_sizes = np.expand_dims(surface_sizes, -1) + surface_sizes = torch.unsqueeze(surface_sizes, -1) - pres_x_pred = np.sum( + pres_x_pred = torch.sum( prediction_surf[0, :, 0] * surface_normals[:, 0] * surface_sizes[:, 0] ) - shear_x_pred = np.sum(prediction_surf[0, :, 1] * surface_sizes[:, 0]) + shear_x_pred = torch.sum(prediction_surf[0, :, 1] * surface_sizes[:, 0]) - pres_x_true = np.sum( + pres_x_true = torch.sum( surface_fields[:, 0] * surface_normals[:, 0] * surface_sizes[:, 0] ) - shear_x_true = np.sum(surface_fields[:, 1] * surface_sizes[:, 0]) + shear_x_true = torch.sum(surface_fields[:, 1] * surface_sizes[:, 0]) - force_x_pred = np.sum( + force_x_pred = torch.sum( prediction_surf[0, :, 0] * surface_normals[:, 0] * surface_sizes[:, 0] - prediction_surf[0, :, 1] * surface_sizes[:, 0] ) - force_x_true = np.sum( + force_x_true = torch.sum( surface_fields[:, 0] * surface_normals[:, 0] * surface_sizes[:, 0] - surface_fields[:, 1] * surface_sizes[:, 0] ) - force_y_pred = np.sum( + force_y_pred = torch.sum( prediction_surf[0, :, 0] * surface_normals[:, 1] * surface_sizes[:, 0] - prediction_surf[0, :, 2] * surface_sizes[:, 0] ) - force_y_true = np.sum( + force_y_true = torch.sum( surface_fields[:, 0] * surface_normals[:, 1] * surface_sizes[:, 0] - surface_fields[:, 2] * surface_sizes[:, 0] ) - force_z_pred = np.sum( + force_z_pred = torch.sum( prediction_surf[0, :, 0] * surface_normals[:, 2] * surface_sizes[:, 0] - prediction_surf[0, :, 3] * surface_sizes[:, 0] ) - force_z_true = np.sum( + force_z_true = torch.sum( surface_fields[:, 0] * surface_normals[:, 2] * surface_sizes[:, 0] - surface_fields[:, 3] * surface_sizes[:, 0] ) @@ -801,14 +801,14 @@ def main(cfg: DictConfig): ] ) - l2_gt = np.mean(np.square(surface_fields), (0)) - l2_error = np.mean(np.square(prediction_surf[0] - surface_fields), (0)) - l2_surface_all.append(np.sqrt(l2_error / l2_gt)) + l2_gt = torch.mean(torch.square(surface_fields), (0)) + l2_error = torch.mean(torch.square(prediction_surf[0] - surface_fields), (0)) + l2_surface_all.append(torch.sqrt(l2_error / l2_gt)) print( "Surface L-2 norm:", dirname, - np.sqrt(l2_error) / np.sqrt(l2_gt), + torch.sqrt(l2_error) / torch.sqrt(l2_gt), ) if prediction_vol is not None: @@ -816,8 +816,8 @@ def main(cfg: DictConfig): prediction_vol = prediction_vol[0] c_min = vol_grid_max_min[0] c_max = vol_grid_max_min[1] - volume_coordinates = unnormalize_np(volume_coordinates, c_max, c_min) - ids_in_bbox = np.where( + volume_coordinates = unnormalize(volume_coordinates, c_max, c_min) + ids_in_bbox = torch.where( (volume_coordinates[:, 0] < c_min[0]) | (volume_coordinates[:, 0] > c_max[0]) | (volume_coordinates[:, 1] < c_min[1]) @@ -827,14 +827,14 @@ def main(cfg: DictConfig): ) target_vol[ids_in_bbox] = 0.0 prediction_vol[ids_in_bbox] = 0.0 - l2_gt = np.mean(np.square(target_vol), (0)) - l2_error = np.mean(np.square(prediction_vol - target_vol), (0)) + l2_gt = torch.mean(torch.square(target_vol), (0)) + l2_error = torch.mean(torch.square(prediction_vol - target_vol), (0)) print( "Volume L-2 norm:", dirname, - np.sqrt(l2_error) / np.sqrt(l2_gt), + torch.sqrt(l2_error) / torch.sqrt(l2_gt), ) - l2_volume_all.append(np.sqrt(l2_error) / np.sqrt(l2_gt)) + l2_volume_all.append(torch.sqrt(l2_error) / torch.sqrt(l2_gt)) # import pdb; pdb.set_trace() if prediction_surf is not None: From bac53655a2a37c413cfbb10497700b2bdac4abcd Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 8 Oct 2025 13:54:50 -0700 Subject: [PATCH 73/98] Update volumetric sub sampling so that it is more robust when not reading volume data. --- .../datapipes/cae/drivaer_ml_dataset.py | 55 +++++++++++-------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py index c4bd4b8590..6d46a40879 100644 --- a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py +++ b/physicsnemo/datapipes/cae/drivaer_ml_dataset.py @@ -80,6 +80,8 @@ def __init__( self.volume_sampling_size = None + self.is_volumetric = any(["volume" in key for key in self.keys_to_read]) + @abstractmethod def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]: """ @@ -252,14 +254,15 @@ def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]: raise ValueError(f"Keys {keys_missing} not found in file {filename}") # Make sure to select the slice outside of the loop. - if self.volume_sampling_size is not None: - volume_slice = self.select_random_sections_from_slice( - 0, - in_data["volume_mesh_centers"].shape[0], - self.volume_sampling_size, - ) - else: - volume_slice = slice(0, in_data["volume_mesh_centers"].shape[0]) + if self.is_volumetric: + if self.volume_sampling_size is not None: + volume_slice = self.select_random_sections_from_slice( + 0, + in_data["volume_mesh_centers"].shape[0], + self.volume_sampling_size, + ) + else: + volume_slice = slice(0, in_data["volume_mesh_centers"].shape[0]) # This is a slower basic way to do this, to be improved: data = {} @@ -311,14 +314,15 @@ def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]: raise ValueError(f"Keys {missing_keys} not found in file {filename}") # Make sure to select the slice outside of the loop. - if self.volume_sampling_size is not None: - volume_slice = self.select_random_sections_from_slice( - 0, - group["volume_mesh_centers"].shape[0], - self.volume_sampling_size, - ) - else: - volume_slice = slice(0, group["volume_mesh_centers"].shape[0]) + if self.is_volumetric: + if self.volume_sampling_size is not None: + volume_slice = self.select_random_sections_from_slice( + 0, + group["volume_mesh_centers"].shape[0], + self.volume_sampling_size, + ) + else: + volume_slice = slice(0, group["volume_mesh_centers"].shape[0]) # This is a slower basic way to do this, to be improved: data = {} @@ -587,14 +591,17 @@ def read_file(self, filename: pathlib.Path) -> dict[str, torch.Tensor]: # Make sure to select the slice outside of the loop. # We need - if self.volume_sampling_size is not None: - volume_slice = self.select_random_sections_from_slice( - 0, - read_futures["volume_mesh_centers"].shape[0], - self.volume_sampling_size, - ) - else: - volume_slice = slice(0, read_futures["volume_mesh_centers"].shape[0]) + if self.is_volumetric: + if self.volume_sampling_size is not None: + volume_slice = self.select_random_sections_from_slice( + 0, + read_futures["volume_mesh_centers"].shape[0], + self.volume_sampling_size, + ) + else: + volume_slice = slice( + 0, read_futures["volume_mesh_centers"].shape[0] + ) # Trigger an async read of each data item: # (Each item will be a numpy ndarray after this:) From 2ceefc75381004e9319841065465716a7f176aec Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 8 Oct 2025 13:59:21 -0700 Subject: [PATCH 74/98] Make sure differentiable loss tensors are detached before transfer to CPU --- examples/cfd/external_aerodynamics/domino/src/train.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py index eb6331331e..0f08b95cfa 100644 --- a/examples/cfd/external_aerodynamics/domino/src/train.py +++ b/examples/cfd/external_aerodynamics/domino/src/train.py @@ -204,7 +204,7 @@ def train_epoch( optimizer.zero_grad() # Gather data and report - running_loss += loss.item() + running_loss += loss.detach().item() elapsed_time = time.perf_counter() - start_time io_time = io_end_time - io_start_time start_time = time.perf_counter() @@ -223,7 +223,9 @@ def train_epoch( ) loss_string += ( " " - + f"\t".join([f"{l.item():<10.3e}" for l in loss_dict.values()]) + + f"\t".join( + [f"{l.detach().item():<10.3e}" for l in loss_dict.values()] + ) + "\n" ) @@ -237,7 +239,7 @@ def train_epoch( last_loss = running_loss / (i_batch + 1) # loss per batch if dist.rank == 0: logger.info( - f" Device {device}, batch: {i_batch + 1}, loss norm: {loss.item():.5f}" + f" Device {device}, batch: {i_batch + 1}, loss norm: {loss.detach().item():.5f}" ) tb_x = epoch_index * len(dataloader) + i_batch + 1 tb_writer.add_scalar("Loss/train", last_loss, tb_x) From d955d87797a58f2e6a2c3a9e35caff9bb2fb5684 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 8 Oct 2025 13:59:53 -0700 Subject: [PATCH 75/98] remove printouts. --- .../external_aerodynamics/domino/src/benchmark_dataloader.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py index e3f24a5fff..339363195a 100644 --- a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py +++ b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py @@ -97,9 +97,6 @@ def benchmark_io_epoch( gpu_start_info = nvmlDeviceGetMemoryInfo(gpu_handle) start_time = time.perf_counter() for i_batch, sample_batched in enumerate(dataloader): - # for key in sample_batched.keys(): - # print(f"Key {key} shape: {sample_batched[key].shape} with mean {sample_batched[key].mean()} and std {sample_batched[key].std()} ") - # Gather data and report elapsed_time = time.perf_counter() - start_time start_time = time.perf_counter() From d05e6531a28ea617e4af166aeb6ed393ffe13018 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Thu, 9 Oct 2025 12:10:47 -0700 Subject: [PATCH 76/98] Increase data reading size, for sub-sampling. --- physicsnemo/datapipes/cae/domino_datapipe.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py index 1a8e7e6164..93e7d15b93 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe.py +++ b/physicsnemo/datapipes/cae/domino_datapipe.py @@ -742,7 +742,7 @@ def process_data(self, data_dict): ) return_dict["sdf_surf_grid"] = sdf_surf_grid return_dict["surf_grid"] = surf_grid - + # Store this only if normalization is active: if self.config.normalize_coordinates: return_dict["surface_min_max"] = torch.stack([s_min, s_max]) @@ -867,7 +867,7 @@ def set_dataset(self, dataset: Iterable) -> None: if self.config.volume_sample_from_disk: # We deliberately double the data to read compared to the sampling size: - self.dataset.set_volume_sampling_size(2 * self.config.volume_points_sample) + self.dataset.set_volume_sampling_size(10 * self.config.volume_points_sample) def __len__(self): if self.dataset is not None: @@ -930,7 +930,11 @@ def __iter__(self): def compute_scaling_factors( - cfg: DictConfig, input_path: str, target_keys: list[str], use_cache=None, max_samples=20, + cfg: DictConfig, + input_path: str, + target_keys: list[str], + use_cache=None, + max_samples=20, ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: """ Using the dataset at the path, compute the mean, std, min, and max of the target keys. From 06ca0850071f18f81f52630588126e327168c543 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Fri, 10 Oct 2025 06:46:39 -0700 Subject: [PATCH 77/98] Add more tests to the datapipe for domino --- physicsnemo/datapipes/cae/domino_datapipe.py | 2 - test/datapipes/test_domino_datapipe.py | 340 +++++++++++++++---- 2 files changed, 281 insertions(+), 61 deletions(-) diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py index 93e7d15b93..867a14a38d 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe.py +++ b/physicsnemo/datapipes/cae/domino_datapipe.py @@ -728,8 +728,6 @@ def process_data(self, data_dict): else: normed_vertices = data_dict["stl_coordinates"] - return_dict["surf_grid"] = surf_grid - # For SDF calculations, make sure the mesh_indices_flattened is an integer array: mesh_indices_flattened = data_dict["stl_faces"].to(torch.int32) diff --git a/test/datapipes/test_domino_datapipe.py b/test/datapipes/test_domino_datapipe.py index bb463792ff..4b8b3df0bb 100644 --- a/test/datapipes/test_domino_datapipe.py +++ b/test/datapipes/test_domino_datapipe.py @@ -27,7 +27,7 @@ from pytest_utils import import_or_fail from scipy.spatial import ConvexHull -from physicsnemo.datapipes.cae.domino_datapipe2 import ( +from physicsnemo.datapipes.cae.domino_datapipe import ( CachedDoMINODataset, DoMINODataConfig, DoMINODataPipe, @@ -427,14 +427,31 @@ def test_domino_datapipe_coordinate_normalization( sample = dataset[0] validate_sample_structure(sample, model_type, gpu_output=True) - v_coords = sample["volume_mesh_centers"] - s_coords = sample["surface_mesh_centers"] + # Check all the volume coordinates: + for volume_key in ["volume_mesh_centers"]: + coords = sample[volume_key] + check_tensor_normalization( + coords, normalize_coordinates, sample_in_bbox, is_surface=False + ) + + # Check all the surface coordinates: + for surface_key in ["surface_mesh_centers", "surface_mesh_neighbors"]: + coords = sample[surface_key] + if surface_key == "surface_mesh_neighbors": + coords = coords.reshape((1, -1, 3)) + check_tensor_normalization( + coords, normalize_coordinates, sample_in_bbox, is_surface=True + ) + + +def check_tensor_normalization( + tensor, normalize_coordinates, sample_in_bbox, is_surface +): + """Check if a tensor is normalized properly.""" # Batch size is 1 here, but in principle this could be a loop: - v_min = torch.min(v_coords[0], dim=0).values - v_max = torch.max(v_coords[0], dim=0).values - s_min = torch.min(s_coords[0], dim=0).values - s_max = torch.max(s_coords[0], dim=0).values + t_min = torch.min(tensor[0], dim=0).values + t_max = torch.max(tensor[0], dim=0).values # If normalization is enabled, coordinates should be in [-2, 2] range if normalize_coordinates: @@ -443,12 +460,12 @@ def test_domino_datapipe_coordinate_normalization( # that were already inside the box should be present. # That means that all values should be between -1 and 1 - assert v_min[0] >= -1 - assert v_min[1] >= -1 - assert v_min[2] >= -1 - assert v_max[0] <= 1 - assert v_max[1] <= 1 - assert v_max[2] <= 1 + assert t_min[0] >= -1 + assert t_min[1] >= -1 + assert t_min[2] >= -1 + assert t_max[0] <= 1 + assert t_max[1] <= 1 + assert t_max[2] <= 1 else: # When normalizing the coordinates, the values of the bbox @@ -463,56 +480,248 @@ def test_domino_datapipe_coordinate_normalization( # So, field_range = (2 - -1) = 3 # new_val = 2 * (5 - -1)/ 3 - 1 = 3 - vol_x_rescale = 1 / (VOL_BBOX_XMAX - VOL_BBOX_XMIN) - vol_y_rescale = 1 / (VOL_BBOX_YMAX - VOL_BBOX_YMIN) - vol_z_rescale = 1 / (VOL_BBOX_ZMAX - VOL_BBOX_ZMIN) - - assert v_min[0] >= 2 * (DATA_XMIN - VOL_BBOX_XMIN) * vol_x_rescale - 1 - assert v_min[1] >= 2 * (DATA_YMIN - VOL_BBOX_YMIN) * vol_y_rescale - 1 - assert v_min[2] >= 2 * (DATA_ZMIN - VOL_BBOX_ZMIN) * vol_z_rescale - 1 - assert v_max[0] <= 2 * (DATA_XMAX - VOL_BBOX_XMIN) * vol_x_rescale - 1 - assert v_max[1] <= 2 * (DATA_YMAX - VOL_BBOX_YMIN) * vol_y_rescale - 1 - assert v_max[2] <= 2 * (DATA_ZMAX - VOL_BBOX_ZMIN) * vol_z_rescale - 1 - - surf_x_rescale = 1 / (SURF_BBOX_XMAX - SURF_BBOX_XMIN) - surf_y_rescale = 1 / (SURF_BBOX_YMAX - SURF_BBOX_YMIN) - surf_z_rescale = 1 / (SURF_BBOX_ZMAX - SURF_BBOX_ZMIN) - - assert s_min[0] >= 2 * (DATA_XMIN - SURF_BBOX_XMIN) * surf_x_rescale - 1 - assert s_min[1] >= 2 * (DATA_YMIN - SURF_BBOX_YMIN) * surf_y_rescale - 1 - assert s_min[2] >= 2 * (DATA_ZMIN - SURF_BBOX_ZMIN) * surf_z_rescale - 1 - assert s_max[0] <= 2 * (DATA_XMAX - SURF_BBOX_XMIN) * surf_x_rescale - 1 - assert s_max[1] <= 2 * (DATA_YMAX - SURF_BBOX_YMIN) * surf_y_rescale - 1 - assert s_max[2] <= 2 * (DATA_ZMAX - SURF_BBOX_ZMIN) * surf_z_rescale - 1 + if is_surface: + x_rescale = 1 / (SURF_BBOX_XMAX - SURF_BBOX_XMIN) + y_rescale = 1 / (SURF_BBOX_YMAX - SURF_BBOX_YMIN) + z_rescale = 1 / (SURF_BBOX_ZMAX - SURF_BBOX_ZMIN) + target_min_x = 2 * (DATA_XMIN - SURF_BBOX_XMIN) * x_rescale - 1 + target_min_y = 2 * (DATA_YMIN - SURF_BBOX_YMIN) * y_rescale - 1 + target_min_z = 2 * (DATA_ZMIN - SURF_BBOX_ZMIN) * z_rescale - 1 + target_max_x = 2 * (DATA_XMAX - SURF_BBOX_XMIN) * x_rescale - 1 + target_max_y = 2 * (DATA_YMAX - SURF_BBOX_YMIN) * y_rescale - 1 + target_max_z = 2 * (DATA_ZMAX - SURF_BBOX_ZMIN) * z_rescale - 1 + else: + x_rescale = 1 / (VOL_BBOX_XMAX - VOL_BBOX_XMIN) + y_rescale = 1 / (VOL_BBOX_YMAX - VOL_BBOX_YMIN) + z_rescale = 1 / (VOL_BBOX_ZMAX - VOL_BBOX_ZMIN) + target_min_x = 2 * (DATA_XMIN - VOL_BBOX_XMIN) * x_rescale - 1 + target_min_y = 2 * (DATA_YMIN - VOL_BBOX_YMIN) * y_rescale - 1 + target_min_z = 2 * (DATA_ZMIN - VOL_BBOX_ZMIN) * z_rescale - 1 + target_max_x = 2 * (DATA_XMAX - VOL_BBOX_XMIN) * x_rescale - 1 + target_max_y = 2 * (DATA_YMAX - VOL_BBOX_YMIN) * y_rescale - 1 + target_max_z = 2 * (DATA_ZMAX - VOL_BBOX_ZMIN) * z_rescale - 1 + + assert t_min[0] >= target_min_x + assert t_min[1] >= target_min_y + assert t_min[2] >= target_min_z + assert t_max[0] <= target_max_x + assert t_max[1] <= target_max_y + assert t_max[2] <= target_max_z else: if sample_in_bbox: # We've sampled in the bbox but NOT normalized. # So, the values should exclusively be in the BBOX ranges: - assert v_min[0] >= VOL_BBOX_XMIN - assert v_min[1] >= VOL_BBOX_YMIN - assert v_min[2] >= VOL_BBOX_ZMIN - assert v_max[0] <= VOL_BBOX_XMAX - assert v_max[1] <= VOL_BBOX_YMAX - assert v_max[2] <= VOL_BBOX_ZMAX - - assert s_min[0] >= SURF_BBOX_XMIN - assert s_min[1] >= SURF_BBOX_YMIN - assert s_min[2] >= SURF_BBOX_ZMIN - assert s_max[0] <= SURF_BBOX_XMAX - assert s_max[1] <= SURF_BBOX_YMAX - assert s_max[2] <= SURF_BBOX_ZMAX + + if is_surface: + assert t_min[0] >= SURF_BBOX_XMIN + assert t_min[1] >= SURF_BBOX_YMIN + assert t_min[2] >= SURF_BBOX_ZMIN + assert t_max[0] <= SURF_BBOX_XMAX + assert t_max[1] <= SURF_BBOX_YMAX + assert t_max[2] <= SURF_BBOX_ZMAX + else: + assert t_min[0] >= VOL_BBOX_XMIN + assert t_min[1] >= VOL_BBOX_YMIN + assert t_min[2] >= VOL_BBOX_ZMIN + assert t_max[0] <= VOL_BBOX_XMAX + assert t_max[1] <= VOL_BBOX_YMAX + assert t_max[2] <= VOL_BBOX_ZMAX else: # Not sampling, and also # Not normalizing, values should be in data range only: - assert v_min[0] >= DATA_XMIN and v_max[0] <= DATA_XMAX - assert v_min[1] >= DATA_YMIN and v_max[1] <= DATA_YMAX - assert v_min[2] >= DATA_ZMIN and v_max[2] <= DATA_ZMAX - assert s_min[0] >= DATA_XMIN and s_max[0] <= DATA_XMAX - assert s_min[1] >= DATA_YMIN and s_max[1] <= DATA_YMAX - # Surface points always should be > 0 - assert s_min[2] >= 0 and s_max[2] <= DATA_ZMAX + assert t_min[0] >= DATA_XMIN and t_max[0] <= DATA_XMAX + assert t_min[1] >= DATA_YMIN and t_max[1] <= DATA_YMAX + + if is_surface: + # Surface points always should be > 0 + assert t_min[2] >= 0 and t_max[2] <= DATA_ZMAX + else: + assert t_min[2] >= DATA_ZMIN and t_max[2] <= DATA_ZMAX + + return True + + +@pytest.mark.parametrize("model_type", ["surface"]) +@pytest.mark.parametrize("normalize_coordinates", [True, False]) +@pytest.mark.parametrize("sample_in_bbox", [True, False]) +def test_domino_datapipe_surface_normalization( + zarr_dataset, pytestconfig, model_type, normalize_coordinates, sample_in_bbox +): + """Test normalization functionality. + + This test is meant to make sure all the peripheral outputs are + normalized properly. FOcus on surface here. + + We could do them all in one test but it gets unweildy, and if there + are failures it helps nail down exactly where. + """ + cuda = torch.cuda.is_available() + + dataset = create_basic_dataset( + zarr_dataset, + model_type, + gpu_preprocessing=cuda, + gpu_output=cuda, + normalize_coordinates=normalize_coordinates, + sampling=True, + sample_in_bbox=sample_in_bbox, + ) + + # Here's a list of values to check, and the behavior we expect: + + # surf_grid - normalized by s_min, s_max + sample = dataset[0] + surf_grid = sample["surf_grid"] + + # If normalizing, surf_grid should be between -1 and 1. + # Otherwise, should be between s_min and s_max + if not normalize_coordinates: + target_min = torch.tensor([SURF_BBOX_XMIN, SURF_BBOX_YMIN, SURF_BBOX_ZMIN]) + target_max = torch.tensor([SURF_BBOX_XMAX, SURF_BBOX_YMAX, SURF_BBOX_ZMAX]) + else: + target_min = torch.tensor([-1.0, -1.0, -1.0]) + target_max = torch.tensor([1.0, 1.0, 1.0]) + + target_min = target_min.to(surf_grid.device) + target_max = target_max.to(surf_grid.device) + + # Flatten all the grid coords: + surf_grid = surf_grid.reshape((-1, 3)) + + assert torch.all(surf_grid >= target_min) + assert torch.all(surf_grid <= target_max) + + # sdf_surf_grid - should have max values less than || s_max - s_min|| + + max_norm_allowed = torch.norm(target_max - target_min) + + sdf_surf_grid = sample["sdf_surf_grid"] + assert torch.all(sdf_surf_grid <= max_norm_allowed) + # (Negative values are ok but we don't really check that.) + + # surface_min_max should only be in the dict if normaliztion is on: + if normalize_coordinates: + assert "surface_min_max" in sample + s_mm = sample["surface_min_max"] + assert s_mm.shape == (1, 2, 3) + + assert torch.allclose( + s_mm[0, 0], + torch.tensor([SURF_BBOX_XMIN, SURF_BBOX_YMIN, SURF_BBOX_ZMIN]).to( + s_mm.device + ), + ) + assert torch.allclose( + s_mm[0, 1], + torch.tensor([SURF_BBOX_XMAX, SURF_BBOX_YMAX, SURF_BBOX_ZMAX]).to( + s_mm.device + ), + ) + + else: + assert "surface_min_max" not in sample + + # For the rest of the values, checks are straightforward: + + assert torch.all(sample["surface_areas"] > 0) + assert torch.all(sample["surface_neighbors_areas"] > 0) + + # No checks implemented on the following, yet: + # - pos_surface_center_of_mass + + +@pytest.mark.parametrize("model_type", ["volume"]) +@pytest.mark.parametrize("normalize_coordinates", [True, False]) +@pytest.mark.parametrize("sample_in_bbox", [True, False]) +def test_domino_datapipe_volume_normalization( + zarr_dataset, pytestconfig, model_type, normalize_coordinates, sample_in_bbox +): + """Test normalization functionality. + + This test is meant to make sure all the peripheral outputs are + normalized properly. FOcus on volume here. + + We could do them all in one test but it gets unweildy, and if there + are failures it helps nail down exactly where. + """ + cuda = torch.cuda.is_available() + + dataset = create_basic_dataset( + zarr_dataset, + model_type, + gpu_preprocessing=cuda, + gpu_output=cuda, + normalize_coordinates=normalize_coordinates, + sampling=True, + sample_in_bbox=sample_in_bbox, + ) + + # Here's a list of values to check, and the behavior we expect: + + # grid - normalized by s_min, s_max + sample = dataset[0] + grid = sample["grid"] + + # If normalizing, surf_grid should be between -1 and 1. + # Otherwise, should be between s_min and s_max + if not normalize_coordinates: + target_min = torch.tensor([VOL_BBOX_XMIN, VOL_BBOX_YMIN, VOL_BBOX_ZMIN]) + target_max = torch.tensor([VOL_BBOX_XMAX, VOL_BBOX_YMAX, VOL_BBOX_ZMAX]) + else: + target_min = torch.tensor([-1.0, -1.0, -1.0]) + target_max = torch.tensor([1.0, 1.0, 1.0]) + + target_min = target_min.to(grid.device) + target_max = target_max.to(grid.device) + + # Flatten all the grid coords: + grid = grid.reshape((-1, 3)) + + assert torch.all(grid >= target_min) + assert torch.all(grid <= target_max) + + # sdf_grid - should have max values less than || s_max - s_min|| + + max_norm_allowed = torch.norm(target_max - target_min) + + sdf_grid = sample["sdf_grid"] + assert torch.all(sdf_grid <= max_norm_allowed) + # (Negative values are ok but we don't really check that.) + + # surface_min_max should only be in the dict if normaliztion is on: + if normalize_coordinates: + assert "volume_min_max" in sample + s_mm = sample["volume_min_max"] + assert s_mm.shape == (1, 2, 3) + + assert torch.allclose( + s_mm[0, 0], + torch.tensor([VOL_BBOX_XMIN, VOL_BBOX_YMIN, VOL_BBOX_ZMIN]).to(s_mm.device), + ) + assert torch.allclose( + s_mm[0, 1], + torch.tensor([VOL_BBOX_XMAX, VOL_BBOX_YMAX, VOL_BBOX_ZMAX]).to(s_mm.device), + ) + + else: + assert "volume_min_max" not in sample + + sdf_nodes = sample["sdf_nodes"] + pos_volume_closest_norm = torch.norm(sample["pos_volume_closest"], dim=-1).reshape( + sdf_nodes.shape + ) + assert torch.allclose(pos_volume_closest_norm, sdf_nodes) + # No checks implemented on the following, yet: + # - pos_volume_center_of_mass + + # The center of mass should be inside the mesh. So, the displacement + # from the center of mass should be exclusively larger than the sdf: + pos_volume_center_of_mass_norm = torch.norm( + sample["pos_volume_center_of_mass"], dim=-1 + ).reshape(sdf_nodes.shape) + assert torch.all(pos_volume_center_of_mass_norm > sdf_nodes) @import_or_fail(["warp", "cupy", "cuml"]) @@ -575,18 +784,30 @@ def test_domino_datapipe_sampling(zarr_dataset, model_type, sampling, pytestconf @import_or_fail(["warp", "cupy", "cuml"]) -@pytest.mark.parametrize("model_type", ["volume"]) +@pytest.mark.parametrize("model_type", ["volume", "surface", "combined"]) @pytest.mark.parametrize("scaling_type", [None, "min_max_scaling", "mean_std_scaling"]) def test_domino_datapipe_scaling(zarr_dataset, model_type, scaling_type, pytestconfig): """Test field scaling functionality.""" use_cuda = torch.cuda.is_available() - if scaling_type == "min_max_scaling": - volume_factors = [10.0, -10.0] # [max, min] - elif scaling_type == "mean_std_scaling": - volume_factors = [0.0, 1.0] # [mean, std] + if model_type in ["volume", "combined"]: + volume_factors = torch.tensor( + [ + [10.0, -10.0, 10.0, 10.0, 10.0], + [10.0, -10.0, 10.0, 10.0, 10.0], + ] + ) else: volume_factors = None + if model_type in ["surface", "combined"]: + surface_factors = torch.tensor( + [ + [10.0, -10.0, 10.0, 10.0], + [10.0, -10.0, 10.0, 10.0], + ] + ) + else: + surface_factors = None dataset = create_basic_dataset( zarr_dataset, @@ -595,6 +816,7 @@ def test_domino_datapipe_scaling(zarr_dataset, model_type, scaling_type, pytestc gpu_output=use_cuda, scaling_type=scaling_type, volume_factors=volume_factors, + surface_factors=surface_factors, ) sample = dataset[0] From 8a91a18a4f2fe513f8564ae70a40c1564ce26b92 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Fri, 10 Oct 2025 06:54:08 -0700 Subject: [PATCH 78/98] Rename DrivaerMLDataset to CAE Dataset. --- .../cae/{drivaer_ml_dataset.py => cae_dataset.py} | 12 ++++++------ physicsnemo/datapipes/cae/domino_datapipe.py | 10 +++++----- test/datapipes/test_domino_datapipe.py | 4 ++-- 3 files changed, 13 insertions(+), 13 deletions(-) rename physicsnemo/datapipes/cae/{drivaer_ml_dataset.py => cae_dataset.py} (99%) diff --git a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py b/physicsnemo/datapipes/cae/cae_dataset.py similarity index 99% rename from physicsnemo/datapipes/cae/drivaer_ml_dataset.py rename to physicsnemo/datapipes/cae/cae_dataset.py index 6d46a40879..4b3dd0bfde 100644 --- a/physicsnemo/datapipes/cae/drivaer_ml_dataset.py +++ b/physicsnemo/datapipes/cae/cae_dataset.py @@ -734,7 +734,7 @@ def is_vtk_directory(file: pathlib.Path) -> bool: ) -class DrivaerMLDataset: +class CAEDataset: """ Dataset reader for DrivaerML and similar datasets. In general, this dataset supports reading dictionary-like data, and returning a @@ -1118,7 +1118,7 @@ def set_volume_sampling_size(self, volume_sampling_size: int): def compute_mean_std_min_max( - dataset: DrivaerMLDataset, field_keys: list[str], max_samples: int = 20 + dataset: CAEDataset, field_keys: list[str], max_samples: int = 20 ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: """ Compute the mean, standard deviation, minimum, and maximum for a specified field @@ -1127,7 +1127,7 @@ def compute_mean_std_min_max( Uses a numerically stable online algorithm for mean and variance. Args: - dataset (DrivaerMLDataset): The dataset to process. + dataset (CAEDataset): The dataset to process. field_key (str): The key for the field to normalize. Returns: @@ -1184,11 +1184,11 @@ def compute_mean_std_min_max( # Update min/max batch_min = field_data.amin(dim=(0)) batch_max = field_data.amax(dim=(0)) - + min_val[field_key] = torch.minimum(min_val[field_key], batch_min) - max_val[field_key] = torch.maximum(max_val[field_key], batch_max) - + max_val[field_key] = torch.maximum(max_val[field_key], batch_max) + # Update running mean and M2 (Welford's algorithm) delta = batch_mean - mean[field_key] N[field_key] += batch_n # batch_n should also be torch.int64 diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py index 867a14a38d..46f5848dc1 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe.py +++ b/physicsnemo/datapipes/cae/domino_datapipe.py @@ -37,8 +37,8 @@ from torch.distributed.tensor.placement_types import Replicate from torch.utils.data import Dataset -from physicsnemo.datapipes.cae.drivaer_ml_dataset import ( - DrivaerMLDataset, +from physicsnemo.datapipes.cae.cae_dataset import ( + CAEDataset, compute_mean_std_min_max, ) from physicsnemo.distributed import DistributedManager @@ -229,7 +229,7 @@ def __init__( self.preproc_device = ( dist.device if self.config.gpu_preprocessing else torch.device("cpu") ) - # The drivaer_ml_dataset will automatically target this device + # The cae_dataset will automatically target this device # In an async transfer. self.output_device = ( dist.device if self.config.gpu_output else torch.device("cpu") @@ -946,7 +946,7 @@ def compute_scaling_factors( device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") - dataset = DrivaerMLDataset( + dataset = CAEDataset( data_dir=input_path, keys_to_read=target_keys, keys_to_read_if_available={}, @@ -1182,7 +1182,7 @@ def create_domino_dataset( preload_depth = 1 pin_memory = False - dataset = DrivaerMLDataset( + dataset = CAEDataset( data_dir=input_path, keys_to_read=keys_to_read, keys_to_read_if_available=keys_to_read_if_available, diff --git a/test/datapipes/test_domino_datapipe.py b/test/datapipes/test_domino_datapipe.py index 4b8b3df0bb..a4f7159c1f 100644 --- a/test/datapipes/test_domino_datapipe.py +++ b/test/datapipes/test_domino_datapipe.py @@ -27,12 +27,12 @@ from pytest_utils import import_or_fail from scipy.spatial import ConvexHull +from physicsnemo.datapipes.cae.cae_dataset import CAEDataset from physicsnemo.datapipes.cae.domino_datapipe import ( CachedDoMINODataset, DoMINODataConfig, DoMINODataPipe, ) -from physicsnemo.datapipes.cae.drivaer_ml_dataset import DrivaerMLDataset Tensor = torch.Tensor @@ -293,7 +293,7 @@ def create_basic_dataset( "global_params_reference": torch.tensor([1.225, 10.0]), } - dataset = DrivaerMLDataset( + dataset = CAEDataset( data_dir=input_path, keys_to_read=keys_to_read, keys_to_read_if_available=keys_to_read_if_available, From e151de0016fb6d0bcdc53ec704feaa7d9350132f Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Fri, 10 Oct 2025 07:40:08 -0700 Subject: [PATCH 79/98] Add metrics to printouts and tbfile. --- .../src/{ => deprecated}/train_sharded.py | 0 .../external_aerodynamics/domino/src/train.py | 80 +++++++++++- .../external_aerodynamics/domino/src/utils.py | 122 ++++++++++++++++++ 3 files changed, 195 insertions(+), 7 deletions(-) rename examples/cfd/external_aerodynamics/domino/src/{ => deprecated}/train_sharded.py (100%) diff --git a/examples/cfd/external_aerodynamics/domino/src/train_sharded.py b/examples/cfd/external_aerodynamics/domino/src/deprecated/train_sharded.py similarity index 100% rename from examples/cfd/external_aerodynamics/domino/src/train_sharded.py rename to examples/cfd/external_aerodynamics/domino/src/deprecated/train_sharded.py diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py index 0f08b95cfa..eb8b8ba63d 100644 --- a/examples/cfd/external_aerodynamics/domino/src/train.py +++ b/examples/cfd/external_aerodynamics/domino/src/train.py @@ -31,6 +31,7 @@ import os import re from typing import Literal, Any +from tabulate import tabulate import apex import numpy as np @@ -78,7 +79,7 @@ from loss import compute_loss_dict -from utils import get_num_vars, load_scaling_factors +from utils import get_num_vars, load_scaling_factors, compute_l2, all_reduce_dict def validation_step( @@ -86,6 +87,8 @@ def validation_step( model, device, logger, + tb_writer, + epoch_index, use_sdf_basis=False, use_surface_normals=False, integral_scaling_factor=1.0, @@ -98,8 +101,11 @@ def validation_step( vol_factors: torch.Tensor | None = None, add_physics_loss=False, ): + dm = DistributedManager() running_vloss = 0.0 with torch.no_grad(): + metrics = None + for i_batch, sample_batched in enumerate(dataloader): sampled_batched = dict_to_device(sample_batched, device) @@ -127,8 +133,37 @@ def validation_step( ) running_vloss += loss.item() + local_metrics = compute_l2( + prediction_surf, prediction_vol, sampled_batched, dataloader + ) + if metrics is None: + metrics = local_metrics + else: + metrics = { + key: metrics[key] + local_metrics[key] for key in metrics.keys() + } avg_vloss = running_vloss / (i_batch + 1) + metrics = {key: metrics[key] / (i_batch + 1) for key in metrics.keys()} + + metrics = all_reduce_dict(metrics, dm) + + if dm.rank == 0: + logger.info( + f" Device {device}, batch: {i_batch + 1}, VAL loss norm: {loss.detach().item():.5f}" + ) + tb_x = epoch_index + for key in metrics.keys(): + tb_writer.add_scalar(f"L2 Metrics/val/{key}", metrics[key], tb_x) + + metrics_table = tabulate( + [[k, v] for k, v in metrics.items()], + headers=["Metric", "Average Value"], + tablefmt="pretty", + ) + logger.info( + f"\nEpoch {epoch_index} VALIDATION Average Metrics:\n{metrics_table}\n" + ) return avg_vloss @@ -155,7 +190,7 @@ def train_epoch( surf_factors: torch.Tensor | None = None, add_physics_loss=False, ): - dist = DistributedManager() + dm = DistributedManager() running_loss = 0.0 last_loss = 0.0 @@ -165,6 +200,7 @@ def train_epoch( start_time = time.perf_counter() with Profiler(): io_start_time = time.perf_counter() + metrics = None for i_batch, sampled_batched in enumerate(dataloader): io_end_time = time.perf_counter() if add_physics_loss: @@ -195,6 +231,22 @@ def train_epoch( add_physics_loss, ) + # Compute metrics: + if isinstance(prediction_vol, tuple): + # This is if return_neighbors is on for volume: + prediction_vol = prediction_vol[0] + + local_metrics = compute_l2( + prediction_surf, prediction_vol, sampled_batched, dataloader + ) + if metrics is None: + metrics = local_metrics + else: + # Sum the running total: + metrics = { + key: metrics[key] + local_metrics[key] for key in metrics.keys() + } + loss = loss / loss_interval scaler.scale(loss).backward() @@ -237,12 +289,25 @@ def train_epoch( io_start_time = time.perf_counter() last_loss = running_loss / (i_batch + 1) # loss per batch - if dist.rank == 0: + # Normalize metrics: + metrics = {key: metrics[key] / (i_batch + 1) for key in metrics.keys()} + # reduce metrics across batch: + metrics = all_reduce_dict(metrics, dm) + if dm.rank == 0: logger.info( f" Device {device}, batch: {i_batch + 1}, loss norm: {loss.detach().item():.5f}" ) tb_x = epoch_index * len(dataloader) + i_batch + 1 tb_writer.add_scalar("Loss/train", last_loss, tb_x) + for key in metrics.keys(): + tb_writer.add_scalar(f"L2 Metrics/train/{key}", metrics[key], epoch_index) + + metrics_table = tabulate( + [[k, v] for k, v in metrics.items()], + headers=["Metric", "Average Value"], + tablefmt="pretty", + ) + logger.info(f"\nEpoch {epoch_index} Average Metrics:\n{metrics_table}\n") return last_loss @@ -278,10 +343,9 @@ def main(cfg: DictConfig) -> None: # Get scaling factors - precompute them if this fails! ###################################################### vol_factors, surf_factors = load_scaling_factors(cfg) - - vol_factors = np.asarray([[ 2.9064691e+00, 1.3743978e+00,1.2992665e+00, 1.0714761e+00, 3.2597079e-03], [-2.9988267e+00, -1.3753892e+00, -1.2892706e+00, -1.1400493e+00, 1.0002602e-11]]) - surf_factors = np.asarray([[ 1.8464564, 0.09996139, 0.07988136, 0.05437989], [-2.0476909, -0.10289095, -0.07811281, -0.05411612]]) - vol_factors_tensor = torch.from_numpy(vol_factors).to(dist.device) + + # vol_factors = np.asarray([[ 2.9064691e+00, 1.3743978e+00,1.2992665e+00, 1.0714761e+00, 3.2597079e-03], [-2.9988267e+00, -1.3753892e+00, -1.2892706e+00, -1.1400493e+00, 1.0002602e-11]]) + # surf_factors = np.asarray([[ 1.8464564, 0.09996139, 0.07988136, 0.05437989], [-2.0476909, -0.10289095, -0.07811281, -0.05411612]]) ###################################################### # Configure the model @@ -538,6 +602,8 @@ def main(cfg: DictConfig) -> None: model=model, device=dist.device, logger=logger, + tb_writer=writer, + epoch_index=epoch, use_sdf_basis=cfg.model.use_sdf_in_basis_func, use_surface_normals=cfg.model.use_surface_normals, integral_scaling_factor=initial_integral_factor, diff --git a/examples/cfd/external_aerodynamics/domino/src/utils.py b/examples/cfd/external_aerodynamics/domino/src/utils.py index 4cc04d0d3c..9b742677b2 100644 --- a/examples/cfd/external_aerodynamics/domino/src/utils.py +++ b/examples/cfd/external_aerodynamics/domino/src/utils.py @@ -20,6 +20,7 @@ from typing import Dict, Optional, Any import numpy as np import torch +import torch.distributed as dist import pickle from pathlib import Path from typing import Literal @@ -344,3 +345,124 @@ def load_scaling_factors( surf_factors_tensor = surf_factors_tensor.to(dm.device, dtype=torch.float32) return vol_factors_tensor, surf_factors_tensor + + +def compute_l2( + pred_surface: torch.Tensor | None, + pred_volume: torch.Tensor | None, + batch, + dataloader, +) -> dict[str, torch.Tensor]: + """ + Compute the L2 norm between prediction and target. + + Requires the dataloader to unscale back to original values + """ + + l2_dict = {} + + if pred_surface is not None: + _, target_surface = dataloader.unscale_model_outputs( + surface_fields=batch["surface_fields"] + ) + _, pred_surface = dataloader.unscale_model_outputs(surface_fields=pred_surface) + l2_surface = metrics_fn_surface(pred_surface, target_surface) + l2_dict.update(l2_surface) + if pred_volume is not None: + target_volume, _ = dataloader.unscale_model_outputs( + volume_fields=batch["volume_fields"] + ) + pred_volume, _ = dataloader.unscale_model_outputs(volume_fields=pred_volume) + l2_volume = metrics_fn_volume(pred_volume, target_volume) + l2_dict.update(l2_volume) + + return l2_dict + + +def metrics_fn_surface( + pred: torch.Tensor, + target: torch.Tensor, +) -> dict[str, torch.Tensor]: + """ + Computes L2 surface metrics between prediction and target. + + Args: + pred: Predicted values (normalized). + target: Target values (normalized). + + Returns: + Dictionary of L2 surface metrics for pressure and shear components. + """ + + l2_num = (pred - target) ** 2 + l2_num = torch.sum(l2_num, dim=1) + l2_num = torch.sqrt(l2_num) + + l2_denom = target**2 + l2_denom = torch.sum(l2_denom, dim=1) + l2_denom = torch.sqrt(l2_denom) + + l2 = l2_num / l2_denom + + metrics = { + "l2_surf_pressure": torch.mean(l2[:, 0]), + "l2_shear_x": torch.mean(l2[:, 1]), + "l2_shear_y": torch.mean(l2[:, 2]), + "l2_shear_z": torch.mean(l2[:, 3]), + } + + return metrics + + +def metrics_fn_volume( + pred: torch.Tensor, + target: torch.Tensor, +) -> dict[str, torch.Tensor]: + """ + Computes L2 volume metrics between prediction and target. + """ + l2_num = (pred - target) ** 2 + l2_num = torch.sum(l2_num, dim=1) + l2_num = torch.sqrt(l2_num) + + l2_denom = target**2 + l2_denom = torch.sum(l2_denom, dim=1) + l2_denom = torch.sqrt(l2_denom) + + l2 = l2_num / l2_denom + + metrics = { + "l2_vol_pressure": torch.mean(l2[:, 0]), + "l2_velocity_x": torch.mean(l2[:, 1]), + "l2_velocity_y": torch.mean(l2[:, 2]), + "l2_velocity_z": torch.mean(l2[:, 3]), + "l2_nut": torch.mean(l2[:, 4]), + } + + return metrics + + +def all_reduce_dict( + metrics: dict[str, torch.Tensor], dm: DistributedManager +) -> dict[str, torch.Tensor]: + """ + Reduces a dictionary of metrics across all distributed processes. + + Args: + metrics: Dictionary of metric names to torch.Tensor values. + dm: DistributedManager instance for distributed context. + + Returns: + Dictionary of reduced metrics. + """ + # TODO - update this to use domains and not the full world + + if dm.world_size == 1: + return metrics + + for key, value in metrics.items(): + dist.all_reduce(value) + value = value / dm.world_size + metrics[key] = value + + return metrics From 6b2e8d9dea2a877b249b8dd6ff65d614057d94c3 Mon Sep 17 00:00:00 2001 From: Rishikesh Ranade Date: Thu, 9 Oct 2025 18:10:41 -0700 Subject: [PATCH 80/98] cleaning up test and datapipe --- .../domino/src/conf/config.yaml | 4 +- .../external_aerodynamics/domino/src/test.py | 43 ++------- physicsnemo/datapipes/cae/domino_datapipe.py | 96 +++++++++---------- 3 files changed, 56 insertions(+), 87 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml index 13c00823b8..b5a3ebefbc 100644 --- a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml +++ b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml @@ -82,10 +82,10 @@ data: # Input directory for training and validation data gpu_preprocessing: true gpu_output: true normalize_coordinates: true - sample_in_bbox: True + sample_in_bbox: true sampling: true scaling_factors: ${project_dir}/scaling_factors/scaling_factors.pkl - volume_sample_from_disk: true + volume_sample_from_disk: false max_samples_for_statistics: 200 # ┌───────────────────────────────────────────┐ diff --git a/examples/cfd/external_aerodynamics/domino/src/test.py b/examples/cfd/external_aerodynamics/domino/src/test.py index 93fd16c25c..dc6b8ad27e 100644 --- a/examples/cfd/external_aerodynamics/domino/src/test.py +++ b/examples/cfd/external_aerodynamics/domino/src/test.py @@ -445,17 +445,8 @@ def main(cfg: DictConfig): # Center of mass calculation center_of_mass = calculate_center_of_mass(stl_centers, stl_sizes) - if cfg.data.bounding_box_surface is None: - s_max = np.amax(stl_vertices, 0) - s_min = np.amin(stl_vertices, 0) - else: - bounding_box_dims_surf = [] - bounding_box_dims_surf.append(np.asarray(cfg.data.bounding_box_surface.max)) - bounding_box_dims_surf.append(np.asarray(cfg.data.bounding_box_surface.min)) - s_max = np.float32(bounding_box_dims_surf[0]) - s_min = np.float32(bounding_box_dims_surf[1]) - s_max = torch.from_numpy(s_max).to(torch.float32).to(dist.device) - s_min = torch.from_numpy(s_min).to(torch.float32).to(dist.device) + s_max = torch.from_numpy(np.asarray(cfg.data.bounding_box_surface.max)).to(torch.float32).to(dist.device) + s_min = torch.from_numpy(np.asarray(cfg.data.bounding_box_surface.min)).to(torch.float32).to(dist.device) nx, ny, nz = cfg.model.interp_res @@ -608,19 +599,8 @@ def main(cfg: DictConfig): volume_coordinates = torch.from_numpy(volume_coordinates).to(torch.float32).to(dist.device) volume_fields = torch.from_numpy(volume_fields).to(torch.float32).to(dist.device) - bounding_box_dims = [] - bounding_box_dims.append(np.asarray(cfg.data.bounding_box.max)) - bounding_box_dims.append(np.asarray(cfg.data.bounding_box.min)) - - if bounding_box_dims is None: - c_max = s_max + (s_max - s_min) / 2 - c_min = s_min - (s_max - s_min) / 2 - c_min[2] = s_min[2] - else: - c_max = np.float32(bounding_box_dims[0]) - c_min = np.float32(bounding_box_dims[1]) - c_max = torch.from_numpy(c_max).to(dist.device) - c_min = torch.from_numpy(c_min).to(dist.device) + c_max = torch.from_numpy(np.asarray(cfg.data.bounding_box.max)).to(torch.float32).to(dist.device) + c_min = torch.from_numpy(np.asarray(cfg.data.bounding_box.min)).to(torch.float32).to(dist.device) # Generate a grid of specified resolution to map the bounding box # The grid is used for capturing structured geometry features and SDF representation of geometry @@ -655,11 +635,6 @@ def main(cfg: DictConfig): sdf_nodes = sdf_nodes.reshape(-1, 1) vol_grid_max_min = torch.stack([c_min, c_max]) - if cfg.data.normalize_coordinates: - sdf_node_closest_point = normalize(sdf_node_closest_point, c_max, c_min) - else: - sdf_node_closest_point = sdf_node_closest_point - pos_volume_closest = volume_coordinates - sdf_node_closest_point pos_volume_center_of_mass = volume_coordinates - center_of_mass_normalized @@ -786,9 +761,9 @@ def main(cfg: DictConfig): surface_fields[:, 0] * surface_normals[:, 2] * surface_sizes[:, 0] - surface_fields[:, 3] * surface_sizes[:, 0] ) - print("Drag=", dirname, force_x_pred, force_x_true) - print("Lift=", dirname, force_z_pred, force_z_true) - print("Side=", dirname, force_y_pred, force_y_true) + print("Drag=", dirname, force_x_pred.cpu().numpy(), force_x_true.cpu().numpy()) + print("Lift=", dirname, force_z_pred.cpu().numpy(), force_z_true.cpu().numpy()) + print("Side=", dirname, force_y_pred.cpu().numpy(), force_y_true.cpu().numpy()) aero_forces_all.append( [ dirname, @@ -808,7 +783,7 @@ def main(cfg: DictConfig): print( "Surface L-2 norm:", dirname, - torch.sqrt(l2_error) / torch.sqrt(l2_gt), + np.sqrt(l2_error.cpu().numpy()) / np.sqrt(l2_gt.cpu().numpy()), ) if prediction_vol is not None: @@ -832,7 +807,7 @@ def main(cfg: DictConfig): print( "Volume L-2 norm:", dirname, - torch.sqrt(l2_error) / torch.sqrt(l2_gt), + np.sqrt(l2_error.cpu().numpy()) / np.sqrt(l2_gt.cpu().numpy()), ) l2_volume_all.append(torch.sqrt(l2_error) / torch.sqrt(l2_gt)) diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py index 46f5848dc1..7839ce4de3 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe.py +++ b/physicsnemo/datapipes/cae/domino_datapipe.py @@ -304,15 +304,12 @@ def __init__( self.dataset = None - def compute_stl_scaling_and_surface_grids( - self, - stl_vertices: torch.Tensor, - ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + def compute_stl_scaling_and_surface_grids(self) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: """ Compute the min and max for the defining mesh. If the user supplies a bounding box, we use that. Otherwise, - it's created dynamically from the min/max of the stl vertices. + it raises an error. The returned min/max and grid are used for surface data. """ @@ -324,23 +321,17 @@ def compute_stl_scaling_and_surface_grids( s_min = self.config.bounding_box_dims_surf[1] surf_grid = self.default_surface_grid else: - # Create the grid dynamically - s_min = torch.amin(stl_vertices, 0) - s_max = torch.amax(stl_vertices, 0) - surf_grid = create_grid(s_max, s_min, self.config.grid_resolution) + raise ValueError("Bounding box dimensions are not set in config") return s_min, s_max, surf_grid - def compute_volume_scaling_and_grids( - self, s_min: torch.Tensor, s_max: torch.Tensor - ): + def compute_volume_scaling_and_grids(self) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: """ Compute the min and max and grid for volume data. If the user supplies a bounding box, we use that. Otherwise, - it's created dynamically from the surface min/max. + it raises an error. - This will be 2x longer in x and y and the same in z as the surface bounding box. """ # Determine the volume min / max locations @@ -348,13 +339,8 @@ def compute_volume_scaling_and_grids( c_max = self.config.bounding_box_dims[0] c_min = self.config.bounding_box_dims[1] volume_grid = self.default_volume_grid - else: - # Create the grid based on the surface grid - c_max = s_max + (s_max - s_min) / 2 - c_min = s_min - (s_max - s_min) / 2 - c_min[2] = s_min[2] - volume_grid = create_grid(c_max, c_min, self.config.grid_resolution) + raise ValueError("Bounding box dimensions are not set in config") return c_min, c_max, volume_grid @@ -377,9 +363,7 @@ def downsample_geometry( stl_vertices, geometry_points ) if geometry_coordinates_sampled.shape[0] < geometry_points: - geometry_coordinates_sampled = pad( - geometry_coordinates_sampled, geometry_points, pad_value=-100.0 - ) + raise ValueError("Sampled points is more than points in the surface mesh") geom_centers = geometry_coordinates_sampled else: geom_centers = stl_vertices @@ -432,6 +416,8 @@ def process_surface( surface_sizes = surface_sizes[ids_in_bbox] if surface_fields is not None: surface_fields = surface_fields[ids_in_bbox] + else: + raise ValueError("Surface fields are not present") ######################################################################## # Perform Down sampling of the surface fields. @@ -464,6 +450,8 @@ def process_surface( # Select out the sampled points for non-neighbor arrays: if surface_fields is not None: surface_fields = surface_fields[idx_surface] + else: + raise ValueError("Surface fields are not present") # Subsample the normals and sizes: surface_normals = surface_normals[idx_surface] @@ -560,6 +548,8 @@ def process_volume( volume_coordinates = volume_coordinates[ids_in_bbox] if volume_fields is not None: volume_fields = volume_fields[ids_in_bbox] + else: + raise ValueError("Volume fields are not present") ######################################################################## # Apply sampling to the volume coordinates and fields @@ -584,6 +574,8 @@ def process_volume( # Apply the same sampling to the targets, too: if volume_fields is not None: volume_fields = volume_fields[idx_volume] + else: + raise ValueError("Volume fields are not present") volume_coordinates = volume_coordinates_sampled @@ -679,9 +671,7 @@ def process_data(self, data_dict): stl_coordinates = data_dict["stl_coordinates"] - s_min, s_max, surf_grid = self.compute_stl_scaling_and_surface_grids( - stl_coordinates - ) + s_min, s_max, surf_grid = self.compute_stl_scaling_and_surface_grids() if isinstance(stl_coordinates, ShardTensor): mesh = stl_coordinates._spec.mesh @@ -739,8 +729,7 @@ def process_data(self, data_dict): use_sign_winding_number=True, ) return_dict["sdf_surf_grid"] = sdf_surf_grid - return_dict["surf_grid"] = surf_grid - + # Store this only if normalization is active: if self.config.normalize_coordinates: return_dict["surface_min_max"] = torch.stack([s_min, s_max]) @@ -761,30 +750,11 @@ def process_data(self, data_dict): # Determine the volumetric bounds of the data: ######################################################################## # Compute the min/max for volume an the unnomralized grid: - c_min, c_max, volume_grid = self.compute_volume_scaling_and_grids(s_min, s_max) - - # For volume data, we store this only if normalizing coordinates: - if self.model_type == "volume" or self.model_type == "combined": - if self.config.normalize_coordinates: - return_dict["volume_min_max"] = torch.stack([c_min, c_max]) - - if self.model_type == "volume" or self.model_type == "combined": - volume_fields_raw = ( - data_dict["volume_fields"] if "volume_fields" in data_dict else None - ) - volume_dict = self.process_volume( - c_min, - c_max, - volume_coordinates=data_dict["volume_mesh_centers"], - volume_grid=volume_grid, - center_of_mass=center_of_mass, - stl_vertices=data_dict["stl_coordinates"], - stl_indices=mesh_indices_flattened, - volume_fields=volume_fields_raw, - ) - - return_dict.update(volume_dict) + c_min, c_max, volume_grid = self.compute_volume_scaling_and_grids() + ######################################################################## + # Process the surface data + ######################################################################## if self.model_type == "surface" or self.model_type == "combined": surface_fields_raw = ( data_dict["surface_fields"] if "surface_fields" in data_dict else None @@ -806,6 +776,30 @@ def process_data(self, data_dict): return_dict.update(surface_dict) + ######################################################################## + # Process the volume data + ######################################################################## + # For volume data, we store this only if normalizing coordinates: + if self.model_type == "volume" or self.model_type == "combined": + return_dict["volume_min_max"] = torch.stack([c_min, c_max]) + + if self.model_type == "volume" or self.model_type == "combined": + volume_fields_raw = ( + data_dict["volume_fields"] if "volume_fields" in data_dict else None + ) + volume_dict = self.process_volume( + c_min, + c_max, + volume_coordinates=data_dict["volume_mesh_centers"], + volume_grid=volume_grid, + center_of_mass=center_of_mass, + stl_vertices=data_dict["stl_coordinates"], + stl_indices=mesh_indices_flattened, + volume_fields=volume_fields_raw, + ) + + return_dict.update(volume_dict) + return return_dict def scale_model_targets( From 0b721fc89bfbdc187fc28ee50e744e65548b8224 Mon Sep 17 00:00:00 2001 From: Rishikesh Ranade Date: Tue, 14 Oct 2025 06:51:05 -0700 Subject: [PATCH 81/98] benchmarked code for accuracy, set configs, scaling factor calculation updated --- .../domino/src/conf/config.yaml | 8 ++- .../external_aerodynamics/domino/src/test.py | 53 ++++++++-------- .../external_aerodynamics/domino/src/train.py | 5 +- physicsnemo/datapipes/cae/cae_dataset.py | 60 ++++++++++++++----- physicsnemo/datapipes/cae/domino_datapipe.py | 2 +- physicsnemo/models/domino/geometry_rep.py | 19 ++++-- physicsnemo/models/domino/model.py | 14 ++++- 7 files changed, 106 insertions(+), 55 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml index b5a3ebefbc..6b7fa5cb3a 100644 --- a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml +++ b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml @@ -85,8 +85,8 @@ data: # Input directory for training and validation data sample_in_bbox: true sampling: true scaling_factors: ${project_dir}/scaling_factors/scaling_factors.pkl - volume_sample_from_disk: false - max_samples_for_statistics: 200 + volume_sample_from_disk: true + max_samples_for_statistics: 100 # ┌───────────────────────────────────────────┐ # │ Domain Parallelism Settings │ @@ -118,7 +118,7 @@ model: use_surface_normals: true # Use surface normals and surface areas for surface computation? use_surface_area: true # Use only surface normals and not surface area integral_loss_scaling_factor: 100 # Scale integral loss by this factor - normalization: min_max_scaling # or mean_std_scaling + normalization: mean_std_scaling # or min_max_scaling encode_parameters: false # encode inlet velocity and air density in the model surf_loss_scaling: 5.0 # scale surface loss with this factor in combined mode vol_loss_scaling: 1.0 # scale volume loss with this factor in combined mode @@ -144,6 +144,8 @@ model: processor_type: conv # conv or unet (conv is better; fno, fignet to be added) self_attention: false # can be used only with unet cross_attention: false # can be used only with unet + surface_sdf_scaling_factor: [0.01, 0.02, 0.04] # Scaling factor for SDF, smaller is more emphasis on surface + volume_sdf_scaling_factor: [0.04] # Scaling factor for SDF, smaller is more emphasis on surface nn_basis_functions: # Hyperparameters for basis function network base_layer: 512 fourier_features: true diff --git a/examples/cfd/external_aerodynamics/domino/src/test.py b/examples/cfd/external_aerodynamics/domino/src/test.py index dc6b8ad27e..2fa0de34f7 100644 --- a/examples/cfd/external_aerodynamics/domino/src/test.py +++ b/examples/cfd/external_aerodynamics/domino/src/test.py @@ -33,6 +33,9 @@ from hydra.utils import to_absolute_path from omegaconf import DictConfig, OmegaConf +# This will set up the cupy-ecosystem and pytorch to share memory pools +from physicsnemo.utils.memory import unified_gpu_memory + import numpy as np import cupy as cp @@ -57,7 +60,7 @@ from physicsnemo.utils.domino.vtk_file_utils import * from physicsnemo.utils.sdf import signed_distance_field from physicsnemo.utils.neighbors import knn -from utils import ScalingFactors +from utils import ScalingFactors, load_scaling_factors # AIR_DENSITY = 1.205 # STREAM_VELOCITY = 30.00 @@ -202,7 +205,10 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors): running_tloss_vol += loss_fn(tpredictions_batch, target_batch) prediction_vol[:, start_idx:end_idx] = tpredictions_batch - prediction_vol = unnormalize(prediction_vol, vol_factors[0], vol_factors[1]) + if cfg.model.normalization == "min_max_scaling": + prediction_vol = unnormalize(prediction_vol, vol_factors[0], vol_factors[1]) + elif cfg.model.normalization == "mean_std_scaling": + prediction_vol = unstandardize(prediction_vol, vol_factors[0], vol_factors[1]) # print(np.amax(prediction_vol, axis=(0, 1)), np.amin(prediction_vol, axis=(0, 1))) prediction_vol[:, :, :3] = ( @@ -290,8 +296,12 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors): running_tloss_surf += loss_fn(tpredictions_batch, target_batch) prediction_surf[:, start_idx:end_idx] = tpredictions_batch + if cfg.model.normalization == "min_max_scaling": + prediction_surf = unnormalize(prediction_surf, surf_factors[0], surf_factors[1]) + elif cfg.model.normalization == "mean_std_scaling": + prediction_surf = unstandardize(prediction_surf, surf_factors[0], surf_factors[1]) prediction_surf = ( - unnormalize(prediction_surf, surf_factors[0], surf_factors[1]) + prediction_surf * stream_velocity[0, 0] ** 2.0 * air_density[0, 0] ) @@ -348,21 +358,7 @@ def main(cfg: DictConfig): ###################################################### pickle_path = os.path.join(cfg.data.scaling_factors) - try: - scaling_factors = ScalingFactors.load(pickle_path) - except FileNotFoundError: - raise FileNotFoundError( - f"Scaling factors not found at: {pickle_path}; please run compute_statistics.py to compute them." - ) - - # vol_factors = np.asarray([scaling_factors.max_val["volume_fields"], scaling_factors.min_val["volume_fields"]]) - # surf_factors = np.asarray([scaling_factors.max_val["surface_fields"], scaling_factors.min_val["surface_fields"]]) - - vol_factors = np.asarray([[ 2.9064691e+00, 1.3743978e+00,1.2992665e+00, 1.0714761e+00, 3.2597079e-03], [-2.9988267e+00, -1.3753892e+00, -1.2892706e+00, -1.1400493e+00, 1.0002602e-11]]) - surf_factors = np.asarray([[ 1.8464564, 0.09996139, 0.07988136, 0.05437989], [-2.0476909, -0.10289095, -0.07811281, -0.05411612]]) - - vol_factors = torch.from_numpy(vol_factors).to(dist.device) - surf_factors = torch.from_numpy(surf_factors).to(dist.device) + vol_factors, surf_factors = load_scaling_factors(cfg) print("Vol factors:", vol_factors) print("Surf factors:", surf_factors) @@ -457,14 +453,13 @@ def main(cfg: DictConfig): # SDF calculation on the grid using WARP time_start = time.time() - sdf_surf_grid = signed_distance_field( + sdf_surf_grid, _ = signed_distance_field( normed_stl_vertices_cp, mesh_indices_flattened, surf_grid_normed, use_sign_winding_number=True, ) - sdf_surf_grid = sdf_surf_grid[0] - + surf_grid_max_min = torch.stack([s_min, s_max]) # Get global parameters and global parameters scaling from config.yaml @@ -549,6 +544,8 @@ def main(cfg: DictConfig): if cfg.model.num_neighbors_surface > 1: time_start = time.time() + # print(f"file: {dirname}, surface coordinates shape: {surface_coordinates.shape}") + # try: ii, dd = knn( points=surface_coordinates, queries=surface_coordinates, @@ -562,6 +559,13 @@ def main(cfg: DictConfig): surface_neighbors_normals = surface_neighbors_normals[:, 1:] surface_neighbors_sizes = surface_sizes[ii] surface_neighbors_sizes = surface_neighbors_sizes[:, 1:] + # except: + # print(f"file: {dirname}, memory error in knn") + # print("setting surface neighbors to 0") + # surface_neighbors = surface_coordinates + # surface_neighbors_normals = surface_normals + # surface_neighbors_sizes = surface_sizes + # cfg.model.num_neighbors_surface = 1 else: surface_neighbors = surface_coordinates surface_neighbors_normals = surface_normals @@ -616,13 +620,12 @@ def main(cfg: DictConfig): # SDF calculation on the grid using WARP time_start = time.time() - sdf_grid = signed_distance_field( + sdf_grid, _ = signed_distance_field( normed_stl_vertices_vol, mesh_indices_flattened, grid, use_sign_winding_number=True, ) - sdf_grid = sdf_grid[0] # SDF calculation time_start = time.time() @@ -778,7 +781,7 @@ def main(cfg: DictConfig): l2_gt = torch.mean(torch.square(surface_fields), (0)) l2_error = torch.mean(torch.square(prediction_surf[0] - surface_fields), (0)) - l2_surface_all.append(torch.sqrt(l2_error / l2_gt)) + l2_surface_all.append(np.sqrt(l2_error.cpu().numpy()) / np.sqrt(l2_gt.cpu().numpy())) print( "Surface L-2 norm:", @@ -809,7 +812,7 @@ def main(cfg: DictConfig): dirname, np.sqrt(l2_error.cpu().numpy()) / np.sqrt(l2_gt.cpu().numpy()), ) - l2_volume_all.append(torch.sqrt(l2_error) / torch.sqrt(l2_gt)) + l2_volume_all.append(np.sqrt(l2_error.cpu().numpy()) / np.sqrt(l2_gt.cpu().numpy())) # import pdb; pdb.set_trace() if prediction_surf is not None: diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py index eb8b8ba63d..37634b8715 100644 --- a/examples/cfd/external_aerodynamics/domino/src/train.py +++ b/examples/cfd/external_aerodynamics/domino/src/train.py @@ -343,9 +343,8 @@ def main(cfg: DictConfig) -> None: # Get scaling factors - precompute them if this fails! ###################################################### vol_factors, surf_factors = load_scaling_factors(cfg) - - # vol_factors = np.asarray([[ 2.9064691e+00, 1.3743978e+00,1.2992665e+00, 1.0714761e+00, 3.2597079e-03], [-2.9988267e+00, -1.3753892e+00, -1.2892706e+00, -1.1400493e+00, 1.0002602e-11]]) - # surf_factors = np.asarray([[ 1.8464564, 0.09996139, 0.07988136, 0.05437989], [-2.0476909, -0.10289095, -0.07811281, -0.05411612]]) + + vol_factors_tensor = vol_factors ###################################################### # Configure the model diff --git a/physicsnemo/datapipes/cae/cae_dataset.py b/physicsnemo/datapipes/cae/cae_dataset.py index 4b3dd0bfde..a78157936a 100644 --- a/physicsnemo/datapipes/cae/cae_dataset.py +++ b/physicsnemo/datapipes/cae/cae_dataset.py @@ -1180,15 +1180,7 @@ def compute_mean_std_min_max( batch_mean = field_data.mean(axis=(0)) batch_M2 = ((field_data - batch_mean) ** 2).sum(axis=(0)) batch_n = field_data.shape[0] - - # Update min/max - batch_min = field_data.amin(dim=(0)) - batch_max = field_data.amax(dim=(0)) - - min_val[field_key] = torch.minimum(min_val[field_key], batch_min) - - max_val[field_key] = torch.maximum(max_val[field_key], batch_max) - + # Update running mean and M2 (Welford's algorithm) delta = batch_mean - mean[field_key] N[field_key] += batch_n # batch_n should also be torch.int64 @@ -1204,11 +1196,6 @@ def compute_mean_std_min_max( print(f"on iteration {i} of {max_samples}, time: {iteration_time:.2f} seconds") start = time.perf_counter() - global_end = time.perf_counter() - global_time = global_end - global_start - - print(f"Total time: {global_time:.2f} seconds for {max_samples} samples") - var = {} std = {} for field_key in field_keys: @@ -1217,4 +1204,47 @@ def compute_mean_std_min_max( ) # Convert N to Python int for division std[field_key] = torch.sqrt(var[field_key]) - return mean, std, min_val, max_val + start = time.perf_counter() + for i, data in enumerate(dataset): + if i >= max_samples: + break + + for field_key in field_keys: + field_data = data[field_key] + + batch_n = field_data.shape[0] + + # # Update min/max + + mean_sample = mean[field_key] + std_sample = std[field_key] + # import pdb; pdb.set_trace() + mask = torch.ones_like(field_data, dtype=torch.bool) + for v in range(field_data.shape[-1]): + idx = (field_data[:, v] < mean_sample[v] - 12 * std_sample[v]) | (field_data[:, v] > mean_sample[v] + 12 * std_sample[v]) + idx = torch.where(idx) + mask[idx] = False + + batch_min = [] + batch_max = [] + for v in range(field_data.shape[-1]): + batch_min.append(field_data[mask[:, v], v].min()) + batch_max.append(field_data[mask[:, v], v].max()) + + batch_min = torch.stack(batch_min) + batch_max = torch.stack(batch_max) + + min_val[field_key] = torch.minimum(min_val[field_key], batch_min) + max_val[field_key] = torch.maximum(max_val[field_key], batch_max) + + end = time.perf_counter() + iteration_time = end - start + print(f"on iteration {i} of {max_samples}, time: {iteration_time:.2f} seconds") + start = time.perf_counter() + + global_end = time.perf_counter() + global_time = global_end - global_start + + print(f"Total time: {global_time:.2f} seconds for {max_samples} samples") + + return mean, std, min_val, max_val \ No newline at end of file diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py index 7839ce4de3..99f086e024 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe.py +++ b/physicsnemo/datapipes/cae/domino_datapipe.py @@ -859,7 +859,7 @@ def set_dataset(self, dataset: Iterable) -> None: if self.config.volume_sample_from_disk: # We deliberately double the data to read compared to the sampling size: - self.dataset.set_volume_sampling_size(10 * self.config.volume_points_sample) + self.dataset.set_volume_sampling_size(100 * self.config.volume_points_sample) def __len__(self): if self.dataset is not None: diff --git a/physicsnemo/models/domino/geometry_rep.py b/physicsnemo/models/domino/geometry_rep.py index 0268e8f160..c2fbe9f606 100644 --- a/physicsnemo/models/domino/geometry_rep.py +++ b/physicsnemo/models/domino/geometry_rep.py @@ -28,7 +28,7 @@ # from .encodings import fourier_encode -def scale_sdf(sdf: torch.Tensor) -> torch.Tensor: +def scale_sdf(sdf: torch.Tensor, scaling_factor: float = 0.04) -> torch.Tensor: """ Scale a signed distance function (SDF) to emphasize surface regions. @@ -42,7 +42,7 @@ def scale_sdf(sdf: torch.Tensor) -> torch.Tensor: Returns: Tensor with scaled SDF values in range [-1, 1] """ - return sdf / (0.4 + torch.abs(sdf)) + return sdf / (scaling_factor + torch.abs(sdf)) class GeoConvOut(nn.Module): @@ -263,6 +263,7 @@ def __init__( radii: Sequence[float], neighbors_in_radius, hops=1, + sdf_scaling_factor: Sequence[float] = [0.04], model_parameters=None, # activation_conv: nn.Module, # activation_processor: nn.Module, @@ -281,6 +282,7 @@ def __init__( self.self_attention = geometry_rep.geo_processor.self_attention self.activation_conv = get_activation(geometry_rep.geo_conv.activation) self.activation_processor = geometry_rep.geo_processor.activation + self.sdf_scaling_factor = sdf_scaling_factor self.bq_warp = nn.ModuleList() self.geo_processors = nn.ModuleList() @@ -389,7 +391,7 @@ def __init__( elif geometry_rep.geo_processor.processor_type == "conv": self.geo_processor_sdf = nn.Sequential( GeoProcessor( - input_filters=6, + input_filters=5+len(self.sdf_scaling_factor), output_filters=geometry_rep.geo_conv.base_neurons_out, model_parameters=geometry_rep.geo_processor, ), @@ -465,15 +467,22 @@ def forward( if self.geo_encoding_type == "both" or self.geo_encoding_type == "sdf": # Expand SDF sdf = torch.unsqueeze(sdf, 1) - # Scaled sdf to emphasize near surface - scaled_sdf = scale_sdf(sdf) # Binary sdf binary_sdf = torch.where(sdf >= 0, 0.0, 1.0) # Gradients of SDF sdf_x, sdf_y, sdf_z = torch.gradient(sdf, dim=[2, 3, 4]) + scaled_sdf = [] + # Scaled sdf to emphasize near surface + for s in range(len(self.sdf_scaling_factor)): + s_sdf = scale_sdf(sdf, self.sdf_scaling_factor[s]) + scaled_sdf.append(s_sdf) + + scaled_sdf = torch.cat(scaled_sdf, dim=1) + # Process SDF and its computed features sdf = torch.cat((sdf, scaled_sdf, binary_sdf, sdf_x, sdf_y, sdf_z), 1) + sdf_encoding = self.geo_processor_sdf(sdf) sdf_encoding = self.geo_processor_sdf_out(sdf_encoding) diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py index aea2e91ad4..20be346dcb 100644 --- a/physicsnemo/models/domino/model.py +++ b/physicsnemo/models/domino/model.py @@ -30,7 +30,7 @@ from .encodings import ( MultiGeometryEncoding, ) -from .geometry_rep import GeometryRep +from .geometry_rep import GeometryRep, scale_sdf from .mlps import AggregationModel from .solutions import SolutionCalculatorSurface, SolutionCalculatorVolume @@ -298,6 +298,7 @@ def __init__( radii=model_parameters.geometry_rep.geo_conv.volume_radii, neighbors_in_radius=model_parameters.geometry_rep.geo_conv.volume_neighbors_in_radius, hops=model_parameters.geometry_rep.geo_conv.volume_hops, + sdf_scaling_factor=model_parameters.geometry_rep.geo_processor.volume_sdf_scaling_factor, model_parameters=model_parameters, ) @@ -306,6 +307,7 @@ def __init__( radii=model_parameters.geometry_rep.geo_conv.surface_radii, neighbors_in_radius=model_parameters.geometry_rep.geo_conv.surface_neighbors_in_radius, hops=model_parameters.geometry_rep.geo_conv.surface_hops, + sdf_scaling_factor=model_parameters.geometry_rep.geo_processor.surface_sdf_scaling_factor, model_parameters=model_parameters, ) @@ -351,8 +353,9 @@ def __init__( position_encoder_base_neurons = model_parameters.position_encoder.base_neurons self.activation = get_activation(model_parameters.activation) self.use_sdf_in_basis_func = model_parameters.use_sdf_in_basis_func + self.sdf_scaling_factor = model_parameters.geometry_rep.geo_processor.volume_sdf_scaling_factor if self.output_features_vol is not None: - inp_pos_vol = 7 if model_parameters.use_sdf_in_basis_func else 3 + inp_pos_vol = 7 + len(self.sdf_scaling_factor) if model_parameters.use_sdf_in_basis_func else 3 self.fc_p_vol = FourierMLP( input_features=inp_pos_vol, @@ -519,13 +522,18 @@ def forward(self, data_dict): # SDF on volume mesh nodes sdf_nodes = data_dict["sdf_nodes"] + scaled_sdf_nodes = [] + for i in range(len(self.sdf_scaling_factor)): + scaled_sdf_nodes.append(scale_sdf(sdf_nodes, self.sdf_scaling_factor[i])) + scaled_sdf_nodes = torch.cat(scaled_sdf_nodes, dim=-1) + # Positional encoding based on closest point on surface to a volume node pos_volume_closest = data_dict["pos_volume_closest"] # Positional encoding based on center of mass of geometry to volume node pos_volume_center_of_mass = data_dict["pos_volume_center_of_mass"] if self.use_sdf_in_basis_func: encoding_node_vol = torch.cat( - (sdf_nodes, pos_volume_closest, pos_volume_center_of_mass), dim=-1 + (sdf_nodes, scaled_sdf_nodes, pos_volume_closest, pos_volume_center_of_mass), dim=-1 ) else: encoding_node_vol = pos_volume_center_of_mass From 992c0872b6cd560efb9c3ec3a2a15b20e29e233c Mon Sep 17 00:00:00 2001 From: Rishikesh Ranade Date: Tue, 14 Oct 2025 10:57:51 -0700 Subject: [PATCH 82/98] fixing merge issue in datapipe --- .../cfd/external_aerodynamics/domino/src/conf/config.yaml | 6 +++--- physicsnemo/datapipes/cae/domino_datapipe.py | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml index 6b7fa5cb3a..b1b25515b4 100644 --- a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml +++ b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml @@ -71,8 +71,8 @@ variables: # │ Data Configs │ # └───────────────────────────────────────────┘ data: # Input directory for training and validation data - input_dir: /lustre/rranade/modulus_dev/data/aws_data_all/ - input_dir_val: /lustre/rranade/modulus_dev/data/aws_data_all_val/ + input_dir: /user/data/aws_data_all/ + input_dir_val: /user/data/aws_data_all_val/ bounding_box: # Bounding box dimensions for computational domain min: [-3.5, -2.25, -0.32] max: [8.5, 2.25, 3.00] @@ -209,7 +209,7 @@ eval: # Testing configurable parameters test_path: /user/testing_data # Dir for testing data in raw format (vtp, vtu ,stls) save_path: /user/predicted_data # Dir to save predicted results in raw format (vtp, vtu) checkpoint_name: DoMINO.0.455.pt # Name of checkpoint to select from saved checkpoints - scaling_param_path: /lustre/rranade/modulus_dev/corey_fork/physicsnemo/examples/cfd/external_aerodynamics/domino/outputs/DrivAerML_Dataset/ + scaling_param_path: /user/scaling_params refine_stl: False # Automatically refine STL during inference #TODO - This was hardcoded anyways, remove it. # stencil_size: 7 # Stencil size for evaluating surface and volume model diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py index 99f086e024..05243e99a5 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe.py +++ b/physicsnemo/datapipes/cae/domino_datapipe.py @@ -729,6 +729,7 @@ def process_data(self, data_dict): use_sign_winding_number=True, ) return_dict["sdf_surf_grid"] = sdf_surf_grid + return_dict["surf_grid"] = surf_grid # Store this only if normalization is active: if self.config.normalize_coordinates: From 34997ed7ab7aaf30158975d3bde5b1bfc9af95e7 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Tue, 14 Oct 2025 13:55:20 -0700 Subject: [PATCH 83/98] Update readme to include shuffling and performance notes. --- .../external_aerodynamics/domino/README.md | 57 +++++- .../src/shuffle_volumetric_curator_output.py | 176 ++++++++++++++++++ 2 files changed, 228 insertions(+), 5 deletions(-) create mode 100644 examples/cfd/external_aerodynamics/domino/src/shuffle_volumetric_curator_output.py diff --git a/examples/cfd/external_aerodynamics/domino/README.md b/examples/cfd/external_aerodynamics/domino/README.md index a786772071..a6cafd803d 100644 --- a/examples/cfd/external_aerodynamics/domino/README.md +++ b/examples/cfd/external_aerodynamics/domino/README.md @@ -127,7 +127,7 @@ knowledge of the dataset: To facilitate setting reasonable values of these, you can use the `compute_statistics.py` script. This will load the core dataset as defined -in your `config.yaml` file, loop over several events (20, by default), and +in your `config.yaml` file, loop over several events (200, by default), and both print and store the surface/volume field statistics as well as the coordinate statistics. @@ -211,9 +211,6 @@ The `domain_size` represents the number of GPUs used for each batch - setting but with extra overhead. `shard_grid` and `shard_points` will enable domain parallelism over the latent space and input/output points, respectively. -Please see `src/train_sharded.py` for more details regarding the changes -from the standard training script required for domain parallel DoMINO training. - As one last note regarding domain-parallel training: in the phase of the DoMINO where the output solutions are calculated, the model can used two different techniques (numerically identical) to calculate the output. Due to the @@ -245,7 +242,13 @@ To mitigate this, by default in DoMINO we use the Rapids Memory Manager to disable this you can do so with an environment variable: ```bash -export DOMINO_DISABLE_RMM=True +export PHYSICSNEMO_DISABLE_RMM=True +``` + +Or remove this line from the training script: + +```python +from physicsnemo.utils.memory import unified_gpu_memory ``` > Note - why not make it configurable? We have to set up the shared memory @@ -253,6 +256,50 @@ export DOMINO_DISABLE_RMM=True > been read. So, we enable by default and the opt-out path is via the > environment. +#### Reduced Volume Reads + +The dataset size for volumetric data can be quite substantial - DrivAerML, for +example, has mesh sizes of 160M points per example. Even though the models +do not process all 160M points, in order to down sample dynamically they all +must be read from disk - which can exceed bandwidth and CPU decoding capacity +on nodes with multiple GPUs. + +As a performance enhancement, DoMINO's data pipeline offers a mitigation: instead +of reading an entire volumetric mesh, during preprocessing we _shuffle_ the +volumetric inputs and outputs (in tandem) and subsequent reads choose random +slices of the volumetric data. By default, DoMINO will read about 100x more data +than necessary for the sampling size. This allows the pipeline to still apply +cuts for data inside of the bounding box, and further random sampling to improve +training stability. To enable/disable this parameter, set +`data.volume_sample_from_disk=True` (enable) or `False` (disable) + +> Note - if you volumetric data is not larger than a few million mesh points, +> pre-shuffling and sampling from disk is likely not necessary for you. + +`physicsnemo-curator` supports shuffling the volumetric data during preprocessing. +If, however, you've already preprocessed your data and just want to apply +shuffling, use the script at `src/shuffle_volumetric_curator_output.py` + +The shuffling script will also apply sharding to the output files, which +improves IO performance. So, `zarr>=3.0` is required to use the outputs from +curator. `src/shuffle_volumetric_curator_output.py` is meant to be an example of how +to apply shuffling, so modify and update as you need for your dataset. + +> If you have tensorstore installed (it's in `requirements.txt`), the data reader +> will work equally well with Zarr 2 or Zarr 3 files. + +#### Overall Performance + +DoMINO is a computationally complex and challenging workload. Over the course +of several releases, we have chipped away at performance bottlenecks to speed +up the training and inference time (with `inference_on_stl.py`). Overall +training performance has decreased from about 5 days to just over 4 hours, with +eight H100 GPUs. We hope these optimizations enable you to explore more +parameters and surrogate models; if there is a performance issue you see, +please open an issue on GitHub. + +![Results from DoMINO for RTWT SC demo](../../../../docs/img/domino_perf.png) + ### Training with Physics Losses DoMINO supports enforcing of PDE residuals as soft constraints. This can be used diff --git a/examples/cfd/external_aerodynamics/domino/src/shuffle_volumetric_curator_output.py b/examples/cfd/external_aerodynamics/domino/src/shuffle_volumetric_curator_output.py new file mode 100644 index 0000000000..02678d9e61 --- /dev/null +++ b/examples/cfd/external_aerodynamics/domino/src/shuffle_volumetric_curator_output.py @@ -0,0 +1,176 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import multiprocessing as mp +from functools import partial + +import numpy as np +import shutil + +import zarr +from numcodecs import Blosc + +""" +This script reads each zarr file from a specified directory, and copies the +data to the output directory. For the keys "volume_fields" and "volume_mesh_centers", +the script will apply a permutation (aka shuffle) of those fields in tandem. + +Since the datasets used are often very large, this script also applies +sharding to the output files which is a Zarr3 feature. + +Therefore, zarr >= 3.0 is required. +""" + + +def check_file_completeness(input_file: str, output_file: str) -> bool: + """ + Check if the output file exists and contains all required data from input file. + """ + if not os.path.exists(output_file): + return False + + in_file = zarr.open(input_file, mode="r") + try: + out_file = zarr.open(output_file, mode="r") + except zarr.errors.PathNotFoundError: + print(f"No output, returning False") + return False + + # Check if all keys except 'filename' exist and have same shapes + for key in in_file.keys(): + if key == "filename": + continue + if key not in out_file and key not in out_file.attrs: + print(f"Key {key} not in output, returning False") + return False + if isinstance(in_file[key], zarr.Array): + if key in out_file.attrs: + continue + if in_file[key].shape != out_file[key].shape: + print(f"Key {key} shape mismatch, returning False") + return False + return True + + +def store_array(store, name: str, data: np.ndarray): + # By default, chunk size is 10k points: + chunk_size = (10_000,) + data.shape[1:] + # By default, shard size is 2 million points: + shard_size = (2_000_000,) + data.shape[1:] + + zarr.create_array( + store=store, + name=name, + data=data, + chunks=chunk_size, + shards=shard_size, + compressors="auto", + ) + + +def copy_file_with_shuffled_volume_data(input_file: str, output_file: str): + """ + Copy a file with shuffled volume data, using Zarr v3 sharding for efficient storage. + Only processes if the output file doesn't exist or is incomplete. + """ + file_is_complete = check_file_completeness(input_file, output_file) + if file_is_complete: + print(f"Skipping {output_file} - already complete") + return True + + print(f"Processing {input_file} -> {output_file}") + + # return False + + # if the output folder exists but isn't complete, purge it. + # It's probably an interrupted conversion. + if os.path.exists(output_file): + shutil.rmtree(output_file) + + # return file_is_complete + volume_keys = ["volume_fields", "volume_mesh_centers"] + + in_file = zarr.open(input_file, mode="r") + + # Create store with sharding configuration + store = zarr.storage.LocalStore(output_file) + root = zarr.group(store=store) + + # First copy all non-volume data + for key in in_file.keys(): + if key not in volume_keys: + if key == "filename": + continue + in_data = in_file[key] + if in_data.shape != (): + # For array data, use the same chunks as input but with sharding + store_array(store, key, in_data[:]) + else: + # Store scalar values as attributes + root.attrs[key] = in_data[()] + + # Open and shuffle the volume data + volume_fields = in_file["volume_fields"][:] + volume_mesh_centers = in_file["volume_mesh_centers"][:] + + # Generate a permutation + permutation = np.random.permutation(volume_fields.shape[0]) + + # Shuffle the volume data + shuffled_volume_fields = volume_fields[permutation] + shuffled_volume_mesh_centers = volume_mesh_centers[permutation] + + store_array(store, "volume_fields", shuffled_volume_fields) + store_array(store, "volume_mesh_centers", shuffled_volume_mesh_centers) + + print(f"Processed {output_file} - COMPLETE") + return True + + +def process_file(file: str, top_dir: str, out_dir: str): + """ + Process a single file, creating output directory if needed. + """ + os.makedirs(out_dir, exist_ok=True) + input_path = os.path.join(top_dir, file) + output_path = os.path.join(out_dir, file) + return copy_file_with_shuffled_volume_data(input_path, output_path) + + +def main(): + top_dir = "/lustre/fsw/coreai_modulus_cae/coreya/datasets/domino/val/" + out_dir = "/lustre/fsw/coreai_modulus_cae/coreya/datasets/domino/val_shuffled2/" + + # Get list of files to process + files = os.listdir(top_dir) + files = files[0:2] + + # Create a partial function with fixed directories + process_func = partial(process_file, top_dir=top_dir, out_dir=out_dir) + + # Use multiprocessing to process files in parallel + num_cores = max(1, 64) # Leave one core free + print(f"Processing {len(files)} files using {num_cores} cores") + + with mp.Pool(num_cores) as pool: + results = pool.map(process_func, files) + print(f"Results: {results}") + print(f"Total conversions: {sum(results)}") + + +if __name__ == "__main__": + main() From 97e4354011e6da407505204600a018a55ff5618c Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Tue, 14 Oct 2025 15:59:49 -0500 Subject: [PATCH 84/98] Add domino perf plot --- docs/img/domino_perf.png | Bin 0 -> 59746 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 docs/img/domino_perf.png diff --git a/docs/img/domino_perf.png b/docs/img/domino_perf.png new file mode 100644 index 0000000000000000000000000000000000000000..0038267354894237f05051f3ea65a456ea8a1b6e GIT binary patch literal 59746 zcmeFZcRZE-A3uH&N~A%ejJr~{GLlUcmAz#a$KKnKU8ID}%wr^E6|y;rlD$`UwqtV~ z+xK<4Klk_d|L^bb^?2Nm+fiND`#RVAHJ;;jg{UaWQBg2ZAP@+uhw`#&2n3lF0zvZP zBpLi>SMe|%{wLycU&lq=-ps}QsS^^R_|(P0#@@xo@)?U8(#hG<-j1K=_AMSkE*1+H z7YAn%US8Y(`!{&(oy>W4_bo)>ETZgSNh_WTKED?zL_J^|48XieYqvY=| z%;1j?Wq8%1Zz$#vQv?umu&8Cee*Kz}{WYTv+h8q+){=n@+sVPxuUYztuWCujl1f*y z-6vXLnXZt)uf`vk7h4X?58<(uPPRqy3T8K&Oz{$U;VdbZvnMaX)&Cw+4D9dcss8(f z_+@)S>c0G$8yekC%96aRhqz(~yU-_I<|XAwmIeXtBh zoc!-+dC~uOiU0RSArSv>M&w-?iMl{^y+Vq`SXpfSg=p}6RlX6%CuHn80&zBg z5N)0oKVH*5CHJj2cofTJ3ICuhBV6Kl``sx@udOb*KnAO_-_Qk$jbIn|> zuk7j{b$l+YdNuj78&sC`6&MV+OYGZR7P3oP9<9Rr**RA@&E!3f<5_VUacpGloO|HH zkloVkmE$n^UF*xU`*@KthkB~*9%V`0Pn8QF6Z5?Hx3_;@wjVU|Jw!HMaSpAr87c4j zsEoEN8+A*3dqtS9EAiCz);%0{c`}%}w7{V1$ycks5rpshpC9>;VmSK0n8;PlrFboc zOCH@#9Tu?a%`R`(bRTuqZg4HJ9^ff(onI}}(aL`^6ZQi)U}RtAw%q*nP`}hBe$;1w z*=^}d=|HlTBys22R+~cYxys4<;Nep@lZ=CzP}%!c>eA!R?ZS;Wl#=EH zgb|1j8N@`jGYG^ler$h!ajo}0nZ57&54t+vNWoWy#=+(-vD|iRip62>t7D7@-t|FL zwrdl0Zj&$Ro^++(=av**_z<B%1m^gxyIV0Q%`%2r96*icNoL&c?AbUgz7)n%wQp5wptCosdh#MK*^;APt=RK)7AI}=B~YClk5 z5H!G=(3K%a&a7Q#H~MX)!Z{GX{QE0sy|quTs$9h!F@hyoh4?hW?|O#rz$fWK`WFf7 z1Lck;`27m!xhKVDZ8kqc>0^y7XS14xEwyG}ox66vMwmU#xGuo0U)Q{1<6aMH~gV+`m@kJ zsC1#5nc4RBO*he3s>NLwH6E$_GpxaX)NnzWa!d;oDrnuWQ?w6%-FCdzn;zBsLT!ki zN-U^~THc%&b5OKAju|&7R7ndkaQs25NbwRDJL25odO-t{bSx*fS9k&P z9_98#IeyG5-)5-jX`DBtqY>dp8a@sS-I=Nb;x6-@#=4rM3TGq_);_ne@BB&LlsH(c ze}18p7%@#vXp!Wdte)j3;H)ILK8@%32a9`cUq|&sJbO}Rmpr+hr(1l!lw00ggnE~J zO(0Ns=w*sfUk~EjF_wEqHN(btRxWQfdZ^f3&nI>4@?u6zB2hOir33e8?PyjMw)-`n zTtpB1-h{NLg3ymb* z^>okj)@(%@c;osU#SNt|w9Tpqx(aq_+`3P7D@JuP=6>@jK^3h)cR^Okor%EF)K?6V zK^aT5+__|9RwX%0vDd=xem`?v(BTkbPM_=r1Nu`_c7lX?R#UZEi_d18kR7JPP>@q| z_qE&d?+TN53imEoazx!QWSG#p)BWein^7)Qn-XqYo#dvcVn+`Ca}j0l3%%8+HI30j zk$tmO*nvWK))@h3Ei_dOud46`Z>HWPanV(xeC5msckD*yCDc*&xt0!5HxvVkU8la< z4&T8a=~p;q?pIsgC`}kBv+Cn$!8SkseFHP@v(NpfC}AkCTir@z$y=%Svkv-rW2R-! zLvMR!6dM-b{x(s_-VNKkozwf+PRL>ao7VzQe`Ghqs_)av!MXgWPFfabyVahXBc7V8 zir(!d%&@mYvFld)`5VG0Uu(qvdy_E7VY(IlLT5iRd0a)eqgwp^7(`36`J=-8c3&&5 z{Gjdx+X82FJ9bY2nW?(zFJf<NW0G!;WS38MR6dSV0u5rJ;#IUyc7TIEj8q4|mC`E%$;3<9whS_S&_ zcUP-7^{=V>7OSkL{~V;2JaFiJkN}}ZwXY*s(!WW~Bz9Lu#&oqiU$5+&dY|a~RC%_TE~8nU}G+k;#jz-ue%Q zhDZOdL%Z;ttj!JY)P&T1@y%9#`qK5ZV5ZktZ=vTqYf@0->sPW2_a(gdPV^TTB;Hi) zjN{h3co&;jObn%sV*da#?`3**|hR>23;ASuhsc`tyJ9n>^#>ll@5!6b8y$2 z^itl9mT-okP)kMO7N6oD7Gf>4k}vKomyKVQP=>AcU83I_C1%yj<}N{57TR8%))r}E z@LhGvTwJ*=L9kxpBP3q`_GD4=i(EQq9VL4eL`F<%Itm$>$}D?sEV?E8>@7T9-DD`m zZ2z`isb)g?29i+`Z_c*0d=_^fag-ZeB@_I547u5)_GsK2pX}8}Exy(HYFNM8WBpr< zwjuovq_d0ytWl(+sR%>oELoAP*4&oPjvVskiRweD}y~`Sk>_#govEER}ic5GAi0ir2 z@%I2$r+0H{{$>_r9-vh8 zJ$@^DYI*lu&c$*$T@h5r{G|{>Q9<>5&D_|;<=n&W_2Nei@07J zZQPemEEFoQxTiBn{rl-3zd!{puPRWWiJ|NI(zRrOl(Fz`KBHDaV<6yVK+pd!f!cj# zWT>q?26D(a?A95xOkTax)*5uQ|0d@6u%pj>An(utJwWnsm0A+{X@~uSRLwn!z4>eI zdc|1W6ZU-tw77SyYmzHwOqBWgnsiV{WtR0o!3sY%dEkJ*jqs}~Xk|w|6&sZc^TR59 zhqq83*gkUuew_GZ3UBnbOkr@Tafw( z?W$co%-`%KKmh_K^5QNr5c#;5#OWnH=a5_&ouAS1VzGS!XoK#Yot6hJQ;Q|JFzN$^P8We5>A1)DylH}1XYN+Y2%=Z1h zHd$X^VBh-wwi|$hDn8lfUi!NZsa{lq7mVodEq0`?{@!dC?MQBMscn86+EV>tc>rHB zf0*O_Cf0S9*DDjBkLIi1O6kF4w#TvdX{)|S)`gn$wz|mRg_;#39>#^L)?=RmPuAGZ z)~nuTHAOm1-h~T?aWS|>=e(syt5VR@vI#p(TJ_-4m)>~MiCpYBhx1nd6vHQ3lXXF$AG@iG1=X+Z!F81>6 zXR;FjAE1j-!2#7K(_gX9N<3 z7#K4yDBb<1yxL4CIFVJ%c~(Jaf2bvLXAfIQ64H*a9Q^!r?e_1Fn4>+;MFq=aAFFMP z^26tI$?hXd?%e7H`oivF&eup9j+y#PIfhwAth;ix3n}Ae--)e#BdH4wl;{5$W9zgt z8BC7e3}*IuRW8w1j!VI}@^`q<6Q{XUZyl|5m{FSp(Des!b-g@EVlJ)Z2s%%mas|L| z^%8Ubt%!=5SUot59I| zEe+>m`krhqWB#_W9>d(Mbt=w(6ENM0;twuLklFBmB*swOiGtP_EdcWcwVtl26&iI> zZ?MI67;0aPZMW}`JboU8{{g5vu<>4;bBFkVcWkucZJ&c3g(1o#g+j>2l8X|(g+}At zS)0v}Ucd8X&`*!;g^8||zo)15bRFy8RcIX0JL-PSOO^O8b6^`l6cvvno83M-_ zagXRM)tSBHG)>D?2a>zE#lF)#8UEw)0*GEX$3ziBXQ=zW;7&%7#3Uu7h(LMLqg!k& z=PixZGapnVX(O6)5wA+|Z-vHARwV^o7;|E14ZnCvW$2QgOvgYORhuu!Up(;@e}Lg<=svJkX1ujkrv7Hs_8X0B1!Y;k~&feu^p>|;Z8S-8kzcJ`cYjNKMlTz&FQ%})jQUQ*QI2waF2{UhFp z+@e=qk3O)TjzMe@KczS!3GkUdERNWQ1FH$ovpi5#L}$&SOw{f>}NP$ycuB(+EVWI zlO9W!Oq$K@nvYnSef3r0yMlbW6Bn+PJBm3>V7(M#=DJ_T|Jz2XuMZz9dbBe|;z^6> zGL{xNs@%PBQ;D*w6_Vqq&9nzut?6*7tv}Swxh6kXF`~0I+s&U;w}T{m9`OIJZ~GZu zf>xG1bm_J2vN1fCn*OAj>qLX-a>T{VwGMo3eqzU&#Cg%IB!; ztiGugKD~P+C2L{-QpIpQ$bl$=p5M&WL%FfpZ;iZ2AR;*%@rs!GXhu+ek-B%s3z@B~ zB7nl|?w_=rbrM_kLh8Ic&G4%bbF`QuRC0Vzi^}b56P9+bRkofsJwyKqkMJ_$ zJMTtZ2MnHur}?gF#ouaveoBs{?dXUe~Yp7YBNKWRI3vF}^v zaGrY7`8v`6oM%U*^%dV_I(o=X;Y5hY$f}d zxIso-{Hm%V&La+Iwf_diI8BSY7IGtHS%neR*B3`jO3|R7wK%trb;?ziGE_OHuvMPV zREX#6|D?$|aD2~oMAT*87&wvwY>aB2&X-qMuc@~}0sHLcDV}D%bK1=Qkk+cOpC$_$ z=UyC(=ZUP%G{?NDtVT(C^g7SN7v5ab3Qgh+s<(vDyH>i4EcWJ1733Zr0ucJnla)f0 zLI{RMkv*RgsWc6}9lPVUOPoKmk6Z%y<$U*?eN4}%$d>JZmMeT%+(3t55qoWkHe zIREMT@x-hB`&ode>BHIvznDz@zJbe8!Gy@8=j50?uMaw36TOF@D(O*rQ~7*eSo-k# zmglQrl(DQ66r4)$kO$%x_q;xP^hVvQu{>y0#GIC^N$TbQ;nmjHa_u}$LN(k^r`Ne5 zst<2%m{f*KrhELsP<#x#=&MY_p~1G&k%uB$sE%CPir0|5iHl^{{1lBvr!&d!6{w=G zdZYC{CSMjXzuTLF??0~hP3vZ-3^nQgmu$ND_IG>LDc;*dL%+?EMP2Qhn==6*##%t? zZgUX*?Ou$=jBCz|I6>;gkO1mO3P_V9SYCQ4%3kY|%msEpX04QA<7avuk}AxzjZg5K zLQQTVE$}t z#c1^t>?SpfwVz_(Ap^^R*8`FSp)rq)QMX}RN2-8q-7j*A->2xWcT0Up>9KB4GC)!9 zlWXdgSezSh_7a#lfZADM);Ki7i~#n*paCB>fY<{X+FiP#E|5H!>Mk&iKMbDLWc?Y^ z_SSlHk=x2^d+8Jt*1 zlAQSjxd&KkB)=u}ZAO)V~; z74>?2X9)EqTSOMir-T8l{%Dpmvlo=|tfjCiUMpW7M%}2WThP&!KEnIOQJs4unY{_T;uE)pFGqL>u?}Vim2Q!% zq*5hzjDdx1lO)^3BML3*#=V1Fj{lQ#}B&O zDL_MCtM_vd+y6wQOQm8z`1zmmLQm;|>SWlL+yMHPO66FdXETUtwDHA{*s2pzoyirsVO{EPOp~XI$eKBh_1Ng_E&qv@- zuNGYEMomo`6}FR}kVGIB{Qj1@>m}2g^Aiz5Y>Uh`uY!Hu9Ogx?o?^VxmXF+glt=W^ z+VC-Q0kOXs0F3g3F3bhTuj;D6# z86DRvbE9z-mOGK(T&dM|qr;^m7awJIpR@BF$(gH=dAPyhk(|Bhc0CQPyNr1xdw+@=@zPFNEBpC-z8mRXyYg{M zofS$%&b&KmEEX1P^{c>ZlqA~M4w3o_3dq`u!N0 zcH#U1(JPAsB8)>6!-iE25Pw$*LhgsNzMi=btj71u2lmVxA50l31qw!so$T;^UhHg> z5o6iJnPA|9A9b%ff5VUpg|k+%kMP)5r>965N~HZQ*M^|xX1);KiFc@`f9TV^crp{U zDHD8#s(nR^MLy7h;{3}q#-ENbkLrG;qqZ5YYtXZiY9Ej&Fr0mk>(d^+@2wz!)fJaW zU(O9stekc+y3mF(qA)eZ9ZoH4+r_Bib19`TtbS;YE^ zvG#2^=@svZ{4C~)7x!|D9e6hi>D$EH5e-(((#9X0aaKZ!9;-zfykKcOm|=Ydx*MhV`EiFxc6 z$eh-}-E7>nxEanM$f2$q2hg6iej?Ega^jt~ zB|x+-Ax^uf8sFOpJt#QvS7{ zrdXkgQex%J4gTELl4{C=7iS_GMfba6{E(xhp122G-pO>ze~OKgVx)9ZGfW|~RuB>}BEE}LuZA*X|2zuSpn625sw zD}j{ffcye-WEo{)nOT$dM6>!=D@~z55!FVK!kux+-{bP$8fuo30&JI*(Wj5+ z#9bH{?yXw;rrvxaTw^-nOLfl(os`F!EK#fmvJz#PCIvs!k^hc|1hzwBKdB9yY%;h>J^oWuE#6 zSnH$_52UiC(ahfvs^N#eh3J2yqL+8J{&Eq-wv^PC%sQkIC%Sl+?}Ptlj0s|m)9 zRAx`dpnX5zN%>f?9^FKL``HUBW(l!HKeU;OB#3~j=~}*&M7g z+vbkRtb0tqlN*sBNV>5$`)PF(d1n+j^jH|pRf5eb8uk=sN|rS2YQy$;3@|?4t&Pt? z-0lbAO>`=hU;kqOW;&TTPWP~tkf-W4qkMzB`PRuzelX{~{RvxjjJ7#UeQFC>{VJ(f zj~a65E4A-9K1KvlfAEdAJMWF}Y4WfF7n{;uOQho6~bXfOOqa_%r%Nx)-< zd1)UmlEG70H%>q{7MBv`IS!{|8al0yCIYE{jwr`e#S2)fwYplXD3c7@*Wk2*iY^aL z`cmiWYm4!(_Kg^Hi2EFPXz$f42imUpOKVG+0C{;Et=y<2-J}zk(`*saf8jf(aQykE zqQ0~s?*96=_Rx){2gN}^*D+S-Z)%FM9<5-Geae)Z_(;TDBwc`Wc0ZUqfiyst(JAq5 z`lkHZRrqlk(ZYGOsXK2?6Y}W!f_Lt>PkYaH7hKz#y@vJBW5W!_+vF;d4-@a$@{NRL zVLE~OumK_Y|vg#Ku-OZt>ApcpcH>>#_; z*$}g1FTp=CU!A+|)9GH#VfH6`a)9jn-nKzCsvOhuT97UO=i-wG#bdnHYu~TfW&y|< zqfcPJ@-|@srF#?9+zx?jqmplMKtdO}V*Bq?owQ12GX%Q)Eeaz!CF6Y$08$(~$p@%) zE0tkJCEcd!_(WpTZm22>*vDiH0_ssV9T$|v&hfvIFGL$!<8s%jgKqTrWIuE+F=Scn znvT)wPM!KXU}(KC|LR)Uug>9Vp!UNn|8}I%{dSps=j3lcy=MplWc&&U3FGbwtS`yToa@Cc^1$*2ZWT?ey zDDQBXWZRF|I>t>rBLD5~t7TuZ>i@;0@izeV5!d8n%{yG@72m6xnTcUFx8KGQrEkWe zihvm6`I|dTX-Ya%t|Qgil1-X3VezhhpUk{ixL zbihid%`$H~=Uht0faa8B9OPFZM-gCw7!t)>Y`cGO;t3g}CjBYkaJk?^R6D7*fon`v z0ntjFA&OnbdEO=cTSQr*;l&q1-XEQrcrH^LTxN4U&?Danpza61ewp=@8h{$Ox3l&-Z0pAqop zC`EVD;#teXo5?h-6ldagb5@mk5gD6t5@jJA1E&yDhHPi$`wCt_7DKcN3@AJZKib_Q z>(C&RAv$v`RX}D0LSOt-hf?6s-0G=>%DXk*H5bD z$!a1$@`|BZ6Tw=8e!lT=a4GyBnV=Ja*N>L9(PVAsds)yz^#hotXm)kJ=>EIK( zM-`O29J|)LzIC?7^9Q!LEp-1l+~UoRUrBMm(%SopaSrDmoiZc~C5x$1n_+O z9tu1jxUAo`4)@(Sq-TUm9Le zr5Eb^jwiDR>j<`B-{^v5jStGl`;GuxhEy=w1z@RH#)Lju7JPeSC`9#WQT%MqLJ1l6 zKwL$jN4M;&GAfzobN2+AEOl#l(`w(^C|5Igw!r#wjr1}&oJhUqEK=o3G8s&}TOF3O z!rqp)VT-#foXv$)mPlQ{$3%AVjd`h{NvIkKAdE`N%1a=-0JkXEgC&sjEvIVmHB47I zp3ai$$w=w}3q||(QQSiHOpLW^1y77-m-Pqy`*zpg)G^!_YRX(%@3j={l=U3f;xTvL zjmf0VP0G(DTI}Z>s!YhxGmUx45g}^aOC)NJ1*;5(3>iXu&5it{RE|0Vjy=q0Hglyu z@pQH8j_vniY(7lQE*BnO3Kq2wV!G`8z(r}Z-GA>nhl&fqF(UOLY!@Vl<*@i7Hkaq= z$y3z5p2NNpJ(Ys1Zx2_UhZdSK+<}TR=V@^CNcalaW<8-`*u$ApyzqQ2UQshXQ zL{^@3Y%TPRdR`k^sFH5{f%Q&zPM0X7IJt0`&vlb~iZ|=SKJDPi!zI+XE(d3J#QEHZ%0Or?meNxzqen{bxq0ru6xk^4E>Pu#K*7?wMF7i)1Z~ zWQ;Go=g`TT>&3aW8ImBTO(=jJ_pAQ$eAh`);1yO4lvXLw#z{brr`%c^;$K^`iEg!M{R(Eu zW|iPWa;`ypZ|e?D6`6OW{B;!F-RhL7mnZW27WFRCJeR24vPwy={{q-_QNVgaBs!RC3J6&{)vyMoidRHif)IGc zWEbQN5)h;*qPcYWmMSS|IcV-Km-G<7q2oO>9R!Z6pNA>U1%F*oq4XEBp-?us4uFy5 zmfOouC5ahbYTx)iNKE7F5h8d;=nzBu94c5+nJc!TII81kHVIy^&c?U%47R zpi`J-xanc!Ww13Ce!bs|d&@aGYV%>9>&py$vmX)3cb@{66iYBH?pXJ8-2&SoS0Nq3PeY0fsP|nzvy4yMrF-WrVvEdLtS=T@`FPDlM2K@_ zq{r8DL|(F zj$kvd+&o3Cu;RsL&eQklX%oKaebQi$L#6>hahI^FaezTGCJy-=Cw z^HX5optvI@C)4J9OjUQQ<8to6-Mh4w{a>4RJl{?UUVGAb^GT^1Uw5cwjc*Ba^Na2| zDgMTo-3VsO2bE)m36q7bXJed2XWyIX2i-5)uIlvEE8eczj5n0?{#ngckk?J=g(PM# zmK*=lOwr?R18ks^{`p()qDeOh?y>I2B1Ve`Io|4b;ta=KxHn@Je9IR0@1r@9W%GwL z+??!8%}J`d8p!zhnkSi$Xx*8#$iSVx@K?3~ClVa;q6FamzvKUhv7F*2ip0dwdIlaJoFs4_T%GPZ{CG z#v3mCpC8{Z8hkFI`+wf>KI3r*N99nd?eO<8JZO}I4Ivk7XItY-9VYJq!JK?LKqF75 zD@=Iy62bJf453i;{UyB;iJwLt{pEjeW%qZgbl_6Fd5Yrzwea*=>~L9OUygdC&(0+I zz_(p^W@X^pYe+#Yvg3;Z?S_n*BBZVmxD*dc6#{dO&mF7Y zUr0Z;lj%$0z+vhF;b{wW#>yP{OO`?Cd&@a|0ukZy-yA^128f{6R15;YxH5>s;F%qCzd9qV8`}VJE;U{_?Y^JB_Lb@q( zSpeEG#Re2IIOLfDETY>|#6_(Fods3!U`_giu%$<^+&t;W@)0QB3L2HRpiYI8eLWia zErHY$gKc3JgP+rE!QI^VqZ*vgOP}k{6cc>Lkp9lsEc`ty3-j+{^Kh_n{vh_3D#%x- z>dMsl^2|=Gfgk$NR~PuT^K>R-D*&~8AEQS6(%V8vD*yrsFaT3&-7tZs8U+il79?~^ z=|~V`ejSjJAJN6^j3od|U%`yp55BJGeAnFFUFg?Ga8ScXIW)4*Ru@7cxntIPX&=mHw$s1f zt+*$PxE6uMN(aRD(PjP)v7W{Ld@NIy_r4p!Pwwnp4Ud$b=xoC@n)|lLnz8mQNA#no zqaIUlwQ5kVqQ?iDqpLr|7_nxu`oKL_REmR-ysZ!SH|qR)@n8V|9ZJt{18y2Sv3tl* zz*U-pIU#)%42lJnKL9%r5apW+9%Q)9?~*2Q!VYnQuj40cHCFApR>>z)bWdD4=KdeWEe+vZ`Q8!zw}X#kIA-iR5(Had9$=2)3kl*3 zS3>Hw0b7tJ>X+dTZ`kNX#_XjZK<3<%lm0ziGRS6VK4y?SnOtfnfrDW|U5G~KunlI_yC@3E^SiN933 z(5YX}C^RRnH+UnoQlYnBC6y8wdMqxH zaL1OxaR}%VF%N?p$#$_#1#D-UOlb-oB0{`L@Lp)it*75#OyF`0Dx1MX9hr;Oe$WK2 zd4tU&f;*nDr;C)p$U}#hBe*%1wukM@w2i#`0Ipa!ZOHWl(P^{4y<8ysH(ZseZt<~& zy3CRuT~bkAVTm!kiCkclB3RhVQ;XCNJPAcDf@A9J;({YsVD29>?}Ly_VOR()(pu$D zn%bQ|r?s!Q|i@N32K>Tb2S5+Cbn9%A8pnMqMvnj~Y?cc|U5Kb_J z;?xHXy@6JcMy?jOm166X8a?CT`Yq#cew0-0Tfwc2?juhLR^r^ooEs+Ki%&=f(%muN z9m>VZpDw9X3$0+Dp-lh#nf+D{>x7I7(PHLhAjNT}V0Eatk(BGyHfz~=)<=W?1YdbZ z7-t|FhAEZ`Uy4%!2zj%yaSP#HJt{=Ro<^7zIJNrA?2Y35nVOP>o%y1RH)bT-U{Cq--OghT8LepaTu50^Fno|O1u5G%&KsF2R(5Wu28?C8@O{3ucO^L=ck4K zz2C+y3tFl(2v|%6oU43Q#3xJ3>SFX^ zH6ETr<>CdkEG0(Bh<1YyjiHXjRrvr|8~X4tN*_t7nXMAr#93JV=SC=@PmSAVlDA7R zCBLrbk9d1B;_*t=s)RP`5zR@}1amucAGXiut?v|FZ6O$rXFh)b4sjbC^roC7vh zK&VxfMQ(_h;(5cP4ASm*wa;h{R9*|wv*aM5TJY428v%b8`Sxg~CshL`n4D^sI1ntZ z`e0$>A(;l9{3|e^I$*Qo8##uEDECRD(6I=}KsjG=ZoOT%Py4>pJ!AxoS9hSS3|wnC z7SB-o@+3HbrBd1dErFT-eG#{(65F`5J~g_L0(EQXOCuAO_;vzI&QVT+$B7T3ZwujO z>u_sr%OTxTV!sg51)a<%+{Zk#s>XmRj#GR${Kr=1qpwfQu44;#S^2P4Y=M8Z4$(=Z z)%y_r^zRYvxEoAgFjf=tzAP_l)0OMA? zjG<7u!GP4?Q5+vAWW7FzIBjhvJ>OvtsSPmMRqP+oM07SVD;oDUDB)J`g2^ZU!CMAG ze%*)XIy$v=ToZ7Sqh=J?p^2%!)lFzI9HKY~9I)u#e8#BkXMk^28`_4hA>i&Q_sOIP z&_^v~_UCHz#y#u?y@SN3)sN&QrYpk2)Q_bR8lyQgakj`@jUF(E4hrrleIN`5{H=L% zVOJIcuIj#3u}0jwdCm$au>%d?GAzPf@umO4B}A9`L-apnUFvU+Le!bh9?E?5y+Xu7 zZ)sLwHenUSeXLI+b@Z_|6$)N$VrovL7KPco*FEnQng~$OgIj1qdj0fUtcYizU@b%A z^%>mZ701Xe-%ztc&R^+>2S6N-*h(r^=0VtgbmbWABFqUqY42pA-|T5p?~eZy&6lmvrNd+R>7(QX2%Oo~t4 zsFRR64T@y^U69`Mk$qBC97+udMy&@X2zkl}$al7nu4#74Sbks>adC)eZi<{a%b*JV z5r+HSyf=cS&tdG}N}LmhI9Y@Qmmkm07oS5E-ue5BQw8sIiq?eq;~?=|x6y;k{XNK; zh|Zc0!F+;2r%c-q2-!J4$qkX6G_$8ujp6Q8jepW?H*>*1$pn4mzXuEbIfQg+0J9a( z$BE#C!21+F&mp-Kym%eklGDOd)k>Ju(cSEWUyMh#z_Qw&`S((CenL%BIHllv#V z^)%8a&!DQGRPu&mLi7}@BwNV#cduRfBv~9QA3*f?;RLM)#$F`Nwj1FaVy+g4Fyu6> z_6RGb{ok!bJRyLph=W)j0|rVTk1|LIS2i6*-B&_U$G~GGq-8aN&(apWw_$tl!D!uI zP`hVUW;6NyWxZJr&?P+(#dCIOm+psdK-2#m(fZ$crBMGM0^SouihU3Y0?G~toUfie zDO8A$PavP&8C|QBlC5Of5OO{Qk?5_ zjJ_yH$b?zV0?BreT=i4ijb;^*!y2*60jGIB15Y#rx`fT{KYpJ&A|+&+p#hV(GxwW+ z53Nhw${rx<*#NcOHgM34c-|0_yeTZ~OZ3e@9djC2W(r+EJ}_n>V^U4Y$sgu34`g4|h~EI^-eYCkO%5_bFFEyvPJPVC3!0nJQ4aBcvXpdJ$t z1sI^b;{%w}iej# z(XjA_xArq38O6AY%%=wGkB4W3iEyl>2tm}EH;f7Xss#h^>_hu=|1RLe9X=@p%Nc-6 z;mJ^;>YN?}o%97JcSRm!%K=&PZNWPN{vHwTzYs)zcg3K3Y!4XuR$!sxmtMB7zTJXV zxv=$^w??2vmj#GwH^tf*>2yTsBXLIGgLT~%^o6DX($h%G##bvHdAvi z=gT&1D?+ArP&Dt`s)4>lEA8IPE8>{#heLvkA?Z@;06+D$8sEt=S&~O?8y)KQs^}|X zVn#F0n?bwN)-SOjs8{Xr9q&DtxP|l$OH^|20o3|M zT|>IR_aWsY4OXgWDwuGHV-b%k09sl_rcE~axzJ=dl1ueU8N}Z(^2TWn?h8r*8pIDT zWTBb1k9IfbhT)lB-j*Qc7q~SJPA;&JseP1cBl!P;!c3)QQ~7qd0$^+p$r2M~h7kyNdB zYI@9T^(zocX64G_NW$pjKpafG5RUkB@Bl9fY#PR&N&$3>^fI83w_q6_KP#RDo9S23 z8OFteI9{VM9FwY+?7OS?^^K=MAN@+OhzXivHkvt?fwgHWX(!ATlX6$Hv_tB@Qf&`J za6d4%P&-2QbC85>=B`Ii#_(;RgP@s#j4ot7URwh|wT+aRFk22-rY0Xs0LSA8wQ<;s8g})CPQy6~lx&=1)RiiMz<5@vDXm zH44(R6=9^e*Y@AA`xQ=cLV)U$)D4CcnnMh247}Ylbe7(is3HJji6FtogD za8*gSImqKr$jyMVIUtT;I$a?}99^Ucj4*?H4SW>^Vj31X*J^laU&ZsAhgIdz#MGS) zX%^Yt57#FQP)G`tuNajPGX;`UpBdANxR0!gmc9*q2_4%xQn{_9YY##l%3P1Zr2mec z{{ht$PPf}!KK?4qi#ubqJvm9W{_62dE(pe<~>U6Mt_6CaM|4pIFs$~r+G~&9F<6({gIL48(j%n_Zo>xcFwz#e=5zRsUUYfQxll7>Eqx zET(-_$BK-3)$GK6nKcq>t1cwJYe4A{jtHZ_A%D=82W&@-c}E7cr%yb3CqynD^EFGU z_(pihVolOhrK(##*$0_}b?G86PH>%1?5b)wHSe2yL?>|_ziiXunTLiXk;d7QujgPA zCbUusdDb&t=r^zUn6f`L=>xGN7WM1G3y=Sehlm2;YNaR*9|XxJ4x!Z{k;W_!72ovG zYs)t|W!swX^UwNS-~+FYWoQgTS*0=%Lw_k5Gz;B2;(($&_aB3NExmH4>Yj+pG zN5qU|pceO(Y5>KmUno--yF(NNSfnSUBPWcNpO?mHnIA(WK$N zAARlA)tw{-rNZyGeK};UsQx(mMFYslKi)?y;Q?o1Q1f+p-f_jDf4UBVy%}&i`ni1i zBG>1USy1B|#t%fhk9NZ=_Iw-a$CYOw(Y4Yb*5CAZijN)sdnt%}=s^)IL+mNjRhqVKC#x-3jgJO}qDyK1^@0Z*JJVfazM#d7?6%+(g|NmSy_{nJMt z;1hnIR^cVantLD});YlCB01&BJdL!x^YJwc{$_n5|OeO+cSej$jAC5<43boIo+qPj@g_! zu8o0`LReMH+w+!b`}E4ryVYE6|CXePPk%QHa9JCwCwYT2%2Le2k!9V*VMBFu=^ zdqeJTaqxnzd3&JDtc_t90Ca=v^;{fwpz&+c@g)JfkwG~5`$EGSkw)z4(GHI>^7U<> ze?iADWy0yNtkmwWOyCIh8@j5v8CQ!N#{_#gv(HZ2X&(W2B|!Zl%&Rnau1*5p2(M^| zKL834aLkHbMo)YKTD_9Y0BFud&CV`BGIKn<{Xr$|o*T?F>+p43&*F%NfRzM# zhke!rMrNqJz`3>r$e;+YqG@>{$hK}P6?0_v$}qDuyZ1-;-!Gj7L(p{`QSgF*JgCYm zdqDPE5+=$~VrCe9lnr8$fxeE5o?UN>@E!tipI}Mtu7W?!YlLr+*$L91S>^F>>q1xmv%I<2 zpWIRy5`;1tw5wjr9-lUTE1@4KUp1ALK$uYaeU*YJZ2_dVH3%&Fd`m{-mDfnjLw=-h?~+J@ot^&iG6%c+59noATQjQLLt7L@;?(1VxE ze4K#^tZ6u8O{S8DkBm(e&GRS~n$Vzm6h)d#1C9Hwl%5_rW2wl&&W#~#PQmhN z00n43l^*|6O1Y>MT(S%84}{8TG3d|g5oZ5-OLkfx)p~d9ZY0^MsN5RG>a@Qv+D^B8 zjqN}@9&jUF&4_OGBu{fE&KTld`XI4eUP~%gFIM7@v1C)sMA1p=ES5SY zK<82Ap?cRBtsD{}&m`-h6P@nJ(|Z>*rbqVrF*&yq_X>i*vj3rvjPZoxssxy)U##=0 z0YQaN?W7ynyQ~H-HER{C?D_Yt`gGx$^P05woFpldY1IvM0$606RHqXe$+G)Z#rtDS z@P>T;TDrrw3d3g^Gy_X<5@FdKW1 zcHn=dNMLJaWhje>s<=}*98ElDQPQ{c#Estzd2Et!;AO6$$|4i!cg$sUj8Xz(pR+2L zhRJ&vgj364P4ARDNn1%KUbi6^dJy5VO{>RtusWv_{|f>G2Cs;CtA_a2hb56taIUhR zGfR9$&al~jaUvcAwJ%PsPyd`#KrR#4K`HTpTgYaLyDT^U7ruB6rNeK@*y(-ki?5U{ zCzc95zx_TJoo--j7ZFenOgaM~q+K}kAaAaU?i}Mu845?jH~;g$4cZ`wBTja18QXQT zs!2s)f+RYS|ICM5>^sZ}gA8+_swGUErYLHh!}iXX?v@O*ZA=syRZ~<}AQd-0Lh%0d zOu7xU%}*DwySoGd8DohS-rM(${s0KIKAb-`!C^;kYYI4_Dvbqdm1gr(K~V9z+eS!O zNsuH(oHK%a|8OCbbUl{n)2>r=bI%D!Y02fT>P0@SHzP*$eu9e1m%F!t92^gTQIxa* z8cwMpgp(9bP9tn{yL>lsn;-RvNI!!3HIgC#8P)>yGmIuiVfGNiW=?-`Q2=rlve%-& zzbVf(k7%yP-qJGhIPRW19Nf5%>(R)px%k`_HHYvYE1YkQ0C!tP2~tHz_hxGNgcj_# zra?&CmrIUky!GluOnss70tC=Y_>l^2&|->mjb#9>|~YT^xt(?v``##v-W0 z|6#$ z*$j~f#i~Bt0*b4K3rfk{o0Yx~s-z=0eH}%MF0bu8((86r@%3WLH-h{z8pR|jcSqw- z01HknU(DL}Rx-dbSV%jgg0Q)QpkJE^D5e=GaMs#rx+&e_N@M}SU>n1(>R_Rkz$$u{#jn0awlDZ?l4h+{+b~)%fyo;gX)2IWow{A zxMB*u!PtBDbB4v7Lx2c_rUzVSlL_RO!zK%M@-Y!B#5G*oclkq{S>>BCB$UJp%F5YM z$2{xT)^Sa~vTXs24t95j865dB7lULjQ_~>6b8m9UNe$?^-JFR_su>+rmmu;g9@}5T zmrKm&CeUl#2Nu{j>l#Wcot^EtFIErNSMuHd4sD5DU10kVF)*N4#U&ORr939Ea`7gw z4AWZ`GRFa>-pG|^D^@hV$tSoug~@w%4DMO6uydQsC6W7Oei(98Xqkn&ChKb@p#eWPb4<7Bos=drNeQ#3LF_jI=^ema0S4tuIxinv-zwv*%pzT4SF{NPPVfz6zD~mQbGyK;8V3sH+w+E5hBX%zbwqcz zBF@8_;ii~HtAtjMiXKt%^-rJ%HbPQ#J8KG`-r@2}-!z*M!ayoU`wq5SbVKN>`C#Ptl?rl7f<@uXIT>=1QKN|nG1}u_iSi~Qtj4Xrw z>pm=^8{<8HbiqcEq&UN^3|wQc{|RBM213l^M}7s=TWa5zXXe zFHwkm$~L0Noxj}0TZCIhRbX$lm#6!VcD_e)`8SbeW;UTsnLvQ{bcN``6xEkeE z@F(Wu>(-UNs*oXhYBS|p##62mz1f**d${Oi#&{`0vTr|mrNJn;vrD}CMmW@oaoT57 zmV6Ts{1He&uTAc!$Ktztt-C*2VNEQx5G*^qj`u#FWoVV=eZIk@t3&!%bc!~KCUmN0 z*P%mk&9_1JuU1k{v;XlaDr%xYWnY=2cT=KEM8KC<5Z&ohEBMX^``b_x zJ$iieUsH&DP1Up{o5& zC7#35&`n6!3*kS}+IKPbxXC-0Y=`6~j0Q;7x|DJ(B0nJBNK#)Ry z{H6#1u_ijxLG=&q8Z>}FRdPV+iwKHk2HkUrVb#Ox=pMSy`x-4_OXDq?Gj`#GEx{Q>)KrnqW zCFULhE4g`wl{lTacAqHZO1O$m>z+P2@@tiSCf4@S69&=mYJiEb+lz~_olCtO$om?N zLc6Z&G>E`O4EWf+&_&FNQ4SNJhSuds>c8`j%9WX!nY38H+xSj4)h7#ljYzXA0VKdw zHjffD1eD_INB{1%Xf-7UjM=SuuCo~;^XUv#gvosy#4}e0*`xJ6hQ27x&`Mrvp1_K;39ia=hrsZr|M}wX2ViT^Z8=FhbxQAxWZBwVns31!%t2pp2D*Rs37li|#MxzupBFMRL&_S$Q zwcii(9j`%=o`OW+9SzF*UuoQ6AdnR8|HRLm%WRE3ze#JgwNHJ-%0$h<&g?g$wPS z?zYzGXfJ#10&>`%(zC>F?)-Qx&;jADP=jdhqsZBG zo*$MxtRixPhGUe4NUqF~5`&|o8-I-WaO=;+Jcg6WMt@Os88{HO8tzxdLj(iW>Z#EC zgH^5Ly3``gA;(i;I?r0Z7?8iu0VDownHxX4^LN3%X}s}6*juk8aZ$ziuQEd5PDqr_ z;68qF%P3JV0>0uTx?>U1CF2CCtAFlP9i}+|zmE7eZx+5z;jfh(xCP z5xeXsB~S_{_$qxpXcU?9q&-PMrJIm#4_)Hpg^Vny7A(6hRperb#QatzP!0~CVY>bPHhU9fG1@P@L^wtNgpHR0-Rn=(IeM8}mu3we%Lu13cNo$J zVq|bju7e?m@!%W)%oYCd%a9$eKpBEbn@*Y5g;QMJko zx*B^0I#IE|_SWF}b?-MjWp}xP+Bwk>gao5 zl)b{yafqhEHiuk0_~h?D3K$AIVXpZ0um=BJ%#i0o!Zi2Ce}2)_|NQ@TnO^laN+~=> z=K7+C!q@>PMqbP%%nj)x!f73}rw)3oH_yoD1o{5enZS%s;5Tg* zZ-VNi|0h`ZzD3+XyOThncI2zhvcqD^Z30ejORPH6%Hq7vvuXiA5s zY#&qFJWu21#Ys-#%0b6=|~{+dV$)XL65f16JPphSUV3aAfOFTBAHRWMgP z!48Z!72&A?5QY#leLc}OqU$JWHNP^?mn2lVX(bP~H6iB2!WGgU5V{s(WQ=8djH`1r zP42xuTuOhf7ElgSKx#cpvYvk?y5x|4%yy>>Cz3PN*ZpG!9?Pi$GLX;juMs=&i-Z~~ z4HVQu1-QE<-mb;=%c(foS`?tWTU_B6m%U_=D1zX2)NxUo9K#nLTgkm>6WmF}_9yey zEM@X(^N{@3LoFh*-3g95)Q)vj$rB@1G~9tJv!{m3%#c*9>yJnGKz5!%(Y^dONmBLv z2YhFHVG=l!WB{;|ao>dCrIyc@{0?jnJ-O8R1(9n#0`LCmzg>k-(IQLmDng+U;}OE$lrfh05C9n* z*f(KnMv}8=kLDQgr`wq^W7M)a7-z0=h6RpJdt@bzPbHnG;rI#oefx3#@11Z}viZ4I ziMbOdsVTT?S;)fLTJeV;{UC(fV!#FW?@H{-xb%qR+w4<{lpYwrB>;A94&?cQWFWEd z>D6z`*cF$p_&C2$>>z+U$zfXa@zRO_q`_5aBcV}~liMAsm3p~ziTGag=8H&stdmoT zm->3CYZ*QF{n$w(ikiqzP)Ih(KfueH-(9~q4TW0P!5W@qNM;l%C?K*`GUS#6JM!`sP+G;Js$$nl!oOf^4tCqf68u`RQd4N56WnF1Tbp^C0vvqRSu5yP&`9MBx$zoN*2v<9f$lkG5-Y^fba%dF1< zm;I-;E%WKj4{)N+)6+W}nL+y8Y^48J_R9SNGUNOe1(z)qRo-Abh>ane%syCR@#gOb zlEpb;@$0N`4J;+DhuVAOGL)snLuoz&^sALDBBjWZ<7YLyfK^yGMVLK+t9Qo3ABjs| zT#r#+viH*vF4!%JLj<{jSAgh4*7;|FO*^ZkIQ?>SR@bNn$OG|HNp=@x^e zIdX!RL6ol|v^+tYRxY3CiGjraXJG!tg_*<3lk=K`OU?QBbvR&s+?ZA`sLVW+d$ zPZ<#1-a#&((j0!~UrMKMU%W9}a+Om_Pjo|I{BPHw)%ABq8qPKd#j^z^9na~#r^sN) z$K0(XcTALYHtN?bys*~aKdqg024l5@WRet4x^((3bCl59#31oIuI{l3d)@5i_WqJZp z)(4@yPNffLVm>}F4Er%aG%4>Ur;EfPho6GEgBL$(RvpX=c_c_#ed?m#k;n|{J{J!^b{(zW$o z2YS@b+JvOMdKNNe-xHgjC3XKkJmv}loz)>24i(@qQpky9&zSx*?7J)^E ztt1^siUjb4pMN&9ms-WTO=J62pJ;;V7uz+djy&W!}=B+$1q(#SAXnMh|nL>mVh$F3^jS&=^v0SawIl8 zM0};E$R1uM)Up55&2bgV*Gz!wl1G~;H*L^E zY4qo?IZ_7F)v$Pd0O|Mmq_~WIF=L+%i^YfQuHvp|Mw=5r-$sqYEu%&G`l7=#cOF&T zp(ao=ttJggk*|wG8GSF#s{K?IH>A*ev8orJg^O7_**-m}43cpuNU2?x^EAHlt}stq zBGO$YxT;OD!AWouoo?iCDr06U%i&$I`w>8F%T1+r%^0LiojMdMmEi*Rtsv zm{j9#=}Me)QRS()4917cP~fk~?yqP&a=>_o(h2EDxbP&Fz=zyc%XM2SgT{CCI2(Us z`IQ`LK(LbR9!QdpXN{i{sMz(zB=omB-~vp3pfCrG@71)QMf%AteeA8tET3%ij$3bXZo5)^4n2}; zOUpef2`Zm^6PQr^Ll7ucWY#T8;$ zB0IaZ6;(yQ<6RCh?ULUWX4+6AO#UUaZb&LfOdAvSU;p6TT^DbG7J~%5?S+tx$XNrz zU~6SqhCWIV1f-e>S|xkd;;i+Fnsq66j%)U~Rw9GV-W6+jKsEYfTOGUnV$x#7!2J<$ zmQEhzzs*8_a=h7!4y@A1_mc0S{~4rZo`~vV(^W0n7aaBdKvUCb#W^0=yv>-9eb>~% zQMs|}4f_z9W58OlZxDU+?UtZ+MIV8(h`ZOf@4_&x-`gZxeNrbnud=m`v{VjewLTKa z>vVnQG&LaB!qxZR-$%v#%XzDk7_P!5l=rGu{uA|h4{7yinG-3U)7RHZ3O6RrIMlZ_l-?R5@#~)e{T~!CkV<>(=Z?tdCTrF}fmm|f^*jHcN}$&p zf_3O99Z%z>PK>PKeta=9YUN_OwNUJs;#5g@M)E+QlqMvSra;<~ShmO+c6D|Sz2eRR zZc>|Y{&4p1&j(@wRQ1mWl5s3;vw#N2AlPoeA+|bAC1pl;-_W~NEgkz#sY6!6zFWoU zDo#%`FeqaHSF_CyI-WK>GJ}T9%tNzRyHCi2aQfxGDf9g~rmFRN_qJ^*lwK}nyRrRZ zg_yRz&&IkieyaS$nJmf4*vxnZLp zvU6ND26nIVm4CK<`e01mk*4k7O7fhM+9}mVHpp6I#tSKBMa*1X!oR;V@OxmxP1(Tr z_%(&%y}hz6urx{0)8i*;;yLJrd$Z%)?77xG=g@66hKPRQKjte)6th{?O1mm%+e3Jw zTpowc3Yu&T>3!kZ%TREUHGF2gM*V(iMjLu}iC!sdbklU}6`A3?JexcDdJaeGi`KVc zGE;ESe~)fdqdd5`w%$8^N}+4NTeIyumZRzjF(Ok61DWWN-O~L%;PUGRf!3c#XjlR> z86uyfX34E?0}L;zzwt|qvd_~D1iqi%)FUEv4U3Y zjlr@3yhPXaNWvk>X*=;!y~af)G~AkB_!x)C9Dos)Q&*@LA&b+N3unANyH)Y;kmV~> z2Vg<3XsmA+;EobX-AeDMF-^ua)Q|Exz3UkbWuU=uq1efDS_heoW{C?~W>!4Kf9X2z zVwRc{)$H|9=4-#x*&Sn(t#TaYnn-hN%B2VEQ|@+xz3|Cc$(_zGGgPB?j?O|5?rkrW zeK{qYh1PzOQ4+93nxwsKd(F<8?}1NWYY6M=$x0r9PWIEuo=UF!r0U3?L1u*we{oBh4s$Go0?Y?`m~S&2e86 z+7#*!$)?t5P~GoKT2o~cw9+9DzmEGTyouW-pB%0S=6v}Z*HF>vkw_Oe><)8$Go`*x8Swur&;uEC{@xot;FpDLFKPUpXj z@yB$P3{&Aq+S^iJ$b2zDrbrci-`dVtwNoAXW?oY1S;Z*UHrO@b)Nj#|J(ZEGcRa(W z=A#1pY?2GkXPZ>L)wY#fH4HIV^H)pLNv52Pk=jJP7tNuwW@04@lDovjPvx!^yT1K( zKK=G{xcG6`2pACGOmR%~bb13=DRVY2^^T)=l-lUU~B&9PQ3R#WV%R^_!%a=vnRi^TMLl^$+#eNuAuk!UncaFUjF)nc9K z5HJ&AK0CjKxrHUIVcSqKd9>jK!_FwxOS)&4cpDdVF`br7H*Vm|SQqD`oui_A{28ya zCu>VyCukcRyYpgY7m#_!{qzkfNk@G>Ei3mpj}oy+Ozes1C4ZCdDC*S;z%CQm+vF%bC@Ff-lr)FsVo?djnXG$lH zFL%Qm&f|C1PF1kqy4?HogI@TXI}#!J8*jT3?}y#lb4rM>lnESfA_2Nfn6og5prz*LL6w1B? z(J|sJ6czazRvW8woY691sQH6b`p!I?9T4(w)NL7M8WNGLUOtR(ZJT$7JA%j_T#`%G zY5ZJ}wrv&j9d_qx?L%=wI;X;Q(R)@9)q_D#YG&XZ^GScV?Z+4Dx;n*(o1f5}a5v#o z15p)R(K+P=@czlBK#1kY&r;7t8;Apcf@~lx2PU;Wi&5S-_aVD!0>1L(l6c@tWjzB_ z2@m@p0lE~tzzfZiRgZV|pKG{Fd#pR;>zuy)hDQa_m$%rR%4x}Quvs~_Mn**6aP2Ps z!9fx|EhgiC-V7V4H22z1`gD-y6T6kYPBQ17PlTO$_Xf2hVX>Uh<CoG z&b+!M5AdjY3~1Ldg>yT_AyS6BHfr~<AS{8xq&^H{etiq>g3FBGqt2)k>awC}Nb9dJoPG4FqikO)tzx+fmdY zH6iqCcZ9L6{6@0x-O+!URTTrOm1*oYErqAGBASjhcNzWn{Lf2W=m}>fOiw7Fx^g>2 zjsD4O(80u!~N?|`)hi4=>t<@o|*s-$CqS=n5_9aG`+8^ol|v=rMXg0PnT z84UX)4t0I8_#*D;+y5s5CBU0PKTn|Ws1RC{_kiuN%3r~`rADC~+O=`6l?;FJe|xg3 z8!eOtJdUd<+$Z)~P^(8b6+k=1hv}Bjv>i1CpzqVlMk!_;$mx7iH1|o$r`lyLD1M(& zub5(3X9Bt3@JkJV;g4I!ubjx=!JiGtZz{Jzp%d73N@j7+9+}qjn%n%mL~XLr{5uAS zajWAQe57q_)!*BUbLhL@tF<+hVRbyi=9Asv61S1x5Z-@|yyJX}+$*%TbnN={T=nm3 zw0^HzOyf6V`?Tus@t@nDmshawwRhjq4 z0P1S*p@|IeKf}#z8UH|cOJMG>a9n7X%;dBs+N--p>hrOn_QHlOBxf_b5J#-ATw zBHfakkKE#Mv|;X?Qh`v?Myl_;IT_2AbO2s&j zR}n?o@;K681{ac$BBzD#Cu2=!LCYAT&n&gSC0h%kC{wul?!mxM;yX~dL|QhlZd$V+ z$ayll(bc_HZZw6KaOt(Cg?EtWS;W1}mXs&hXd-lAH>X$oNtVgXf(M z;#Kg@x98-qVfsHg0Aph{utG=^%4;!QG zh1bi|Etc_D!QRvU@c~GhP0CY3%NzM$bYA$44tD%TK*+N)D9Q8z1(en9$0(L>oCD|P zb^w0wFAa?!PYoodua&|*v3|-G#5>u@7_9HY2C4Ran%Tq>iWn~8J8R+2v`j3#dGQXy zKQcEU+4=6imcd;D8Z2T>rH%;P2f3Kt!4-k2=t}>u4_;3*)8pp|M?~z6aHqOTFQA`R zO>WvJR=Lhm4$LlUr;qBfoLF~9pe9Y{Cqo1;qf7onYfNXk$pK|kLJ76A>p%D#b{YU zw77k=i_uEQ%w&y)D~njZi2E2ZLL%7-`xGB!;iMCp%%{V>dAeG7!)zH4qrJ$CEOgp~$mh?#5#{tu&XSdRTAFW*IY0Wy#6$LA*|u&78N zCWHt~F-YpsNJ1KQo`{*&@5IdP`IpHY!o6l87&8Wk6%w~m@3K4Cm(ysC3q)`KM>G^# zQqt!+eu`Vi!DTUQmgpwX!uHf2#33cCqdJZxya@vZ4Iu=U(rbc6CG|%}WD_k<2^sQA zK%?v)3>A>R4QB;nG630M6O1m2305k)-xqVeG$Pb)Q?64wna;Va$z}dcD6H2R{dmR* zqlyv3+B?JHNMTQmqF~iyhGU9kxI{)vsOo7IX4yJSjHT^q%gbBt*Jb56b#vJ&*4%%~ zpPbPC^~7hR_gA9rFJ&j-ZSWaYNCqk9q4Nf{kUa!DO~v=OnBmZ>Mc*U);-T7w?Mqw+ zc#drcEJ<9mh~U5(PHrqWIPdeV6>%6gB#~RI9B0(9sn)_XlHY?*tS0`-y)m$_(3yoblC|CB!grK6Ccg!PG9pK@|G2BD~z}iOUWSWGyCJ28U?X5ol<2T5~6DiyDHFPKRA%uOb-H%0KfMOiN zzp7)~I=jafF5B-TR(LeFxE8T#!$AT!fF`{&BUecw*KY-qC<(C+dHw643r*A#99Jll z#Q&I_{X{cY)=XT0bau+torYb3Nn=Vhan~yi;5Q&S3(0y>bwHYIG=^Vm!c#JZwlnUg zD&ig)l>{*A z8o)1OkgWT(YA?K&^%jmJrSHG7=(CnD{3X3FtNOy9x%G_{apni_^qr^_Ztup%4 z0)^x%tG}>LtViY8N8}S|9sNr&-L&w65-)@Y%o7YEGo^`%9Ah4A-Jh=(S}Wyn0SDI@ zF-OFf>4VXBBBp*fLE2#LmWHX1)E~DFnPo9NV;B~g`RuyPTRMq=N-MC?a*QtWKqO<( zpVGezz)1?utwtpBOkWpNMpO$3RKPZu_B z#Orhn8lXmqJzUGQq0dAQEo`rjZP=egJlax@=6!6=*6#p?p9wZC1aHzp z=O00i#N%pr*X`bVw5Rg>+2bdA7D5Mi9{G0#wAE7WK!C20o+v|;jgHO4uiB9eCDm?| zTX>rOt8se3Z)_8RvbEO~<9?YV`%Z_vtP%J}xRe4lRVN4h_&y`oi;{H9hm{vTYuLBI zro4nw@>Q5%dAUDdsrHxu`wiR>xT z&%3^3@V866wHi{_bV#1_U#Ub}zg>V(Fb4~fi7u+ThDc>eT|gA?(A8Ry##tJsfv_eN z9SLM|ErwI+qXjqx%{$v1zbtGPd3x+hKO8^xuuQp;UejZMVo12+V8X%;ahvsG^kXY$ zj7IZCfjd{JgagnV?ReJFZNES|A4>c8dpI5Zwqxrik!4OSU;t+C=Pqx6Q7-$%lF~>( zT}us{79J9bHT?2M@t8-TN6uOxHR^C9b03E{LeZ@?+e2QKOJiPhHU=*1EE8UMl|Kve z-%vd-jz_*557ZJvuF}&EQ#5`A)`A$VFRLdm8SM$36MM8F2j2r$;)+%su@@mJ)5b1c zKB_hAQ}cxw!Yz*QNW*Zg8R|9Rro&S}{2Yc!;~k0ABsl~L@E~{S!+O%c)KO~x7?y-% zMDh-bym=47o&qEHSD=sA@$IvPTG>yti^jjBAClwXOQB!A7IDpdJ-V-S$*<RUi0UA9R$T9`r3_!_-_|`Lez+%HJg72jwK$y)I+qS0u6!ou&#@nw z^3aZ#@r>9mMc#&&LH3V9{GEFIA04zrd`mTMLeCfo0f7*SN+8Jyqtn@MheeERs=il z)RQ5iB~>-pw?HF{f0Cz-Ko7^EPrbx65GO+e#J))?pD&?gA|{7d{FMg7k@W?2qg}K; zNrPd{oz|=$$jZ`4lXzeuUL8!b4Wi|Er#*`nP<>MBcryB-+vB5{|M-OvN^#>!rWMBl z{FaRN&``Md4&i2${CZu^noB2h`4{Y7BL(9N82tEfcGQC^*GE;KPRvC`)ET@VJN$Sk zJQ<1#D~>Iow>k66=sUPqrY^sB4GgXM+jGvcY(iH-M+$9x*Ti&Wb_&oK&5DQEGG(68 zV5qB$qF;Iox7QX)SIc}J6kkNO1V;uYaN=j4L6t+B9|vy1E~9JB=ma6Mp3IL+jc#;( zdnvU^VEO9fdE@SUn^^Rqo`3{{d(eq6UTmN4=m06_>P5FpmQE$y=0jDsm&!$rEf7i{mSVnngawsaCe*b4Dkv~8#dLET~8ai)2XuE)mSZA(= zwa1jc0J!;?@?G}`utaNK1G-z!V?+6v{6(g6A)^I3yhNkO{N>5O5Xk+eCc1+q!Ml-$ z)pw~mj*s4bM-*D2y933i;EE;lzgsMge3C}&kSh!tv>l@G)T=OaZE@YcX>g#ahA;X^ zCfow25MsVg6xAP#?yTKaFgDP}wR`eAQemc6PV{&X8Ma#50bs~wA z1+DSZaKU{^;HDO-Ul9*_p+5QeK6c^TL@Wwfn-=}aO%D-9DsRq3Pjo2 zJu{t#DEx>Qg(iZeb>vb{j@vL&-di|8M)c(1q)0jWsR5i0@wK~HKl(Ratl5kN$$A`g zEu)YawUWgL<=3-~aA{S=(@8fYLmSJXz!Of_26os>+Mo#PXE2Aa<2J*01!uv!ZFFLg z=U*a@X~gjm?JnzcX3?f-J-s{S4u-833uktadf5jpYK;U4oWwUa>#7K~8n5>@hU6PL zQGoY&JWR`U4sosWS+o|z&`d4Y1@_(Qz`Vo6WvjN&PJ^6|BK_KuvOZ= zSAG19#%6pemv^8rleb;wUPzp18 zK80G4)T1S0t&m45tm4@}4mvtgt0%R1-F2M1t-i^U%^E{$AfnR|x=+CMeD|E{`FC)e zx}h}0V^8dwu?7~3(F_@HvYMALuGNF0&iF+U@dh9WJhJO#E!|+`job8a{LKBxKQ35y z&HP;`u9!IQ6O_JZHW6{6?)PEdq=L+^?#qd(bbeDQ1x**9XqxpGE+2h`^0)ZNKw=u! zGGeOvFJtYv0B5O%U;BSCECZbw^FMAWP)IHO$Nz1XZQ%ukkMY+d-V`KlTtmN@HTPfL zAF%?Y!5H?LCQb{I;1Q>8*gcOE2Xmj+e<+zU2MVj_y}krHh8vv;`U>w6UuQrs#vs}g z$Vl7&0T-5%PsH*|TFP~;f>dwBS7W&+8Ja*0DQK6jOhG$9GrHD{5gU>ne*fTgwE6ce zmJx0RxqKW`F&-#d>g~vbZzP8ee5P#knRR|Z%8L^^9mHreoh-GTABG)fp(Z5de2FLcnTKoSqijje~HYoskO%REN?Omt?+$4@t zc;1t7E716C!0{PF9C3-?2hm^wU^=jtu>ojIF7gPHM$#O98xyk?(jPb4r~T$yCKdFB6~fUC zRyRo%7?(4A92{g}Qc5c_YU0gp3dpcOt(MfG5{;dS}IdPYz7?7YI8A zw-^n4exqA%le^pL!so}?Jt9s&C4XL<7+HqG*PI6>c65DyAg<7z~DV$gH`oj1$*iIHFD0eH4& z54>3pSi^33!(qRSDI|1W#06oQ{5Kb?Sy&jxy=o(@MZ_U!tyPyvhXIGX={ohDtHe6z z;1CKNq10Gp+MZ_=#Cb(Ahf1`TqPT1^x46*C$n89K@Slg?;MgHQcYAz_o7fke(-p{~EpapkWt;+kvzs_oSVfzmOD44)Nh3Y! zEqU1ENuflpa_fkKN>22z0sm+I32+htYaIi4JU4;Y)9XhSStGQ6kB^g;C) zUkQN&(MQYm8TF}Qlj*xfJ;)qfiQ&)gb4}Nr zQ)gekIv_Jd)G_?ts+@F_>?s7B4`@TCQUI8_0BlzQ7RWj{L%oQ2T4dq!`w`DqzDo=- zfXqD!jAaULv)#gVQmeM8$?(A`|04Xki44fuY4z>hPV?pr%THo;hA&zh0hc^@L*~b~ z!x?-zbP%>4x#uWR_wG+8VDI;wf``}@}`@B5L zoI>3BtPf_iG9mOb*40ban=%7@4#u~VzR4-+Xm~5^LJu(G(_>@~%%9PC*Nx~#J2$rk zL$-cSsetByEdQ;k65j$;TqnLfDHiZdd%FVmw1iBlQ`p0OvSCD;QPC>A$hmb;UP#piI2q4wv0gZH zm6$TUUww85^936|`aOhW($`A@9KWq@1tvCqG|YXzc0Xls7Pq|_9mDJ#m@6SsB<-os ztjuiA_VVj!MmeS!7a~P{qdH0OU9krA>paw$L9OHebDzQCsTj^1e%RFU)qCvWrTozv zD%(PMGiiKFvkLNFlsHy|?ZiTWHR~K&iaZmIycqLO`W8H%fi> z&u;TT|Nc(X#P@4VQlH_|{8TYN?$AZ}bBNqBz!~>9@5FX6!Y7`>81iNUyzFD`^Y-01 zd~j%lh>D|Ev%X)y;RI{??eLRiEM~{nVju&;BsjNYDKQdP+b@%}ZlbJiDyz^s$w2o1 zS`kBe<*yQJ)F#0~6++_cFMXNpe3WEs?9o*JaVHVIJlCIUXL6O7vS7c{%~ygu{ZykB zFxrEkr}i}PIK(J0?q)dclUpgbKGuE`P#DkNlbpC6B{3{RTZ{40645Li%Ol!whc({p zrpWKWUHF0lO9?n(wvj?huMgO&waCV^$J%9uTsSRsg)Grxrg~iZo2{l*1W}h4B6{7w zwtdMaboU=~r;ho3$nYABP@wfSN|f;BvvyRn@YRjAmW1#itL$|S!1Q5DaztX50JnxT zI#LP|4A1aSdnX@;%3^;rLhg~vx0(Sohl1!W877^25TdK`7cF(`E}-`dAZ?LX5sV5k zBII?JATCwW41vMEWJDQF)SC5ReF}FKZFs;YVFB1iM4$sgi4~-YcI#9IqVWkc8=Q(I z)wPxQwzsZHD%fp&A0797M1#bf0<_R<1gFOO8FFDB?T6UgN$2uMG!cic5iGF)2T1`W zjt@*09o=ZBQq=~7qDX(-Kfmn7-28i;#(F!L<_>cr**G@syY|o>E@>%#^%)ZVaDHx0 z6*r(~F|mH<{uAtF`KwQ8>)y8Ptm@cL=~?mQ4TX2=?PF*;jy_}a_W1nWF~3Kty>9u$ z1}r#>`@J<~{N~P9?`Y{JsOOh)E4;(j6G_b+8SNXGElV-y;B8PI=byw%sVerpNz4cain|vp!rIySo1BRG@*HQa7&wcWf zwKV>PBzRf-e$BvEDL|1f5OaF&A2%W<)Q2cIi(&ffAXsxxkPsp*7JL+5b(>SsjklUx zD91wgHK%|-EGhOCjLX=(ZpARDua~c~=de7JyA+6iPyEwT6>d7!PzNCph0juXH>E)){5ht$?`;!Pphz6b@L-tpq?=AR_*M z-JQ3{hE6bw1B&4QCYK~;exP>tB2F{nPg+`8=->1PqR9fZqO8TCLA62wr`gINJ;wk9 z!2Z)(mT{G;OTfrzl7#-Q5hR~N$R^_tO-|yp^TO{QnEG4ps!-sY3M~6lJ7j7r!4f(X zg!Ce0iXx?v}~+L6Cn?xk?ar6IU;yb)gG@X06$KN3LDH>KMS~ zZ}}D38agcIz=b+Z zu5T*mfmxjlx`MHE=tgI{`Dss+kcRK|lZKO?n}5@Id^HOP!%UZol;> zuOF#hDc<47Gezi_wc^%>tfEMaFUD?^=KKxS$f=+Mwq4A`v+?8~7BNu7*eRkL96Ihl zY!pDc-LKg*hv8%%ruY|rEA|Cy>7|_rDc4|A+svMamtP@}nvB*%RKWy`=i+L)OmvwZ zLfLeyg)~^6e(~WJI$c;HnoUxjr0R`iYLF)#Y7K5@p=SX1`{uvG5sJSxOz{+MMXOx^X|#;w<;GyU`Xl?#Tzv!- z-xG$ZYMQguhg2-Nh}l~A@x*1cMm!uvdKjE_8p^p_F?q<^1FQB@f64mG>}$4Dp9x-B zfNYHg&pmc0!fS$FzWd38gEbGuOjM)#N!Qor9wKP8dj?B--Xm465Uc6W=@GbFu(63z z)=Cry*2;@>65I#3?AT!u=&ZU@?pD)_{SN`B-ybEO0;%Vk%&iVX-8JOX?>iW+`z;=IZLe+0Zqxa$^Hujt--L(qEUNqSB(fbk(bsyu{87 zW`Dmz?^gO%f|e3-Fl099*l(wBM`k=00F)BM>R$sM#!skS)j0N&Q5D{>#NY7RZHEBn zjNvm#!Mt$TND!8?qz9IxL|?eTHfAx~f-}I_IJo7Wr*heRMP9IuV|A?}yU!rni%^c1LtA zP!tX~F(^ejayMEB-T4jE!8Il zD(_TrEvcmj@qc?Cqe6RlpQomBe^1OA!)fGcC6{I8Hmo?Ky#Bq#$0noRnG7 z`F$KWwgWd-<-(GB=E*V+YH|5fbRLXbv>>}v4;tT*n~ z@N1VFbnc-pEBU=N^7le;st@P9U!%n~=@_dyJ8(1|+(1p*mqdf22xi9(O3FI1I8xrb zSRDV2I}ubr$Nl{x-+^va5F1LRw+BeE!qW?%LITgy4}zog>c0FsHccQ&3I~(&xKHTm z_JL2Ob6P4tbKL(L7#4SIo&i;(+1rCQDBWQRg#4N`Vg|A8%%#=B^tvtBP?5!CN`XeV z6kILnkyMt&z!g7Jd4hTcGUlvq9e<9@0&I;e_Ly3Pr*mqNzj~L%niD$|7>|-T{!vA} z<}9Mo?K@w|s*Rq^fsw$GTBp(Ov;5A~q9Qn=n}<51aO)#ef9*lvZ#MbiZ%OH3 z8hjh1+T)vhtWu7HzAc33sN%P@A?K%$_CfFqeU$U!1z2 zG8OPK`g{rK2}(fh?ifN^u-YLPnuZdU)6iL5P23_+498}EKbyS?8Kj4{C|nv6&=&S_ zg4DJ(=B^+pd9N$jLd@hrUJz)8i+(`Hxf*8Tqt8PHRFg%KpnjUkhrssBaWJQ7hX#WY z2FeNyuCqXLLZRa!pvJ4guUj~`cnWwpgF$%M2~ndL7H3{@&n~x%;93&ooU>gJ`6=f}uD3EQ!H-m^jsDea}v0X{vV~U1bjd~AeuBsSta2ZP$I6~ zClA<>OaN8bS-W8Bvv}TUc~=7=HdV7%+J;K|4Pc1=yx<*kGSDrx5&eUYTN?a|#R4vd&q9-exAZdw3} zip{+c5{80FKX!gZohKLhOk~;+_~|$gYRyiOF=xRsg=e%Eg@94*_by*>cOIC2EY`E< zo0P}gnd%mTZT0=uqv2A~IoWA~BOYBH$`OL&#SP6cNes%s^HbAfY>qXtenN@hSj>%E zs{MT$x$h~O`RAe#`>LN3SY}HqSqi0ERt_JIBjkWBAQ5dd2m`punA+8~+o=Bz^9lf* zn1jW4Wj!vxlgflk=>33Z-^8K93lAUD?PGIG^;Hn|4Fb(}vVqS-AsQTSrU(#=%b1h00mFv0GyW!SM zPEiD|l@1OLCZfaPM2E&(PIe{TC?oZ{t)JnOiG#ia$v-k15nB|7H<(_OQg@ltg>Vn*qUzW^xTDL zlM8udnskgr6MPk}0>lvx;9hDkrnf0PUONT|j)ZBX_o!VWN9W>%^{FRVw!DZqJp~A$ z>tH|Flkk7E_vP_a?rpzIWhWZV+7%TtMNyigY>@^sD-o$SO4JrAi`pn;$W#g`L#8A} zL@7hY(trvLrWBDOWwG#nuce;nIq&m+-p@IIoPW-F`)lvwX5IJizJ~8`U0_lhoIIG&&_yX+!W!N1=zUg__t59 zz?W1wVrPVu^IF25TX&eI3A{kkGx{E2sl>B`Uiz%M1Dp?A#*J3j2^X}Y1(q9u$EaN0 zbW+hY@R9bT<4vz1zZJzEpgKqJ!iKhEoNyK^$D^Kd>h{ys@{#o^82&`{TI5ZU>Faoj zF!%Y?lZzg%v`k?)alo@7;PTd?U^7!t{|HrkLDrds%vR&B?ie$u!a(XOgg)Sld#rTmfGs5$&L-t+b+Vf#~{R&|%8ZM0mUw;yg zA(wxc^W%dn(NX_}C*h5%R#j}u3m^zRkb3%nL@{!{%lVo;H)eUp>seS>P+C;(Yk^!$ zd(+=+fH2Ab+Rx`gYqi!b1}E^>Z(BE;OS^*P>|(QC01Ha#9(Q`AEqAVacU7hwk19mV z2|XFVVgb8PIqNRat|cC$>>e~wQe4_VtgiEZCdi7xFr17JvjmbnevLr;_G<2QfEHr;ZQ=50j6)$xo;z8o`3vYYnv3CafU{zBP~n zH4yGH9pvkV&svqDbrQ)2a^d)pYqA$!c0&K^_{bv8=C8$m0 zK8uY1ekNHd_r}u+L+A;JEnzKYv1|tC-#jxZS2TB==KLQcL4|w#$p^0nx|EIgfiTmI z7xe(q`it-)?Y12^{HZG{a~lhh$dCB2gsd^li;yfGzvQK2p7GC1p}d?6qIj#7(BH}3 z7%!ufY>@%>AYW2J6#n8ElgTWrtV|8d9{(sN_$Yr&m6N;lVf^(bB4(oXtZz841^P60 z^Oz}?J}2B6S0&`qzta1LF3C)mwsx$+X6zLGdQPG3X>OF7GJba%de3o~OvJJ@IlrXD ztH=qde(vyET}3AxUN=hfpQs`D$LB+qxCP0rvE6XEk@;NCHyw%mb=PWUd2;@Sk-YGy za7?+ z0XpwQ4Lu_w5)g$WR&@CI3M?|NSPGm$tgo-H;@c-kFB86T;uK}Bt6SmF5~k^h&p_@u zzTCFL8}o%Zh|xg|2DQSQYlZUnDg^3H@BmE1e2N<#3a9cYtV$uu{i>j z(nCi=QXy=V?xd86?(n$iQtLl*D7%n94wG9?@qR5zyzFpkARDb(a!(8AbO#SllCVLL zbg1+a2a1eoH3No8kkow~KrTB&kR6G4nm~0Pc10r)+SP1mCL|lUn)9l7)-L51He;cX zD8jt<5Hs<$u8!w>;?xlci}1EwF)R3zSy_m3`_)fCNP^!p7=w=RLe4h}TuwuSg81)( zh1jhD@G+!~szeX8H}*HKa1Q6pDe7ev7*Tl1Y&{k;>%1Ms{EnvzrM9;?|GMW8zo%Xb z0cuc_+SKjc9fa0#KL~Ow)Vtwz<8j_JlHsLK3)VI3u8=bhGb=TVob1y-w4xwHncs?x52h5=6q7Bw{t zLxFGx>V`8MIvwC({{Sj;?=6bClEis6q1xj?oy&`vI+g?gW!kyGg;l<{?ZFu&+06=&+S>! z%)wr0;h=36Dx;L{3Cp7xUdmEgPP}Oc2`1Y3@*O5|ONqJbr`C0Bc|{w2sZrgUSmd`* zj~cojqVDTQ{~b%(WfMOo`2Jv?_&AWX`TjgclJ-*-hWhL|$XZvWs}v?0(TA(U;Qy*V zRCFN&OzxMKHyR&bs!%=YE6!{w3ljbF4=RWUfgBW}ZM;)I()=l+c~egD_miTb+yv1fQsxRPXaOfDXn(=HP*|TBoUT^AfZ{ecU{1XwES(*U>DUJx(?k)LVB?u z801{ha4{E80(FglfJzvjO+}zz=XCrwgoPAxy#kZ(a5gZ=UlLoyqLbQjOESwkXDR!RS09O2ET6wV{lyFEqSbT1S@(@9X0TmK2gai5GoI2LIG z%M#AXV+fYA)j41Mf3@^6fRF}1B_k7ph?U6O>zZhIG(oV$WgzcPK{VNcw6|^}b?P{q zdbMwHn~q$hnypNX#bQ}uMTr$k+N-#WO)Cmm+g^jeCg#2nd|VMODMHi3Mj;dN#^Z`h z$=kK?+m|;g7#5RW(?vn8wH=qr-AL77>p7x=XiosK=5%z47%R)?P{b_IRMEx>?)Yjt z43HxTG$#d$GASnkFLmxn8**yHqju*sg&3WX8XvU07UK1Ya}{RMJF~(rPGU&hr8?3x z>f(@GV9!~K%hp9fGD=EHaE!^CSwM%kvup1*1e@E>5Y$t;Q51*y?)nSgA}3zmx&Mx>n!I_>Qj2D(XpXhWcfgpd->eAFt5-)odRu?NcOsqf;8oe!<-R8R}Mmq+E zWKoa@msmq^5f&nc=GVKoR4WEfHDfu!IwO741AtA*113U|abF&8NlFOrwCC&ts1Z2D z6=ZkQLFqB|O&0jfqw_t83G#*XGG-7~f(U5hY+`>7u;n5*(nJ)2H4U|H?>#WHC_waV z%}q-+=z-=@C$U*ZpUiZHK-^*u3Y2v1&2h%CO-T+$pM$qd1NK#AgM%dlujlRi0S?ED zP^HN1gU)_@(US}qjI?cb^zq}^n2u@%k--#8_-Z}@%n?;%o-~CkOywp8swpUD4i6&m z2tWWdmqu2$E?67hi84tiW^c|ckM)GEtb`XLBxAvL{{gt!CFg6ghp3D?Cz_0UGP4?k zcrPeaUr|$w0&rX2Pf-##^J4x3V69$|Imst4<=0||8CP94!Xz6dbF|Z&SI(MPEXu4w zVM8gtiIzTcsMXUc8KTOIv0sb*S=WOXr+oA2WYgC5K0)4pCu0Y!EP-O>)%Oq>tib=> zxaKkiqn}#aE*xu^UZdCh_fduKqF){4*G`+Lm;HGkdZpz?RL3_lb8l!1!hDiV)Bf6a}d!B>W0lii~U=d#>*T87u$q_Y;j)}2Ns#ZuU1HS zT@!#9xg#;YP35SQBfWMyNp8r`m{$~*<9;#*2(Z5Tec(U#thvUAdtD^!{`gbUe z#)MvedLlXHgu2u02kSFZ^3}U#k;uLK4XR5+8A5-3t*0 zf)!}Pb;9-m4OW_Y%R-+y@@K?%yhG%;GM%&Xxm+VpR%R(}gTKLCwOvodY?MCL|sWbT! zfA_a4L!1q5JAGwGRG)$m&{OJA0icTZZXZAwBVFl~5?$--lAwk7rJeZbT3?#E<0ybk zIj4sABi$~MZIIyEgug-*YN44Wiz>k=>m(ky2gOAZ47Fl}5At@oY@1b3eam<|`&~Nu z7d9t}cHS(uO(%JlQTD8@xFbg6KNWmexxxk2Nd=HZM7EvIs~2)kpW|egP-uN9BIK{G z`sKH2u7F71wx~qezy-de(AY@7RDmSpkWk(GOJK!RQ%BrwkpW+0EU}7OsAxpm)+O8w zbt6tx*^2^H=5{{F75z?8A==~|+HRV$O1L@DB~PpOS`So{0$BTcc7bLz)_sMhm0o-5 zbbrr~xSxpDZ-R=;#3%ZPzzVH=n+<(nc+T!9{bUJK-$SXIM!EM~zQ&l1;)OK$qIw$4 zMx81J)8&sr4e^ttQb8ADtgME|&*yLx(Am1`x8JEwLTe$ftT}g)Zp@r)dLFefWDLUo&nt0^+>@BzjeM+GFUbO8tTTg_5B|3DV{eI#sedn3yz7* zt#0*GD>aRrs5V-H(f<5OpX6{+>&0gaMas7*L;YFR2X?~vi>k+9k$54dNrW~xlF%o|jNuK+m))M$xg$1y><2eR!$X8w*o9F+q~cu*_B6)a6`%1{ zsO^d7k^w>#Wo4M7&|QSNK3Z32=xaSU0)sQs`;IsMpj5Gu zR%7n(xKnKX&B>-*d~C2(hq>r;AL74H!ARHP)VbjT1-V!@f=qj*6;`g0;sNf1v*s-O9H>yu#iNy%nN%0uuHGm-f49%$lFzB zG{Rg1mAuKDP5(T}3rjvULr}^KmT&iwA451Z!X$F)fr?RDq#9p+`OUG*hq7gF&DD&S ze~RjU-aPCnYaM32Di~e<4fqt%_oN_Uf4^I>jkluob15N=L=#pqdz(+>Kb+^`aBGSC z!RxV#TfV`w>O~+bi}gpZ^@*vu&FgL2(Rxj#(e84VW9XYX3b~s5wqX7G`gca|;YFLH zf4w3Q82l=Uz-}1_FRi8tpPg*9&HPrE$}&w=s7TYFxo-P;L9Mw_7>c*~RM#}s*AmI% zvy|dRwuT!?1PaTV-&sF8)jHaDNMHYXK;7zRdyUHuofVwzTPbu&P)(_ba zzWvI)a4yQv>IKqRsDx}?fi0enE8=G8A>QJda@{V7o+48cd6|mHNVL;nN+O9|z08^L z;2C=5vtOjV*)UB=Egr0)0%_Sxo(@k7a{x7M1$z@oza6!f>2($R-*Q@=8lYM)1KF(M zx)LGhz&MR;0NL@<+6qt`LZjCdtT8?Lw8blhrz-YZX-)#y{rsCsz{6gwsq}q6Ol3T4 zOs%eR6*yT9+7|fb6`&dtM=!ASe^i6&F0{>*=YD*~vr@y%9gPK*tv>x0DIN3lqI_=7 zNb6A4J8kNrc*EQ>E=7!M0z*BZkAMW~6LS04`Usc!zGTtP*YZ9bcGG6f>_W}xmXZ&z zyDH&Wdz>Y%KHT-4)bi7SqCARXeMwTfm@2?Pa zMy*%oo(OYe>?S`0lM?F&;t=%>PoVGJeE&3x2UV#1*UNU#xqeG->kcpxo*f#T>t+H@ z^QMfO{Hm9%38eJKe0`jJprxd@bXr_g@fc{l0 zQPWzEp+zluBj-iZM54HsA%6YU-MET|UswU!31_mA^1X@eJ&5;$gdJDm zw|Ad^w%F%lYr{C*H$$(&r zIzZ{GrPu^3K;pa}kjp-__O-RW{R}`K#VFTiUm+%&bZ=2eJ66Cisr~20GFMLT=86zi zghk3BqnaGg3s6+~#a`oV4v48TGJ6PPPeP1ME|f)0z5|IHCC zVdhSk6#Xt-xX^w+jzR?^_9USBbwe%4AleZvN&cv&fr_9Hd=)R@h6kSCL+DdusRtUP z(9h}Du=m6ZUEaOoLztyflmW8+fL%nfY#v>Ukz|I8jcYGy2XBIdIDp5qFRhJQd-vu% zCr^`u2d!w1&^jl2RhUDGs83r6uoAQdv9x7rr9jMjCE9Y@( zkbz?W_B{mzaei&}0s1z8H4f+hvm(>}BzxsRE$YD3FF)}}wf_oxH~ilF`*J(OcA@{H!UlNZBub(VFp{>~ zuCEp}53Q`OzS)tkrvXUIsHrCwLsiVanz&BVcAF3!{^LpZjD71y|0$=W#waatve4)H zk(7Ir1rOJHEG(N=GJ@)&%1zpYbyvY;Rkiw?mDR0sV-amBmmFh!Uw!1&HJM@W58PL1 z-pI`B4=aqLSC9QDFmcV#EZzebsj**RCC!70lB$&wG-4W9w@@@WQ z+l-3i?3I}~CsMZ}rsy$yi%!p&$}klEb(x^^;)YV1&t#3LdYLAqV~`WGsY$isC_D4p z)ocIysKya)vI-`2--hUDf%6Jp`XO3?0iM_utaLZh{PmEu0L7F4aX}}#X$9viM6^e? z+>BS18y@U^jgB%UxMiZf;&%BJjvpQxC0>~5=?6tD2Y*i*{Rilc`n6L3hnd@DXn=3l zwp3qSx=_qpe^%+_I&*d`z05-o8R`j+F9yoTj5EU;d@sr*jW!kZxb6h91 zLgrpMTf6|lL35L1*udZpP~UZf|8iE15C9#i>8P`WwNU?uZ2mUjcp4}zDr(Ox3|wg) z5F|b^3eO>|^XGRdO_-T^1W4Py2*5cdMBww^-JP*#1F6q$4X1yBO9WmjM-5T9_U(c^ zFep-L!`5%WgBK0oNFObwzNfaA6Uwv~0hoRRnmF|Qoaf}s`#01(eC0^Ze+7B-H#EYO zR|xwHI65?zz73C$k=(?_&3|z=59A(|&2uT38J>g8Z_(`?1VE$GO)9JpFknJcU$;50 zm+N82uSi-4>!%Itl70FV;Mj|?j1ne|)n*RB<*+?xjn2WtfWb&>5jj9)Uh2A0Ba_`i zR4UuBz$Nlws7%jKI0w*p`H`q&_CU{QSd&?SR?>7rw0fvjnIcM)wb=z&ywZ{ZPQV9R z$|dHAe?*v7-@7lWF3j$Y>WYKej*1d-wFHMD`so0v;TxV=e!&;GcgU!q794;|ace}&Z~;=E@tVx3vC>X4vJGyD8Ln~9$ihP?29dhY zKtb3D9%bj{07EMeSOr$~1Q}7hA1w0hknC!Ybd7F71w0kqiY2)Ty|eOg@*M#`o&f{0 zD)*|Or$U>C7>v5Vv*4ifyTaQ~vw1v_!dpq)zycdqHCAak8uA|DxHXIV1rL}XQV-eF zZOY~jvK;9!NtibVX7Ovt#Sw%yQ|-_mR*Zw$XECktlr>u<%9Ji2UFP)?Vn1Vog+x7T&iWN}K0w++wLxu#a2by(dV44Yh(z;C(<2ZBV-A)nro_%Km%$YMBBO1G%^K z0h+l`fIQUx6;ohYFez?7|Fs<&-SY*oD(ko%X5?16C4Om-#E|<6c^0&~wu#a7&^#as zF8hk+jtMTq#4X5QL@7P&2T!oSU4`36=xD0mTl+6IUS$C@US(}JB>5rcDze@885ze% z6XL6A%q}}FaJ5_kj#O%t>s}crm?u9Hc94+Wk19ooHIcTHR(1^HPF1P_D)sxatLLn^ z4aoY)G3d99AFlCkbb<^|nB_5oU%m1Qs5UuqIk+o51am*U2mQ+i>v~||)O?FO<=9qX0nzg;Pz8%!7?~)g*s$rYIyw>uDzH2tX~Im;FRTk8%V~VFO;`!M zj-gulaSm;2dvsLX`dV$UC8{)nHMo?=4PJ--!*KVq0Vk-EmZs_oEDVtir#e1BDp|%p zwX0sd6K}5Ld9|}Gnvn@_t%O;eXG8s;^<4}G_Nd#S*~E~vsM#4U`cQZ@ zMr^}xHOro=>{F{2JllTm^qv;r(dJ#*5OE56GM4MtSF7~yPd$BJzIW)4sfT#SE*B(n zm7&a1iQCt#Fnei+I1HPNNwHJeTc+p*tY1^u!nq>5)#rhL`@C-#=Ccp z3aHOc=v^xS^4+PNm52u|RPUf5NYBiR<6UmxPTCBBu2!9jb<5Gd3(v3zI4v8B*(d&K z9uOSOH(7^e0g-J4>%g97mDM1Aq-=g<8=3=eB*OI9T_ntCUGNe#RfLfmR^(kV3tTA4 zWyjwhnR(>^tpFh7xI*c}`v-!+4!*cG*UA=rNNZYJ$) zi){0IaFkEt)hc>P1R5xNX6BPBV`o<5uHWPpa|J)Zk1gG;(zr6M{E7-3y#R6BTY>3Q~P%B*d5Vc+Kt>_9v&a$4>^j zJSE-oW~+k1ruk3LSnOyn^Do3F4y|Q+-<3ak;IOK-dyweyVm=e_uLb)J`!60aN}wQ~<1giT!w`wI|LwUDC~sBc5oTxiMf>Ya6=<B3+)K6+zTB4LIOhgWT2to&jp#%n9o%~pE&NAJc_($9ln+}XkGeOgN{2$D0r&fD`9 zwazmXeo&;BwnJHiN;g5Z+>ztQ_I$RuooFc`s=E9(`bgYw2QB%lj%u{AcV0c{$S{Z zuB;Xm_xz)`yE(ilsE`ckRb?ThRyx@MjRG{+lO&{?=jtNFZ;ilS@Ql_X z4b|q7*p<@^ZJYD+dbcM}SgK9Rn#$)bGl{QW(Ak^*`keDeKtT}AU-8_Y7+ z{m2!#T)mk)odAqTem>7@Q3w3$LU+FT3MYMSw%FIT7$)bk-yOv!{q)47z+XR|yLq=h zVa!LkJV2Ul|9I!gM-6qel8vXmOWd3*<48M<563?1kbpC1+GA14q0Uv zH%&QFWs5x+``TuAxGk51#n&>OI#NRFO%1R|UT6QRr+jnTr~LNNEitFRs)9>fff=*y z)BvVkEHj+ zIaS#e6gH5C$qwlEYgC8Ioh_w7H-nFHrKq$=Hdm`)W{z&6zHIQe!5w<>F54!EdEQCs z{&~k@FVRM}kQsbLDfF>H$n10H+P_UGhF{XpS*2nx2~S>ZQhm9-I2@;B>Rzf=HS0k3x9eGO3~4Nsn_Ii{cSj>w-YAT&o+lq(_l=YciA-nZc? zdRZDjUH;ZQ^(fK7hlCo%hEYbP9UI0eUv`^Kd|hsyi!*-f=->c|Atj5x@`Zhy&qsMJ zQ?0uA{CxOYE7O9o`Z233ozt?)-zZa3PuFR2`;FA7TuIW+ySL%hG`+llX?j*$^}8=Gr7JytEM_kU|o^PDiG3#9qCK*AD2^3qgWk`PpF7 zeTiIiv}JMx6e}yjq?vz~KYn;>qJLEvvvA5Hn=EnlohZP%)(TV$5fHY21m-!jE2f7%||!T{MlZ{EW{8q?>-bAaOO*V%0b*ccAdvAAYELCX2+A zN1HxDFQB7n7cTwomXOj(p>OW{dOJzwaDGi*vu5(($iyVsr~`Fh*1kkV#hGqV*?R&b zBnmR1q%_K=Z7_*lpWuYqqL{{2BWSCJXpu*Vc^x88!?4HXo*2}Q1NSGu`t{+*4jw{EzuFh02EU8M6PZV}1;JEb$$ z0wi_l%cAV6(YDvAFK15s(C%mmjQ9IJp%F-%or+&u*=7{$jJpXcxksWqR{E$EEx`FRYvy$fvdr!YXbHz^tWd{55OHVT5|s}PW^GghHfO$K1g9ckN0eVtWyugA-=Fc4C{_UZL_88D;VnjU0HVF zZ+?)N()jC9%pzr>-nL+piM>k$d8)58Nc@V?E{v0`pqIEGKyNfP(zOrnBA>R2!!6IE z5ue6~CdLQ!Z+%m(B41EV;4+~-QV$A1!7m=8vLv$MH|@Ux#(f4M7y0fbE|EF(uH=VB z#IecK=Ug<(V=ULSVC!xWouK|i==zt7%WU=g&XzoE*_{ei21ntwrt6LYO_GYNzYKW5 z73eHSc3pLOmoR=io$hx-NPeMfXsOucbB!eMF%{@_xH|I}_4Mg1e#czLZ}I6|l7T3m1q`7I44h(*8lO^Zb7p0*Hdb t9|>pFvGnJ^C#?R5bo&4A9?d^As%C$qU+_n5H%AS>S#$dz=^OW){tuz_?uY;Y literal 0 HcmV?d00001 From 8fc358d45b5c911ccb70ab6c65b9c465153da628 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Wed, 15 Oct 2025 16:26:49 -0500 Subject: [PATCH 85/98] Update PR based on coderabbit review. --- .../external_aerodynamics/domino/README.md | 2 +- .../domino/src/benchmark_dataloader.py | 3 -- .../domino/src/deprecated/README.md | 2 +- .../domino/src/inference_on_stl.py | 1 - .../external_aerodynamics/domino/src/loss.py | 35 --------------- .../src/shuffle_volumetric_curator_output.py | 27 ++++++++--- .../external_aerodynamics/domino/src/train.py | 5 --- .../external_aerodynamics/domino/src/utils.py | 3 +- physicsnemo/datapipes/cae/cae_dataset.py | 39 +++++++++++----- physicsnemo/datapipes/cae/domino_datapipe.py | 45 +++++++++++++------ physicsnemo/models/domino/encodings.py | 2 +- physicsnemo/models/layers/ball_query.py | 3 ++ physicsnemo/models/layers/mlp_layers.py | 27 +---------- physicsnemo/utils/domino/utils.py | 2 +- test/datapipes/test_domino_datapipe.py | 5 --- test/models/domino/test_domino_mlps.py | 23 ---------- test/models/test_mlp_layers.py | 6 +-- 17 files changed, 92 insertions(+), 138 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/README.md b/examples/cfd/external_aerodynamics/domino/README.md index a6cafd803d..21dc5adf1c 100644 --- a/examples/cfd/external_aerodynamics/domino/README.md +++ b/examples/cfd/external_aerodynamics/domino/README.md @@ -113,7 +113,7 @@ knowledge of the dataset: - The output fields (the labels) are normalized during training to a mean of zero and a standard deviation of one, averaged over the dataset. - The scaling is controlled by passing the `volume_factors` andg + The scaling is controlled by passing the `volume_factors` and `surface_factors` values to the datapipe. - The input locations are scaled by, and optionally cropped to, used defined bounding boxes for both surface and volume. Whether cropping occurs, or not, diff --git a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py index 339363195a..04ca2340e9 100644 --- a/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py +++ b/examples/cfd/external_aerodynamics/domino/src/benchmark_dataloader.py @@ -181,7 +181,4 @@ def main(cfg: DictConfig) -> None: if __name__ == "__main__": - # Profiler().enable("torch") - # Profiler().initialize() main() - # Profiler().finalize() diff --git a/examples/cfd/external_aerodynamics/domino/src/deprecated/README.md b/examples/cfd/external_aerodynamics/domino/src/deprecated/README.md index 9124353b7e..fb7d062f56 100644 --- a/examples/cfd/external_aerodynamics/domino/src/deprecated/README.md +++ b/examples/cfd/external_aerodynamics/domino/src/deprecated/README.md @@ -1,5 +1,5 @@ # DoMINO Deprecation -The files in this folder have been deprecated as of the physicsnemo 25.11 release - +The files in this folder have been deprecated as of the PhysicsNeMo 25.11 release - they are no longer officially supported. They are kept here only as a reference, and may be removed in a future release. diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py index 9707a7e6d1..89d7a9ba24 100644 --- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py +++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py @@ -614,7 +614,6 @@ def main(cfg: DictConfig) -> None: global_features=num_global_features, model_parameters=cfg.model, ).to(dist.device) - # model = torch.compile(model, fullgraph=True, dynamic=True) # TODO make this configurable # Print model summary (structure and parmeter count). logger.info(f"Model summary:\n{torchinfo.summary(model, verbose=0, depth=2)}\n") diff --git a/examples/cfd/external_aerodynamics/domino/src/loss.py b/examples/cfd/external_aerodynamics/domino/src/loss.py index e8a143b9c4..60c52beb32 100644 --- a/examples/cfd/external_aerodynamics/domino/src/loss.py +++ b/examples/cfd/external_aerodynamics/domino/src/loss.py @@ -19,47 +19,12 @@ from physicsnemo.utils.domino.utils import unnormalize -import time -import os -import re -import torch -import torchinfo - from typing import Literal, Any -import apex -import numpy as np -import hydra -from hydra.utils import to_absolute_path -from omegaconf import DictConfig, OmegaConf -import torch.distributed as dist -from torch.amp import GradScaler, autocast -from torch.nn.parallel import DistributedDataParallel -from torch.utils.data import DataLoader -from torch.utils.data.distributed import DistributedSampler -from torch.utils.tensorboard import SummaryWriter -from nvtx import annotate as nvtx_annotate import torch.cuda.nvtx as nvtx - -from physicsnemo.distributed import DistributedManager -from physicsnemo.launch.utils import load_checkpoint, save_checkpoint -from physicsnemo.launch.logging import PythonLogger, RankZeroLoggingWrapper - - -from physicsnemo.models.domino.model import DoMINO from physicsnemo.utils.domino.utils import * -# This is included for GPU memory tracking: -from pynvml import nvmlInit, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo -import time - -# Initialize NVML -nvmlInit() - - -from physicsnemo.utils.profiling import profile, Profiler - def compute_physics_loss( output: torch.Tensor, diff --git a/examples/cfd/external_aerodynamics/domino/src/shuffle_volumetric_curator_output.py b/examples/cfd/external_aerodynamics/domino/src/shuffle_volumetric_curator_output.py index 02678d9e61..553d4e575a 100644 --- a/examples/cfd/external_aerodynamics/domino/src/shuffle_volumetric_curator_output.py +++ b/examples/cfd/external_aerodynamics/domino/src/shuffle_volumetric_curator_output.py @@ -82,7 +82,9 @@ def store_array(store, name: str, data: np.ndarray): ) -def copy_file_with_shuffled_volume_data(input_file: str, output_file: str): +def copy_file_with_shuffled_volume_data( + input_file: str, output_file: str, random_seed: int | None = None +): """ Copy a file with shuffled volume data, using Zarr v3 sharding for efficient storage. Only processes if the output file doesn't exist or is incomplete. @@ -127,6 +129,9 @@ def copy_file_with_shuffled_volume_data(input_file: str, output_file: str): volume_fields = in_file["volume_fields"][:] volume_mesh_centers = in_file["volume_mesh_centers"][:] + if random_seed is not None: + np.random.seed(random_seed) + # Generate a permutation permutation = np.random.permutation(volume_fields.shape[0]) @@ -152,18 +157,26 @@ def process_file(file: str, top_dir: str, out_dir: str): def main(): - top_dir = "/lustre/fsw/coreai_modulus_cae/coreya/datasets/domino/val/" - out_dir = "/lustre/fsw/coreai_modulus_cae/coreya/datasets/domino/val_shuffled2/" + import argparse + + parser = argparse.ArgumentParser(description="Shuffle volumetric curator output") + parser.add_argument("--input-dir", required=True, help="Input directory path") + parser.add_argument("--output-dir", required=True, help="Output directory path") + parser.add_argument( + "--num-cores", type=int, default=64, help="Number of cores to use" + ) + args = parser.parse_args() # Get list of files to process - files = os.listdir(top_dir) - files = files[0:2] + files = os.listdir(args.input_dir) # Create a partial function with fixed directories - process_func = partial(process_file, top_dir=top_dir, out_dir=out_dir) + process_func = partial( + process_file, top_dir=args.input_dir, out_dir=args.output_dir + ) # Use multiprocessing to process files in parallel - num_cores = max(1, 64) # Leave one core free + num_cores = max(1, args.num_cores) # Leave one core free print(f"Processing {len(files)} files using {num_cores} cores") with mp.Pool(num_cores) as pool: diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py index 37634b8715..070bb71412 100644 --- a/examples/cfd/external_aerodynamics/domino/src/train.py +++ b/examples/cfd/external_aerodynamics/domino/src/train.py @@ -343,8 +343,6 @@ def main(cfg: DictConfig) -> None: # Get scaling factors - precompute them if this fails! ###################################################### vol_factors, surf_factors = load_scaling_factors(cfg) - - vol_factors_tensor = vol_factors ###################################################### # Configure the model @@ -661,7 +659,4 @@ def main(cfg: DictConfig) -> None: if __name__ == "__main__": - # Profiler().enable("torch") - # Profiler().initialize() main() - # Profiler().finalize() diff --git a/examples/cfd/external_aerodynamics/domino/src/utils.py b/examples/cfd/external_aerodynamics/domino/src/utils.py index 9b742677b2..9970c186f4 100644 --- a/examples/cfd/external_aerodynamics/domino/src/utils.py +++ b/examples/cfd/external_aerodynamics/domino/src/utils.py @@ -113,7 +113,7 @@ def get_keys_to_read( cfg_params_vec = [] for key in cfg.variables.global_parameters: if cfg.variables.global_parameters[key].type == "vector": - cfg_params_vec.append(*cfg.variables.global_parameters[key].reference) + cfg_params_vec.extend(cfg.variables.global_parameters[key].reference) else: cfg_params_vec.append(cfg.variables.global_parameters[key].reference) keys_to_read_if_available = { @@ -204,7 +204,6 @@ def coordinate_distributed_environment(cfg: DictConfig): "surface_mesh_centers": point_like_placement, "surface_normals": point_like_placement, "surface_areas": point_like_placement, - "surface_fields": point_like_placement, } else: domain_mesh = None diff --git a/physicsnemo/datapipes/cae/cae_dataset.py b/physicsnemo/datapipes/cae/cae_dataset.py index a78157936a..21fb8c4783 100644 --- a/physicsnemo/datapipes/cae/cae_dataset.py +++ b/physicsnemo/datapipes/cae/cae_dataset.py @@ -550,6 +550,9 @@ def __init__( self, keys_to_read: list[str] | None, keys_to_read_if_available: dict[str, torch.Tensor] | None, + cache_bytes_limit: int = 10_000_000, + data_copy_concurrency: int = 72, + file_io_concurrency: int = 72, ) -> None: super().__init__(keys_to_read, keys_to_read_if_available) @@ -563,9 +566,9 @@ def __init__( self.context = ts.Context( { - "cache_pool": {"total_bytes_limit": 10_000_000}, - "data_copy_concurrency": {"limit": 72}, - "file_io_concurrency": {"limit": 72}, + "cache_pool": {"total_bytes_limit": cache_bytes_limit}, + "data_copy_concurrency": {"limit": data_copy_concurrency}, + "file_io_concurrency": {"limit": file_io_concurrency}, } ) @@ -1116,6 +1119,20 @@ def set_volume_sampling_size(self, volume_sampling_size: int): """ self.file_reader.set_volume_sampling_size(volume_sampling_size) + def close(self): + """ + Explicitly close the dataset and cleanup resources, including the ThreadPoolExecutor. + """ + if hasattr(self, "preload_executor") and self.preload_executor is not None: + self.preload_executor.shutdown(wait=True) + self.preload_executor = None + + def __del__(self): + """ + Cleanup resources when the dataset is destroyed. + """ + self.close() + def compute_mean_std_min_max( dataset: CAEDataset, field_keys: list[str], max_samples: int = 20 @@ -1180,7 +1197,7 @@ def compute_mean_std_min_max( batch_mean = field_data.mean(axis=(0)) batch_M2 = ((field_data - batch_mean) ** 2).sum(axis=(0)) batch_n = field_data.shape[0] - + # Update running mean and M2 (Welford's algorithm) delta = batch_mean - mean[field_key] N[field_key] += batch_n # batch_n should also be torch.int64 @@ -1215,28 +1232,30 @@ def compute_mean_std_min_max( batch_n = field_data.shape[0] # # Update min/max - + mean_sample = mean[field_key] std_sample = std[field_key] # import pdb; pdb.set_trace() mask = torch.ones_like(field_data, dtype=torch.bool) for v in range(field_data.shape[-1]): - idx = (field_data[:, v] < mean_sample[v] - 12 * std_sample[v]) | (field_data[:, v] > mean_sample[v] + 12 * std_sample[v]) + idx = (field_data[:, v] < mean_sample[v] - 12 * std_sample[v]) | ( + field_data[:, v] > mean_sample[v] + 12 * std_sample[v] + ) idx = torch.where(idx) mask[idx] = False - + batch_min = [] batch_max = [] for v in range(field_data.shape[-1]): batch_min.append(field_data[mask[:, v], v].min()) batch_max.append(field_data[mask[:, v], v].max()) - + batch_min = torch.stack(batch_min) batch_max = torch.stack(batch_max) min_val[field_key] = torch.minimum(min_val[field_key], batch_min) max_val[field_key] = torch.maximum(max_val[field_key], batch_max) - + end = time.perf_counter() iteration_time = end - start print(f"on iteration {i} of {max_samples}, time: {iteration_time:.2f} seconds") @@ -1247,4 +1266,4 @@ def compute_mean_std_min_max( print(f"Total time: {global_time:.2f} seconds for {max_samples} samples") - return mean, std, min_val, max_val \ No newline at end of file + return mean, std, min_val, max_val diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py index 05243e99a5..9d01ad0978 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe.py +++ b/physicsnemo/datapipes/cae/domino_datapipe.py @@ -304,7 +304,9 @@ def __init__( self.dataset = None - def compute_stl_scaling_and_surface_grids(self) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + def compute_stl_scaling_and_surface_grids( + self, + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: """ Compute the min and max for the defining mesh. @@ -325,7 +327,9 @@ def compute_stl_scaling_and_surface_grids(self) -> tuple[torch.Tensor, torch.Ten return s_min, s_max, surf_grid - def compute_volume_scaling_and_grids(self) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + def compute_volume_scaling_and_grids( + self, + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: """ Compute the min and max and grid for volume data. @@ -363,7 +367,9 @@ def downsample_geometry( stl_vertices, geometry_points ) if geometry_coordinates_sampled.shape[0] < geometry_points: - raise ValueError("Sampled points is more than points in the surface mesh") + raise ValueError( + "Surface mesh has fewer points than requested sample size" + ) geom_centers = geometry_coordinates_sampled else: geom_centers = stl_vertices @@ -444,14 +450,12 @@ def process_surface( if surface_coordinates_sampled.shape[0] < self.config.surface_points_sample: raise ValueError( - "Sampled points is more than points in the surface mesh" + "Surface mesh has fewer points than requested sample size" ) # Select out the sampled points for non-neighbor arrays: if surface_fields is not None: surface_fields = surface_fields[idx_surface] - else: - raise ValueError("Surface fields are not present") # Subsample the normals and sizes: surface_normals = surface_normals[idx_surface] @@ -548,8 +552,6 @@ def process_volume( volume_coordinates = volume_coordinates[ids_in_bbox] if volume_fields is not None: volume_fields = volume_fields[ids_in_bbox] - else: - raise ValueError("Volume fields are not present") ######################################################################## # Apply sampling to the volume coordinates and fields @@ -568,14 +570,12 @@ def process_volume( # inputs were too few), pad the outputs: if volume_coordinates_sampled.shape[0] < self.config.volume_points_sample: raise ValueError( - "Sampled points is more than points in the volume mesh" + "Volume mesh has fewer points than requested sample size" ) # Apply the same sampling to the targets, too: if volume_fields is not None: volume_fields = volume_fields[idx_volume] - else: - raise ValueError("Volume fields are not present") volume_coordinates = volume_coordinates_sampled @@ -656,6 +656,22 @@ def calculate_volume_encoding( @torch.no_grad() def process_data(self, data_dict): + # Validate that all required keys are present in data_dict + required_keys = [ + "global_params_values", + "global_params_reference", + "stl_coordinates", + "stl_faces", + "stl_centers", + "stl_areas", + ] + missing_keys = [key for key in required_keys if key not in data_dict] + if missing_keys: + raise ValueError( + f"Missing required keys in data_dict: {missing_keys}. " + f"Required keys are: {required_keys}" + ) + # Start building the preprocessed return dict: return_dict = { "global_params_values": data_dict["global_params_values"], @@ -730,7 +746,7 @@ def process_data(self, data_dict): ) return_dict["sdf_surf_grid"] = sdf_surf_grid return_dict["surf_grid"] = surf_grid - + # Store this only if normalization is active: if self.config.normalize_coordinates: return_dict["surface_min_max"] = torch.stack([s_min, s_max]) @@ -860,7 +876,9 @@ def set_dataset(self, dataset: Iterable) -> None: if self.config.volume_sample_from_disk: # We deliberately double the data to read compared to the sampling size: - self.dataset.set_volume_sampling_size(100 * self.config.volume_points_sample) + self.dataset.set_volume_sampling_size( + 100 * self.config.volume_points_sample + ) def __len__(self): if self.dataset is not None: @@ -926,7 +944,6 @@ def compute_scaling_factors( cfg: DictConfig, input_path: str, target_keys: list[str], - use_cache=None, max_samples=20, ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: """ diff --git a/physicsnemo/models/domino/encodings.py b/physicsnemo/models/domino/encodings.py index 55ce655090..7b27eeb134 100644 --- a/physicsnemo/models/domino/encodings.py +++ b/physicsnemo/models/domino/encodings.py @@ -147,7 +147,7 @@ def __init__( def calculate_total_neighbors_in_radius( self, geo_encoding_type: str, neighbors_in_radius: int, n_upstream_radii: int - ) -> list[int]: + ) -> int: if geo_encoding_type == "both": total_neighbors_in_radius = neighbors_in_radius * (n_upstream_radii + 1) elif geo_encoding_type == "stl": diff --git a/physicsnemo/models/layers/ball_query.py b/physicsnemo/models/layers/ball_query.py index 795958800a..77416bd57a 100644 --- a/physicsnemo/models/layers/ball_query.py +++ b/physicsnemo/models/layers/ball_query.py @@ -82,6 +82,9 @@ def forward( - outputs: Tensor containing coordinates of the neighboring points """ + if x.shape[0] != 1 or p_grid.shape[0] != 1: + raise ValueError("BQWarp only supports batch size 1") + if p_grid.shape[-1] != x.shape[-1] or x.shape[-1] != 3: raise ValueError("The last dimension of p_grid and x must be 3") diff --git a/physicsnemo/models/layers/mlp_layers.py b/physicsnemo/models/layers/mlp_layers.py index ec832ad6b1..5c8c3348a3 100644 --- a/physicsnemo/models/layers/mlp_layers.py +++ b/physicsnemo/models/layers/mlp_layers.py @@ -19,31 +19,6 @@ from .activations import get_activation -# class Mlp(nn.Module): -# def __init__( -# self, -# in_features, -# hidden_features=None, -# out_features=None, -# act_layer=nn.GELU, -# drop=0.0, -# ): -# super().__init__() -# out_features = out_features or in_features -# hidden_features = hidden_features or in_features -# self.fc1 = nn.Linear(in_features, hidden_features) -# self.act = act_layer() -# self.fc2 = nn.Linear(hidden_features, out_features) -# self.drop = nn.Dropout(drop) - -# def forward(self, x: torch.Tensor): -# x = self.fc1(x) -# x = self.act(x) -# x = self.drop(x) -# x = self.fc2(x) -# x = self.drop(x) -# return x - class Mlp(nn.Module): def __init__( @@ -66,7 +41,7 @@ def __init__( ] # If the activation is a string, get it. - # It's it's a type, instantiate it. + # If it's a type, instantiate it. # If it's a module, leave it be. if isinstance(act_layer, str): act_layer = get_activation(act_layer) diff --git a/physicsnemo/utils/domino/utils.py b/physicsnemo/utils/domino/utils.py index 8b7a982142..e3faae8123 100644 --- a/physicsnemo/utils/domino/utils.py +++ b/physicsnemo/utils/domino/utils.py @@ -469,7 +469,7 @@ def shuffle_array( for g in range(gap): points_per_chunk[g] += 1 elif gap < 0: - for g in range(gap): + for g in range(-gap): points_per_chunk[g] -= 1 # Create a list of indexes per chunk: diff --git a/test/datapipes/test_domino_datapipe.py b/test/datapipes/test_domino_datapipe.py index a4f7159c1f..9f665886bd 100644 --- a/test/datapipes/test_domino_datapipe.py +++ b/test/datapipes/test_domino_datapipe.py @@ -363,8 +363,6 @@ def validate_sample_structure(sample, model_type, gpu_output): expected_keys.extend(surface_keys) # Check that required keys are present and are torch tensors on correct device - for key in expected_keys: - print(f"Got key: {key} on device: {sample[key].device.type}") for key in expected_keys: if key in sample: # Some keys may be None if compute_scaling_factors=True @@ -748,9 +746,6 @@ def test_domino_datapipe_sampling(zarr_dataset, model_type, sampling, pytestconf sample = dataset[0] validate_sample_structure(sample, model_type, gpu_output=use_cuda) - for key in sample: - print(f"sample[{key}].shape: {sample[key].shape}") - if model_type in ["volume", "combined"]: for key in ["volume_mesh_centers", "volume_fields"]: if sampling: diff --git a/test/models/domino/test_domino_mlps.py b/test/models/domino/test_domino_mlps.py index d181f24401..8cf9546c2b 100644 --- a/test/models/domino/test_domino_mlps.py +++ b/test/models/domino/test_domino_mlps.py @@ -19,29 +19,6 @@ from .utils import validate_output_shape_and_values -# @pytest.mark.parametrize("device", ["cuda:0"]) -# @pytest.mark.parametrize("activation", ["relu", "gelu"]) -# @pytest.mark.parametrize("n_layers", [1, 2, 3, 5]) -# def test_mlp(device, activation, n_layers): -# """Test basic MLP functionality""" -# from physicsnemo.models.domino.mlps import MLP -# from physicsnemo.models.domino.model import get_activation - -# torch.manual_seed(0) - -# mlp = MLP( -# input_features=10, -# output_features=5, -# base_layer=32, -# activation=get_activation(activation), -# n_layers=n_layers, -# ).to(device) - -# x = torch.randn(4, 50, 10).to(device) -# output = mlp(x) - -# validate_output_shape_and_values(output, (4, 50, 5)) - @pytest.mark.parametrize("device", ["cuda:0"]) @pytest.mark.parametrize("activation", ["relu", "gelu"]) diff --git a/test/models/test_mlp_layers.py b/test/models/test_mlp_layers.py index 19db339ba8..7a943cc51b 100644 --- a/test/models/test_mlp_layers.py +++ b/test/models/test_mlp_layers.py @@ -50,7 +50,7 @@ def test_mlp_forward_accuracy(device): def test_mlp_activation_and_dropout(): model = Mlp(in_features=10, hidden_features=20, out_features=5, drop=0.5) - input_tensor = torch.randn(2, 10) # Assuming a batch size of 1 for simplicity + input_tensor = torch.randn(2, 10) # Batch size of 2 output_tensor = model(input_tensor) @@ -61,7 +61,7 @@ def test_mlp_different_activation(): model = Mlp( in_features=10, hidden_features=20, out_features=7, act_layer=torch.nn.ReLU ) - input_tensor = torch.randn(3, 10) # Assuming a batch size of 1 for simplicity + input_tensor = torch.randn(3, 10) # Batch size of 3 output_tensor = model(input_tensor) assert output_tensor.shape == torch.Size([3, 7]) @@ -69,7 +69,7 @@ def test_mlp_different_activation(): def test_multiple_hidden_layers(): model = Mlp(in_features=10, hidden_features=[20, 30], out_features=5) - input_tensor = torch.randn(4, 10) # Assuming a batch size of 1 for simplicity + input_tensor = torch.randn(4, 10) # Batch size of 4 output_tensor = model(input_tensor) assert output_tensor.shape == torch.Size([4, 5]) From 763d97855a71800f946148346d6fccfc92fa0e01 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Thu, 16 Oct 2025 08:53:27 -0500 Subject: [PATCH 86/98] Remove error that breaks validation / inference. --- physicsnemo/datapipes/cae/domino_datapipe.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py index 9d01ad0978..fb9a920708 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe.py +++ b/physicsnemo/datapipes/cae/domino_datapipe.py @@ -422,8 +422,6 @@ def process_surface( surface_sizes = surface_sizes[ids_in_bbox] if surface_fields is not None: surface_fields = surface_fields[ids_in_bbox] - else: - raise ValueError("Surface fields are not present") ######################################################################## # Perform Down sampling of the surface fields. From 0ea5f99316e0fabec0a7e53336ad60788e3726b1 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Thu, 16 Oct 2025 16:21:12 +0000 Subject: [PATCH 87/98] Update Domino model and tests: make sure pre-commit passes, remove unneeded layers, and update tests for latest model. --- physicsnemo/models/domino/geometry_rep.py | 23 ++- physicsnemo/models/domino/model.py | 50 +++-- test/models/data/domino_output-conv.pth | Bin 0 -> 5501 bytes test/models/data/domino_output-unet.pth | Bin 0 -> 5501 bytes test/models/data/domino_output.pth | Bin 5255 -> 0 bytes test/models/domino/test_domino.py | 187 +++++++++--------- .../models/domino/test_domino_geometry_rep.py | 13 +- 7 files changed, 144 insertions(+), 129 deletions(-) create mode 100644 test/models/data/domino_output-conv.pth create mode 100644 test/models/data/domino_output-unet.pth delete mode 100644 test/models/data/domino_output.pth diff --git a/physicsnemo/models/domino/geometry_rep.py b/physicsnemo/models/domino/geometry_rep.py index c2fbe9f606..eee192e600 100644 --- a/physicsnemo/models/domino/geometry_rep.py +++ b/physicsnemo/models/domino/geometry_rep.py @@ -74,7 +74,9 @@ def __init__( self.num_modes = model_parameters.num_modes if self.fourier_features: - input_features_calculated = input_features * (1 + 2 * self.num_modes) * neighbors_in_radius + input_features_calculated = ( + input_features * (1 + 2 * self.num_modes) * neighbors_in_radius + ) else: input_features_calculated = input_features * neighbors_in_radius @@ -86,10 +88,6 @@ def __init__( drop=0.0, ) - # self.fc1 = nn.Linear(input_features_calculated, base_neurons) - # self.fc2 = nn.Linear(base_neurons, base_neurons // 2) - # self.fc3 = nn.Linear(base_neurons // 2, model_parameters.base_neurons_in) - self.grid_resolution = grid_resolution self.activation = get_activation(model_parameters.activation) @@ -128,14 +126,14 @@ def forward( ) grid = grid.reshape(1, nx * ny * nz, 3, 1) - x = rearrange(x, "b x y z -> b x (y z)", x=nx*ny*nz, y=self.neighbors_in_radius, z=3) + x = rearrange( + x, "b x y z -> b x (y z)", x=nx * ny * nz, y=self.neighbors_in_radius, z=3 + ) if self.fourier_features: facets = torch.cat((x, fourier_encode(x, self.freqs)), axis=-1) else: facets = x - # x = self.activation(self.fc1(facets)) - # x = self.activation(self.fc2(x)) - # x = F.tanh(self.fc3(x)) + x = F.tanh(self.mlp(facets)) x = rearrange(x, "b (x y z) c -> b c x y z", x=nx, y=ny, z=nz) @@ -364,8 +362,9 @@ def __init__( normalization_in_unet = "layernorm" else: normalization_in_unet = None + self.geo_processor_sdf = UNet( - in_channels=6, + in_channels=5 + len(self.sdf_scaling_factor), out_channels=geometry_rep.geo_conv.base_neurons_out, model_depth=3, feature_map_channels=[ @@ -391,7 +390,7 @@ def __init__( elif geometry_rep.geo_processor.processor_type == "conv": self.geo_processor_sdf = nn.Sequential( GeoProcessor( - input_filters=5+len(self.sdf_scaling_factor), + input_filters=5 + len(self.sdf_scaling_factor), output_filters=geometry_rep.geo_conv.base_neurons_out, model_parameters=geometry_rep.geo_processor, ), @@ -477,7 +476,7 @@ def forward( for s in range(len(self.sdf_scaling_factor)): s_sdf = scale_sdf(sdf, self.sdf_scaling_factor[s]) scaled_sdf.append(s_sdf) - + scaled_sdf = torch.cat(scaled_sdf, dim=1) # Process SDF and its computed features diff --git a/physicsnemo/models/domino/model.py b/physicsnemo/models/domino/model.py index 20be346dcb..9f46947f2b 100644 --- a/physicsnemo/models/domino/model.py +++ b/physicsnemo/models/domino/model.py @@ -171,7 +171,7 @@ def __init__( ValueError: If both output_features_vol and output_features_surf are None """ super().__init__() - self.input_features = input_features + self.output_features_vol = output_features_vol self.output_features_surf = output_features_surf self.num_sample_points_surface = model_parameters.num_neighbors_surface @@ -267,11 +267,6 @@ def __init__( self.encode_parameters = model_parameters.encode_parameters self.geo_encoding_type = model_parameters.geometry_encoding_type - if hasattr(model_parameters, "num_volume_neighbors"): - self.num_volume_neighbors = model_parameters.num_volume_neighbors - else: - self.num_volume_neighbors = 50 - if self.use_surface_normals: if not self.use_surface_area: input_features_surface = input_features + 3 @@ -310,7 +305,7 @@ def __init__( sdf_scaling_factor=model_parameters.geometry_rep.geo_processor.surface_sdf_scaling_factor, model_parameters=model_parameters, ) - + # Basis functions for surface and volume base_layer_nn = model_parameters.nn_basis_functions.base_layer if self.output_features_surf is not None: @@ -353,9 +348,15 @@ def __init__( position_encoder_base_neurons = model_parameters.position_encoder.base_neurons self.activation = get_activation(model_parameters.activation) self.use_sdf_in_basis_func = model_parameters.use_sdf_in_basis_func - self.sdf_scaling_factor = model_parameters.geometry_rep.geo_processor.volume_sdf_scaling_factor + self.sdf_scaling_factor = ( + model_parameters.geometry_rep.geo_processor.volume_sdf_scaling_factor + ) if self.output_features_vol is not None: - inp_pos_vol = 7 + len(self.sdf_scaling_factor) if model_parameters.use_sdf_in_basis_func else 3 + inp_pos_vol = ( + 7 + len(self.sdf_scaling_factor) + if model_parameters.use_sdf_in_basis_func + else 3 + ) self.fc_p_vol = FourierMLP( input_features=inp_pos_vol, @@ -398,20 +399,6 @@ def __init__( grid_resolution=self.grid_resolution, ) - # Transmitting surface to volume - self.surf_to_vol_conv1 = nn.Conv3d( - len(model_parameters.geometry_rep.geo_conv.volume_radii) + 1, - 16, - kernel_size=3, - padding="same", - ) - self.surf_to_vol_conv2 = nn.Conv3d( - 16, - len(model_parameters.geometry_rep.geo_conv.volume_radii) + 1, - kernel_size=3, - padding="same", - ) - # Aggregation model if self.output_features_surf is not None: # Surface @@ -522,9 +509,12 @@ def forward(self, data_dict): # SDF on volume mesh nodes sdf_nodes = data_dict["sdf_nodes"] - scaled_sdf_nodes = [] - for i in range(len(self.sdf_scaling_factor)): - scaled_sdf_nodes.append(scale_sdf(sdf_nodes, self.sdf_scaling_factor[i])) + # scaled_sdf_nodes = [] + # for i in range(len(self.sdf_scaling_factor)): + # scaled_sdf_nodes.append(scale_sdf(sdf_nodes, self.sdf_scaling_factor[i])) + scaled_sdf_nodes = [ + scale_sdf(sdf_nodes, scaling) for scaling in self.sdf_scaling_factor + ] scaled_sdf_nodes = torch.cat(scaled_sdf_nodes, dim=-1) # Positional encoding based on closest point on surface to a volume node @@ -533,7 +523,13 @@ def forward(self, data_dict): pos_volume_center_of_mass = data_dict["pos_volume_center_of_mass"] if self.use_sdf_in_basis_func: encoding_node_vol = torch.cat( - (sdf_nodes, scaled_sdf_nodes, pos_volume_closest, pos_volume_center_of_mass), dim=-1 + ( + sdf_nodes, + scaled_sdf_nodes, + pos_volume_closest, + pos_volume_center_of_mass, + ), + dim=-1, ) else: encoding_node_vol = pos_volume_center_of_mass diff --git a/test/models/data/domino_output-conv.pth b/test/models/data/domino_output-conv.pth new file mode 100644 index 0000000000000000000000000000000000000000..0a3b7102a414c49428b7a0c051865bbb8f984743 GIT binary patch literal 5501 zcmb7I30#!b`W|pY5||mtT`RT$B?Sds-nT+7;Q}fyC?UzNpa>3(WL~$*a!Vt{g&8DM zubC#YGxL2__2S2~Kb|Oli=$Sk< zUF-N@h;w9CPO@&?gNM%AR&frZPoTDSTq|);ojFU}W@LebPTO`&lp@L@DlMw@Vr@I| zB&v0k!(y$|($v(PtgQ4@U1m;pp7ZG3wDjEcw1~`9owK&R|5&ZFPW#MQZHFj_3=uuU zG2;Pxkk%!m&B$K_s_R44HjA}x;z?8+JF0SNhJCO~r&W*Dy3gtp6V=XXT13B_&4Q7= z#XL19`O1WM%4fS?P8T@4j@tOIs1&-k#2(i=ux^9PFy>8yFeh_kSf27i2^HtYTjJWi87) zdL!uTCOj9dXZJ2LcMbQ$<@rr$-P6GD|6StmCV6B1bu0AO^?dkbEvMCM;JoBpY-_FO z4&O+;%B;aD&n9eLW8jTO%H1EX zs~eFLF8Ftm_{Wd@uxX7Ih2nirye#vq&OVqJXGL~{frqTD<()}hD068<=d(q;Mk{ed zkvA?k+YqYN^Xq;lu4w0rqEZ_=jnQ+^0GUfV`C=tEA@4Z@PpB}mcZo0d3~9ps?+x79 z!Nd#F{1I}}hG{<-*yEIm7shF@eY+J-g~j~qDv8T_d*hSSO&EMm&(B*;+`M0dk|T}y z`mCNWuMqj0?Tb#+ZMeO=h)-2WJmVK%yfD>_nP;F>cgp6lt0 zwr(cC%Hl|&5bZ@*ua5WBd7P!VAVA%B9jd4xmM;S z@jh@5Ys7`BB3`l2$Uk=SLDOa%+6HXk`x}gGNcP9v&#Xx6S;C#ge2gyeMa?NIUcOq) zc_xYHf7A;u30B;CN6*8)HSzT+-dO#U6@%X}@R{}!7aDvZxg0}~rkGF8ujLfk8-ZsV z;W)>@rORqLy4)Wfriu3nF!1a}V*Diy7BpJ1w@VRU{kMs&S2R#(+K^kK=g;n#ICq^N zqJ=&R&lIsiZ^AGNYK! z^*8c}6TTSw>k-WPTM?W0n7FIK7ZdVqQ1&clT_>4)OcnY)Xv1`80|$)~^ONU?xVcSm zY1Ff8w#4hZ2tJjKIHlge(PDi)+n_<~j*SSrYv9}CYWat|UTA1BrL7^Y6 zZfZi?K?csS898vCHv-z&aQJvJ>-9zsj?y61V8ev6A`a_q;y3&H;N8|%e6mB&ox_bR zZT7}n*BWuM!oY2Z8`-Mvg_>zLv@167q7Nj_InfJKR|&n98u&v4c*RN$F3c5rPb=n? zArgB|^+bu7&ko1+T$5+w^>=-MK~@xq`moR+oR#Z`3A?Q57HHt}mn1G2?T2APA0sjh zY<}CsX~X=`wVMszn~J%NOxzRRNVeE8YlMNP#7gYQ8pQSZ7Oze=@VVu+>?-u1vRBOK z1wGdu5cMd;7vcA8U{Mb@rpf&NaZkhzx8gX8dGZ}2_mliFV5bd{&lU5|TY}$B4bJrt z>+g`BXWW!H_gg=#74`SPNj=|q(a3uT`onjW70#~}bH5UaKlkd5qsca;tkH9vWaNd> zFXHNVN71`b&w=NSJm-uCOLjM6tFM7KFPC}INN-4?Ryal*IQp+5|Lc6QO5h(Ad`?_8 z@$CL$K8A|*eN)eGHJW(EPA~9FHZ0eR{0uemOrsa##@R4%iJrHOm-s@o2H9UVA+A8r zS31sqSUb5RtnVc1d8C3^VMd&&9X+4( zz}-=!sW_vWVuqV>=slUU*STWP_%U?q`BGYb*NjVn;P^T)e=-6nAyID!LQ)Zk`0#BK) zLd0hysH&!%Hukq*xSPb|JGw!CIgI+;Eu;G$^(c+{oNpXaB5QUG6(p9@y^;0!*&?&^ zK34=^97R#_<+QV%1%=r%s|(%Glrx^DU9KkGKP;HAN#eyRD#V{1PgldLC^)JfcZPxY zc5z2(@^D)C!Y-@VP=|yr;N~`Jl&8j1$X1C??5sz0rNng?lxP^5LJ_B{>5B^%ToEy} z^VHa!Ig0+$TtSuLb+F6<7ZxkgW#I^ds)7{BX1LYM+-HXhT^r-6bZ!|PaI)Y=zQjDz z4c}(R(bk*QB3~996*X^1KX)8hpFldnx#uHJIO#5xO>U6DB7>W*vaW2vgFhDy@waUw$E0ZHyq zhNVz(RvGnDS+Gx>L*l2qpxJ*kEpw=)lpYo=`KO8399JSh`x?D*qMR0;s)MdT=9I-s zh~pVrl=33i5~)x-!Z9x~~d_=b|Y1=UN)S)`Hz@Wlosm zhEi!HeKD_`MupVj{ZD27>&Ggbsfwd11FEUuxETd!W&UuO8V*gdG*Mqg3(nT#;7OT_ zKU3jCK`bphE%fJV!I4bx!3Z_xZxPShRM6jUiFzRBviosYOt_y)S!XI}QG^AdwV!-ZZZs-T!Xj_z0PqWm^OFRx0h z9IM2Ec@dNmQcc4ynz3lTIAc^Pfh7|t|6na$USdXslh}^3T%a2|gg!V_N@XKN4GRGG z+U1VJH-^%u&1IzMUytVxN$jmr;@u!-e#C(MkOzXz9!{O$cNkxJKqd<4INq9*E>sBzRffhHB#kneO0OhTtI zwQ2;+8%I-v%juTLX`)T$J#$nzxF?F*Hg2b9=3B6?6L?RI8iO6i(!L&*6kTXW{J$ho zW7W8GAeNR+uO#J2Gy3(G`0^chz+((KuC64V(t?0zWL_68IQN}M1*PS5^{xfS2guye zPK{;Ccq&{Z>Q$0ht3rRr4k^*UbTS?NX9eAR*^D_ugq}|;u|^p~o2Kld$ve$h?j-a2 zSQlLLdyPgacGAL&7EEc7`OQ%tXntujjb2?w+3CBHH(BP=57gM#JdrluE~l<%EikW^ zI9co|&F@U1d)s$X)_yaFy#hY}7olD2NV;^qiaJ`&FrJmT^D$SvigC0e%t$5TSxBwS zZwD(eYTrbvf4-8Az9sZ>PUehdO56^gMAcu`lFxWEP6vSv&${CE*pWolqBebQM$CC} zw%PBFrr|NfH>)V_d_C4gf>#ATi|e0HrmjJ?G|a(^n^RzZO=By*>Rr(N*Es(s|+!kHPy-9Vq%c+arEc zGBx)9|Nrl=(6kh>&u@Re^U`xOg#$4s*>1%oW~N1kd;J*#fgzzmL4gB9`t=VD8WcJp zFr?q0;J^X>2M!7vG$3d|P;h8yNMPu|-~o1)qvGO{{DeDTZ!u90cEW!+SC9HL-RwU+ zcKsJ0>q($Ty%^8OK!Sxm(*n>F=FXFNk6J4}kKujz6ukfNbDo5J)G6uy7^GU{qy>t9 zI5bZpJ$iEO@)*+EmXRL#EKkBbD%i(iV%oPz=rys>rdi5 zsuTamaMbD+aavYzOjO&p0lyFL+3LkVfF63K5AUhJ2~nXi3UK=+#@T&NMYm_|-w$2A QHg;c2;Uqrx{tx>8A2uJqcmMzZ literal 0 HcmV?d00001 diff --git a/test/models/data/domino_output-unet.pth b/test/models/data/domino_output-unet.pth new file mode 100644 index 0000000000000000000000000000000000000000..9ba6b36de77486f1ce0aedc655a71f9f3154864f GIT binary patch literal 5501 zcmb7I30#!b`W|4G??VIyOi|RFGhR1DL~-jqE@Xt_t|2Z60}Pu8vY8p0(anY2Gg6ru z&G@^gXkI1W_Yf(vx}cdEX}M)-WoEggruR%{KlS4McRzk-z8~M5^PJ~A=Xu}r%{wa0 zMXS*$isnDARvLpQE^S&;YFboUcGmRltghLqmaJ}Z<}9=S^rMP*r1Gt)AnW(2yXJ9X?BVUV+A%AV!~OSDVEr*q_osb*O3`Z(=9Hv_ z)M=K~tdQW2f8jrz=V|v|JskuFyqtc?Sx)X~Pbc83>Dds0m{+naX%6K>f&>2cybt-L zBxPl#SR5SQms6H#4aY% zn+jU-lAX$T#=)>iqxh+WXu>QT)h~^P{0>aGW|Y#@G%M8~w7~weOw8|3R7f^jIn)9X zXHDW#Hj(#7YpM73NEqnKM2pjc8n3id#qua9{*H;Ven_eNtz`K!5(2gx1^Z0U?#VVP zX^{v%|1gREXNlYjZM3OT0xbQ^C_;7-&0Aq5V?-nvCBNFw0OEC~Ln~8;?L}%LDs8jD~2!EFev&8?<&qm?HEl~BMNt9O-m1Ww=y^#g929uy% zqT}oA)Wa_Z-h0+0u9OpnZ?e*Y+c9v}XcC3r5ZxMMrGOkWG|FQl(^al#v5mae#K5vL zCMGOIGOo4KWUU#(v?fs+Lez4gjq=r47+c6h@p7UKW*aU0B?ewv%S6l~qW;l#TGuTK zX3b?HFI-T>p*3`HND>t7WMai0q&%yQ4!4Pi!IJO#t4K@b`2IN-Xc1%-{(S}Y5H{*H zA{yrHFo`_>5<0csN*P{p@cloS7<0XZ)_dFNh?^PwjxtgIE79OiHoBJ;3kW6=ESKEs zegS2kje|R0OnjOk_bbd!*7suJ-3}(P;0-~ZgKbo?#{%gACQ-Zv$-mS}AAgVln>R60 z`aaUY;WqO3H^aQ8a{Oqbn$HWU+xS>$7stfTendHAZFJmjffRp}xTO~~Y=D)fwvB-_ z6BFv|NIP0e{XL_hV1r4VdwV)X`8#NA%fiE>C zk-k~btt(dgc}gr4tTu_5Hb}#l*r-jnc&Hs~688=fb$`P~my%<^=NJ>sIuSKWv{Cxm zSm=02?w{g6MkCk7K;#EZ7~e%Y$!w&O8PriGQQwuQ zxWr1Pax*lO{_S~!Xlu5OlKw9mX36+yGKr|@sEsaMkAV)8Ok&v>qTRn(>G0=~P%8Oe z@f8&8W22!jCcyB?^8Y}hi0=!iusjY zc;#L!yzXWa-eZZ>4{UTtOo11sn1n5uNO!|VbH0j(%gIJDH&alk^h5rESori!ljz)z zD7VB;_cljC+|Nvmo+#+_GCLi<8V`+LGKq?jNSlV)si?IDCP+UTJOy1CD*g3yB;4*~ z65CaxFC*-fzc~_)#F&JeD^bh>E8PM!j2L1RHZQ5C%%}Szra(~y6XQ0>eA3@eS>s|M z;1`o9x+VGFk$T3)LCPMJSbI!R?G!uZd~Alj5^vzO5~^4w_jhtET(OwMzMqJi=3B|l z5(DL>CQ&ByM`5Owq9-T7Gv646M&^&7atmm1T{P6a-%f11h~&D|O1iWd2>;3`1};FV zn`WocNfziQ^FzQuqTnza*@~kf)GGHoj3}V{TB>`=47OM%>fRG{_=uIJpOpD+9uvuH z1o`)|(ubpC!Etg`HV$}hpggu5?Z2H|eV=-Abq@&+`d?432Pf#w&yD=I`8rrK_b@8e zPI$Xc;k`OB-t{9bEHRbiTZ>g}eOTe|EM)w()jGJ?;Q(%FV?aN<$}@g9@-=65&;+W{ z&DjI90u}!D2*!_mYJlWz$MCvN!Dd-1Uz^JK#atbney0X+ru*R7WR<7fG;+~g59!h6 zxUH}m9v!LjypI^~X*2-bD?_!J4vnW&e)fowHyP;+5qHaR!eUSKEK&Kmw2`n86hgSE@nLZm=4zOufSmwyzuSs6khZjq_dKKR8(g(qA!^4v&0%sg6yBWxNh->dL5I~o5Z zLl1k3Yp{!VW89@vIepCdgq?aQx7XsT^FDZQjmkrNG9J}Z2ZuvzvG4;uR`gbRzsp9x zsKOcEUr>wVR;l>?7Ye@<$@ox%9`^Br=nkGZ?7YI~tuXSuA$llN4`5+gBV4`Qz|S}_ z-ejv52A5Z0(6=6#e_P>c=Z(Cmr5?8YR)vYbH^p5C6z*nboV9TQYe5|rzM;hhEfwDH zu8}7W)xitr4&lm|^*BFQsYD*TPj(C06_CaIit; zw^thZwR<}7>Q;-#B5ssu4=H?`ALE0}dT@TO3J-my!$p%69#PEr4+VN~Yj*$_jPO9O ztttXN!=;z!V1F95m8OnI^9;x4iLs)dp3F}*_JX&(z)7Jpg7gb?Mk{kZo zTIH@ejL!@-z^m2!aBvSD4xOU%t>s3JpoM_1YjDpHJ-WA6`1E6pcfPHK^)1SASdus9 zA658~iIPj14ratuV0D!%y024s|7nbe#_A#C#{=kpw*?M+N99FPjGvLu!V7Eg?o4+~ z36p!dk?~aNt4Ys)f^|b(u(9NP?~0KJ#~I+9T7%X`9{AQ&m9LkcteK^SZQH6bu&s)B zcd2~Tc*d7JuZJ|WUU3zg4#nQ?8c22$Uw!c{gM9y_7(-Pw${`C1RY%d7CU z#!h(ZeT7F)Wqj5U1Drgu5AXZ9;|dRzE1dD1wQ?^f)!+taH*~+M@a=U*P6h+CDXqeq zXBwk9TgFTq#%FiY1J)l#_oTZet{YVT!wn#Fo#y^0-lqx9qQj z`0W*VEaZMksE^csIODsder>iNz?g#yR%uiovyAZ)sg-YW75=lGH#X}bW2QCZeOfp?r~-3VdgE;;S02)V@p?}^ zP|{)a%~jA@YBer~@lWsQz>Hrx*(&rS1C~9n^89xgKebT{ z+Q2Ft6!|Q6mU(URI3rIUql3bchj8XE8tjs-@CzduU$aOH=Zw{un$rX;M;<_JZ_Kww$AB4TiRI$2@OWzIl*b z$7@=6E}#ZGb}-iA6FW{4k60=3i;y-ppD&H`NKxhbw$z zF5{yMUBIbxHQvniz_})sAC_J#oudQ&@M=6atr1oYQhC59M&2h}5AmB1VpWm{o*u38 zP6ozz3{hZr^?oc?8)NE?g1y^35%=jHQ1u`uAas3HjTxL-@ z^Odon(SgU>8az401+)6e_ean_K%bz%9=*Ev?bSOVD6ntOfZjd&^bHIM>?^OJUVVdl_U;knV41gDeW9bQ z4mjGVFeeA$zbaRc)@NEdzIg2VFLkUZfgY{J7#{=aDa$hr06kIMc@po@(n|Zs@RmOX z??37}PeMLgDe-*_(o<^E0L8y5G*2Qu`sV8M7*c-2NDt~PPr^KE*vDZ;sSOhP14fp< z9^b=9UD)z5HtU);2=k}cpTv38C!HR{@$_sEr(p+=3e)TT{v6)MrR!fn4{N0l@3DUv jQKKoA;EqdQsM+ofFk+KIkg;~nOWAmH>;&q|NmR>thLu!>)f-y^PPS6J^TDVwW32u zBO?h>Rm8I4dF`RYQ$ zeFLq$BXmL9rKaBD{tF_4w0_So`NLrV?$a zk8axYLDR4fUhVXAtiw8bb

|OCrKLxo&Q!3o~|C7%AE*{1hFd!#e9n6de`qqQgu! z`1*!wwf?@kpwN(|rf%VW{^9<9PC>pp)37dYYr;%*VXtYz%oObc^{WTA4}6Y3F6{Nd zPOgvis9k?Y?Gzo>O+TXOWI#3F5NL3ILl z@k-3B7KVl|_R)Dq`iC#obCfu~{zvG3jo>%~c+I`GD8sf5_V?gDjT~M9U+})pKh&^i ziQ~kVVShiK|F_o$>2z9u1BrEfwYRaJfWiKrM0X?mS4bGLVyv~q(e{60;Nh+mk0-ma z$LO9?4pC73bCNrgR}PSf&G+Sv{$AO<`Hh({$@2s&P}IB89U(a+Z*y$?bGc>$uTQMxgWYD`i)#9p=8+Y4Viy?zD>B1BjF-tXzTl-a@ zv40XhOp*xCMt2^VR)#vaBzlLGqBT*&70FfDcx5}8*1Cz?Ev~%3vKq&3ZYQ(R67g)U zhNqPjBI;r+(KDI&%5)Al-l~G+!xY+;uMo-S)%@j^{m5Cdjk>6mBJ#K=SAA8ErOK^z z{GMERwzzZ5of23*$f6d7QcRxi!E?F-eS@>*tvNsgM z_7@jkb>jd!cTJ=`Qj5%oZk#t_4=NUA(6Sj$;!UL+f7rhgvd_~fxx0&yI=FG#g(6I; zOQq7qa^Yp7#!Yf@bi9f;@2O8jD%M6Kyl#iATF*SXX}Tar%p zQ4%p{f{JCA%i(XCOCibnc!p`XZbB^<4Bkm*cbvtXBzHD;E5(pCNp$C3SD}5V;P(G2 z#_g#)Xl-Yu=uE8a6vq4b`))wANoPh8BBqU33lFM(?D#-4$YRwi{o&U4{VL zt#s})rH~1I{=Hp}$n(0xf%Wxq#i6T}jg;8HWR;tP{>~IFfl*`45@@c%NvIe)T@+eU1Bv!`F;EDq^sDGM5 z89IenJ4D6W3$>7xr&DHuN?3Jv=bI zS@0{#qkYaQ@g&8Ix0jS)_@)%<6{rwpJ2m|9Vi_{ZlBgiqMHu_Lv)Z~Cg)tR0$VDv% zl}+cB4MnJuY@#a_uEO>^70G$W+PRYi>dob)> zJ)bpl@xf3J{@39uOnR72F;nEi=_>`_@u-2(zBtqU5V2FA1%}zXi^EQ(n5;dsul;6m7Md{ z0aVoIQ`Bi^Q8iA*KMbnDbo*>Ntj~|j*-F+NDM7q6iEd6;3GYTXwz*%5yvr%{v#Cs! zu5;sqUlhX6DucdRl*HOy9&dG zXHe!urI_>XG;Z5ag$A>1I$)_3vHJT%JEs^&D1!`@YZ+;mPS%$#gZ*EXYtmm;u5l7a zhyST^Juj_Z;ZnX!o6t*nmR8^O!}yLlxVzhlo%$!Ew)!F&k6etr;bpjJ=-*bz?`g3RK&94?lXlICLj=q#+<=fqu>ry=H03uTtffYPB7Azw;a z9F0Mm>J)YFG#}!f0$h0{=i~>m(A0iT5y4uVa@>va51qN~C@V@}3)xJ_8Go(?`nnpKrjYaZPl4Yb4C0vN1S8 z%3s`%MT6f7+UTK0*6n;Kz2#iknu;^sn0={n}2` zqbp&MO)Nw&TPZ6JufdASPYBmn;h;q|7Q8-{<2EOtYt>oGeKrGmbBf^6PQfV+3GfVR zq+@sHBGSJU!@iYpzwFKE`+W-?KkbhNJ@fHNoQwyKUWYk-&rtl)0Ng0ZMx?QXdmT!E zt@9a5o-r4_CgfmCmV~@oO(YA^`?!a{oWt_nu~F%zl1fd z>+$sIQL0D^M)#0B{A42K<-HRz(ef1S%b%_HE5__PDesDlgUN$4bXUF@!<$QC{>rSfpEH~2qcA_rnOBHU ztmWK&P#i7~I7e%S1)$768^?x7`Lj!j*sp1!4EUhJvjp#Vlyh6xWMu4Uro}#iSlhD# zce12B`Aj?(zNbH<`~JB2EDzteNZE$sk+-UeroG{dxXN6N&2!=st3(v+J4d^ zLRkDLOgLpwvkqk61E!I;Bj^}{0AjV;G(ha`MBeG87SZK4Z9 z{Lt1Z7dh?aToahg}1uT+^bXrU(;P%X!Q@@$gii zrrEh}(2g#|cDXa3I}?io$)_ni!5cwSvT>-}d~vRMKgxcQ@W!7KkQ3ZU3#Bt)HL?I@YG-!(W*sDB&eMREp3n}?!m&>zoH!vF ztrITNREfT(1mz;l(V0ixTZ!({6SO4Q7uRl-p}Ut8&l?hrK9kQ-(5PU9AIryMX9@Rr zUxRtOPEyzT;jpO72EK9T6Qed__xZDQQy4;zws@LIWjzp^2(tllzCSDxk6kWN;F18$e6JMA z*JK+DY3ZQvy~RvhG71o zo#?qx&N_XaNi;u6Mqkgvy`e>@3zu;HqcnUrzL}zGv>4o44vU*ozB)M`@ypIpqE3Ie zH|FDN`^g+QFcX6kF3?r|J-K;tEzZ?Sd4Op&_L?7|M+w2Oj4np(1PO<{z78tQNpc+* ziTgXVF?O7cW6Yv)rRofw9lQj$d^7Otdm^&9$0@y3VX(h{PxLSv^U8bTk8k|9|N95U zcvHiQr!xYkx7UAn8|?2_9A-57l@+ai{rFz$A0FhR4O-!2P(i$d{3MPe{%Zo`CfL}% zWA~2D1Zx}HiS`rhtnIDGTie;$T3b)F9siD<-9#Ik@izJa1Ix@Y-E2na85&HjXlEe% zhjR3yK4NM3;idN1I?}5cFUlbOU-GxrDjQIn@Am$kl>*S9(u6ehh- LXCuS>=X3u9)v$>- diff --git a/test/models/domino/test_domino.py b/test/models/domino/test_domino.py index e606b9ecf8..7e0643b92f 100644 --- a/test/models/domino/test_domino.py +++ b/test/models/domino/test_domino.py @@ -59,110 +59,118 @@ def validate_domino( return compare_output(output, output_target, rtol, atol) -@import_or_fail("warp") -@pytest.mark.parametrize("device", ["cuda:0"]) -def test_domino_forward(device, pytestconfig): - """Test domino forward pass""" +@dataclass +class model_params: + @dataclass + class geometry_rep: + @dataclass + class geo_conv: + base_neurons: int = 32 + base_neurons_in: int = 1 + base_neurons_out: int = 1 + surface_hops: int = 1 + volume_hops: int = 1 + volume_radii: Sequence = (0.1, 0.5, 1.0, 2.5) + volume_neighbors_in_radius: Sequence = (32, 64, 128, 256) + surface_radii: Sequence = (0.01, 0.05, 1.0) + surface_neighbors_in_radius: Sequence = (8, 16, 128) + activation: str = "gelu" + fourier_features: bool = False + num_modes: int = 5 - from physicsnemo.models.domino.model import DoMINO + @dataclass + class geo_processor: + base_filters: int = 8 + activation: str = "gelu" + processor_type: str = "unet" + self_attention: bool = False + cross_attention: bool = False + volume_sdf_scaling_factor: Sequence = (0.04,) + surface_sdf_scaling_factor: Sequence = (0.01, 0.02, 0.04) - torch.manual_seed(0) + base_filters: int = 8 + geo_conv = geo_conv + geo_processor = geo_processor @dataclass - class model_params: - @dataclass - class geometry_rep: - @dataclass - class geo_conv: - base_neurons: int = 32 - base_neurons_in: int = 8 - base_neurons_out: int = 8 - surface_hops: int = 1 - volume_hops: int = 1 - volume_radii: Sequence = (0.1, 0.5) - volume_neighbors_in_radius: Sequence = (10, 10) - surface_radii: Sequence = (0.05,) - surface_neighbors_in_radius: Sequence = (10,) - activation: str = "relu" - fourier_features: bool = False - num_modes: int = 5 - - @dataclass - class geo_processor: - base_filters: int = 8 - activation: str = "relu" - processor_type: str = "unet" - self_attention: bool = True - cross_attention: bool = False + class geometry_local: + base_layer: int = 512 + volume_neighbors_in_radius: Sequence = (64, 128) + surface_neighbors_in_radius: Sequence = (32, 128) + volume_radii: Sequence = (0.1, 0.25) + surface_radii: Sequence = (0.05, 0.25) - base_filters: int = 8 - geo_conv = geo_conv - geo_processor = geo_processor + @dataclass + class nn_basis_functions: + base_layer: int = 512 + fourier_features: bool = True + num_modes: int = 5 + activation: str = "gelu" - @dataclass - class geometry_local: - base_layer: int = 512 - volume_neighbors_in_radius: Sequence = (128, 128) - surface_neighbors_in_radius: Sequence = (128,) - volume_radii: Sequence = (0.05, 0.1) - surface_radii: Sequence = (0.05,) + @dataclass + class local_point_conv: + activation: str = "gelu" - @dataclass - class nn_basis_functions: - base_layer: int = 512 - fourier_features: bool = False - num_modes: int = 5 - activation: str = "relu" + @dataclass + class aggregation_model: + base_layer: int = 512 + activation: str = "gelu" - @dataclass - class local_point_conv: - activation: str = "relu" + @dataclass + class position_encoder: + base_neurons: int = 512 + activation: str = "gelu" + fourier_features: bool = True + num_modes: int = 5 - @dataclass - class aggregation_model: - base_layer: int = 512 - activation: str = "relu" + @dataclass + class parameter_model: + base_layer: int = 512 + fourier_features: bool = False + num_modes: int = 5 + activation: str = "gelu" + + model_type: str = "combined" + activation: str = "gelu" + interp_res: Sequence = (128, 64, 64) + use_sdf_in_basis_func: bool = True + positional_encoding: bool = False + surface_neighbors: bool = True + num_neighbors_surface: int = 7 + num_neighbors_volume: int = 10 + use_surface_normals: bool = True + use_surface_area: bool = True + encode_parameters: bool = False + combine_volume_surface: bool = False + geometry_encoding_type: str = "both" + solution_calculation_mode: str = "two-loop" + geometry_rep = geometry_rep + nn_basis_functions = nn_basis_functions + aggregation_model = aggregation_model + position_encoder = position_encoder + geometry_local = geometry_local - @dataclass - class position_encoder: - base_neurons: int = 512 - activation: str = "relu" - fourier_features: bool = False - num_modes: int = 5 - @dataclass - class parameter_model: - base_layer: int = 512 - fourier_features: bool = True - num_modes: int = 5 - activation: str = "relu" - - model_type: str = "combined" - activation: str = "relu" - interp_res: Sequence = (128, 128, 128) - use_sdf_in_basis_func: bool = True - positional_encoding: bool = False - surface_neighbors: bool = True - num_neighbors_surface: int = 7 - num_neighbors_volume: int = 7 - use_surface_normals: bool = True - use_surface_area: bool = True - encode_parameters: bool = False - combine_volume_surface: bool = False - geometry_encoding_type: str = "both" - solution_calculation_mode: str = "two-loop" - geometry_rep = geometry_rep - nn_basis_functions = nn_basis_functions - aggregation_model = aggregation_model - position_encoder = position_encoder - geometry_local = geometry_local +@import_or_fail("warp") +@pytest.mark.parametrize("device", ["cuda:0"]) +@pytest.mark.parametrize("processor_type", ["unet", "conv"]) +def test_domino_forward(device, processor_type, pytestconfig): + """Test domino forward pass""" + + from physicsnemo.models.domino.model import DoMINO + + torch.manual_seed(0) + + params = model_params() + + params.geometry_rep.geo_processor.processor_type = processor_type model = DoMINO( input_features=3, output_features_vol=4, output_features_surf=5, global_features=2, - model_parameters=model_params, + model_parameters=params, ).to(device) bsize = 1 @@ -213,5 +221,8 @@ class parameter_model: } assert validate_domino( - model, input_dict, file_name="domino_output.pth", device=device + model, + input_dict, + file_name=f"domino_output-{processor_type}.pth", + device=device, ) diff --git a/test/models/domino/test_domino_geometry_rep.py b/test/models/domino/test_domino_geometry_rep.py index 628e760aa5..940d64a9df 100644 --- a/test/models/domino/test_domino_geometry_rep.py +++ b/test/models/domino/test_domino_geometry_rep.py @@ -16,6 +16,7 @@ from dataclasses import dataclass +import numpy as np import pytest import torch @@ -36,19 +37,27 @@ class TestParams: base_neurons: int = 32 base_neurons_in: int = 8 fourier_features: bool = False + neighbors_in_radius: int = 8 num_modes: int = 5 activation: str = act params = TestParams() params.fourier_features = fourier_features + input_features = 3 + grid_resolution = [32, 32, 32] layer = GeoConvOut( - input_features=3, model_parameters=params, grid_resolution=grid_resolution + input_features=input_features, + neighbors_in_radius=params.neighbors_in_radius, + model_parameters=params, + grid_resolution=grid_resolution, ).to(device) - x = torch.randn(1, 32 * 32 * 32, 10, 3).to(device) + x = torch.randn(1, np.prod(grid_resolution), params.neighbors_in_radius, 3).to( + device + ) grid = torch.randn(1, *grid_resolution, 3).to(device) output = layer(x, grid) From e91f2630b30ed8256173d13e891f67c2efe987d1 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Fri, 17 Oct 2025 11:53:36 -0500 Subject: [PATCH 88/98] Hopefully fix inference script --- .../external_aerodynamics/domino/src/inference_on_stl.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py index 89d7a9ba24..7276807bfa 100644 --- a/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py +++ b/examples/cfd/external_aerodynamics/domino/src/inference_on_stl.py @@ -63,9 +63,7 @@ DoMINODataPipe, create_domino_dataset, ) -from physicsnemo.datapipes.cae.drivaer_ml_dataset import ( - DrivaerMLDataset, -) + from physicsnemo.models.domino.model import DoMINO from physicsnemo.utils.domino.utils import sample_points_on_mesh @@ -378,7 +376,7 @@ def inference_on_single_stl( def inference_epoch( - dataloader: DrivaerMLDataset, + dataloader: DoMINODataPipe, sampler: DistributedSampler, model: DoMINO, gpu_handle: int, From e9dbac9c1c4b92f52936dcb270753ba270bdc00e Mon Sep 17 00:00:00 2001 From: Rishikesh Ranade Date: Fri, 17 Oct 2025 11:51:53 -0700 Subject: [PATCH 89/98] fixes to scaling and adding configs --- .../domino/src/conf/config.yaml | 23 ++++++++++- .../external_aerodynamics/domino/src/loss.py | 13 +++--- .../external_aerodynamics/domino/src/test.py | 9 +++- .../external_aerodynamics/domino/src/train.py | 41 ++++++++++++++++--- .../external_aerodynamics/domino/src/utils.py | 8 ++-- physicsnemo/datapipes/cae/cae_dataset.py | 22 +++++----- physicsnemo/datapipes/cae/domino_datapipe.py | 8 ++-- 7 files changed, 89 insertions(+), 35 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml index b1b25515b4..b074681ce4 100644 --- a/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml +++ b/examples/cfd/external_aerodynamics/domino/src/conf/config.yaml @@ -86,7 +86,7 @@ data: # Input directory for training and validation data sampling: true scaling_factors: ${project_dir}/scaling_factors/scaling_factors.pkl volume_sample_from_disk: true - max_samples_for_statistics: 100 + max_samples_for_statistics: 200 # ┌───────────────────────────────────────────┐ # │ Domain Parallelism Settings │ @@ -118,7 +118,7 @@ model: use_surface_normals: true # Use surface normals and surface areas for surface computation? use_surface_area: true # Use only surface normals and not surface area integral_loss_scaling_factor: 100 # Scale integral loss by this factor - normalization: mean_std_scaling # or min_max_scaling + normalization: min_max_scaling # or mean_std_scaling encode_parameters: false # encode inlet velocity and air density in the model surf_loss_scaling: 5.0 # scale surface loss with this factor in combined mode vol_loss_scaling: 1.0 # scale volume loss with this factor in combined mode @@ -188,6 +188,25 @@ train: # Training configurable parameters drop_last: false checkpoint_dir: /user/models/ # Use only for retraining add_physics_loss: false + lr_scheduler: + name: MultiStepLR # Also supports CosineAnnealingLR + milestones: [50, 200, 400, 500, 600, 700, 800, 900] # only used if lr_scheduler is MultiStepLR + gamma: 0.5 # only used if lr_scheduler is MultiStepLR + T_max: ${train.epochs} # only used if lr_scheduler is CosineAnnealingLR + eta_min: 1e-6 # only used if lr_scheduler is CosineAnnealingLR + optimizer: + name: Adam # or AdamW + lr: 0.001 + weight_decay: 0.0 + amp: + enabled: true + autocast: + dtype: torch.float16 + scaler: + _target_: torch.cuda.amp.GradScaler + enabled: ${..enabled} + clip_grad: true + grad_max_norm: 2.0 # ┌───────────────────────────────────────────┐ diff --git a/examples/cfd/external_aerodynamics/domino/src/loss.py b/examples/cfd/external_aerodynamics/domino/src/loss.py index 60c52beb32..cb161cb06e 100644 --- a/examples/cfd/external_aerodynamics/domino/src/loss.py +++ b/examples/cfd/external_aerodynamics/domino/src/loss.py @@ -227,8 +227,8 @@ def loss_fn( num = torch.sum(mask * (output - target) ** 2.0, dims) if loss_type == "rmse": - denom = torch.sum(mask * target**2.0, dims) - loss = torch.mean(torch.sqrt(num / denom)) + denom = torch.sum(mask * (target - torch.mean(target, (0, 1)))**2.0, dims) + loss = torch.mean(num / denom) elif loss_type == "mse": denom = torch.sum(mask) loss = torch.mean(num / denom) @@ -306,12 +306,12 @@ def loss_fn_surface( masked_loss_pres = numerator masked_loss_ws = torch.sum(vector_diff_sq) else: - denom = torch.mean((target_scalar) ** 2.0) + denom = torch.mean((target_scalar - torch.mean(target_scalar, (0, 1))) ** 2.0) masked_loss_pres = numerator / denom # Compute the mean diff**2 of the vector component, leave the last dimension: masked_loss_ws_num = vector_diff_sq - masked_loss_ws_denom = torch.mean((target_vector) ** 2.0, (0, 1)) + masked_loss_ws_denom = torch.mean((target_vector - torch.mean(target_vector, (0, 1))) ** 2.0, (0, 1)) masked_loss_ws = torch.sum(masked_loss_ws_num / masked_loss_ws_denom) loss = masked_loss_pres + masked_loss_ws @@ -359,13 +359,12 @@ def loss_fn_area( # Compute the mean diff**2 of the scalar component: masked_loss_pres = torch.mean(((output_scalar - target_scalar) ** 2.0), dim=(0, 1)) if loss_type == "rmse": - masked_loss_pres /= torch.mean(target_scalar**2.0, dim=(0, 1)) + masked_loss_pres /= torch.mean((target_scalar-torch.mean(target_scalar, (0, 1)))**2.0, dim=(0, 1)) # Compute the mean diff**2 of the vector component, leave the last dimension: masked_loss_ws = torch.mean((target_vector - output_vector) ** 2.0, (0, 1)) - if loss_type == "rmse": - masked_loss_ws /= torch.mean((target_vector) ** 2.0, (0, 1)) + masked_loss_ws /= torch.mean((target_vector - torch.mean(target_vector, (0, 1))) ** 2.0, (0, 1)) # Combine the scalar and vector components: loss = 0.25 * (masked_loss_pres + torch.sum(masked_loss_ws)) diff --git a/examples/cfd/external_aerodynamics/domino/src/test.py b/examples/cfd/external_aerodynamics/domino/src/test.py index 2fa0de34f7..d00d6dcd8d 100644 --- a/examples/cfd/external_aerodynamics/domino/src/test.py +++ b/examples/cfd/external_aerodynamics/domino/src/test.py @@ -56,6 +56,7 @@ from physicsnemo.distributed import DistributedManager from physicsnemo.datapipes.cae.domino_datapipe import DoMINODataPipe from physicsnemo.models.domino.model import DoMINO +from physicsnemo.models.domino.geometry_rep import scale_sdf from physicsnemo.utils.domino.utils import * from physicsnemo.utils.domino.vtk_file_utils import * from physicsnemo.utils.sdf import signed_distance_field @@ -159,7 +160,7 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors): prediction_vol = torch.zeros_like(target_vol) num_points = volume_mesh_centers.shape[1] subdomain_points = int(np.floor(num_points / point_batch_size)) - + sdf_scaling_factor = cfg.model.geometry_rep.geo_processor.volume_sdf_scaling_factor start_time = time.time() for p in range(subdomain_points + 1): @@ -171,6 +172,11 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors): :, start_idx:end_idx ] sdf_nodes_batch = sdf_nodes[:, start_idx:end_idx] + scaled_sdf_nodes_batch = [] + for p in range(len(sdf_scaling_factor)): + scaled_sdf_nodes_batch.append(scale_sdf(sdf_nodes_batch, sdf_scaling_factor[p])) + scaled_sdf_nodes_batch = torch.cat(scaled_sdf_nodes_batch, dim=-1) + pos_volume_closest_batch = pos_volume_closest[:, start_idx:end_idx] pos_normals_com_batch = pos_volume_center_of_mass[ :, start_idx:end_idx @@ -184,6 +190,7 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors): pos_encoding_all = torch.cat( ( sdf_nodes_batch, + scaled_sdf_nodes_batch, pos_volume_closest_batch, pos_normals_com_batch, ), diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py index 070bb71412..9758ed7e2f 100644 --- a/examples/cfd/external_aerodynamics/domino/src/train.py +++ b/examples/cfd/external_aerodynamics/domino/src/train.py @@ -100,6 +100,7 @@ def validation_step( bounding_box: torch.Tensor | None = None, vol_factors: torch.Tensor | None = None, add_physics_loss=False, + autocast_enabled=None, ): dm = DistributedManager() running_vloss = 0.0 @@ -109,7 +110,7 @@ def validation_step( for i_batch, sample_batched in enumerate(dataloader): sampled_batched = dict_to_device(sample_batched, device) - with autocast("cuda", enabled=True, cache_enabled=False): + with autocast("cuda", enabled=autocast_enabled, cache_enabled=False): if add_physics_loss: prediction_vol, prediction_surf = model( sampled_batched, return_volume_neighbors=True @@ -189,6 +190,9 @@ def train_epoch( vol_factors: torch.Tensor | None = None, surf_factors: torch.Tensor | None = None, add_physics_loss=False, + autocast_enabled=None, + grad_clip_enabled=None, + grad_max_norm=None, ): dm = DistributedManager() @@ -205,8 +209,7 @@ def train_epoch( io_end_time = time.perf_counter() if add_physics_loss: autocast_enabled = False - else: - autocast_enabled = True + with autocast("cuda", enabled=autocast_enabled, cache_enabled=False): with nvtx.range("Model Forward Pass"): if add_physics_loss: @@ -251,6 +254,14 @@ def train_epoch( scaler.scale(loss).backward() if ((i_batch + 1) % loss_interval == 0) or (i_batch + 1 == len(dataloader)): + if grad_clip_enabled: + # Unscales the gradients of optimizer's assigned params in-place. + scaler.unscale_(optimizer) + + # Since the gradients of optimizer's assigned params are unscaled, clips as usual. + torch.nn.utils.clip_grad_norm_( + model.parameters(), grad_max_norm + ) scaler.step(optimizer) scaler.update() optimizer.zero_grad() @@ -483,10 +494,24 @@ def main(cfg: DictConfig) -> None: # Initialize optimzer and gradient scaler ###################################################### - optimizer = torch.optim.Adam(model.parameters(), lr=0.001) - scheduler = torch.optim.lr_scheduler.MultiStepLR( - optimizer, milestones=[50, 100, 200, 250, 300, 350, 400, 450], gamma=0.5 + optimizer_class = None + if cfg.train.optimizer.name == "Adam": + optimizer_class = torch.optim.Adam + elif cfg.train.optimizer.name == "AdamW": + optimizer_class = torch.optim.AdamW + else: + raise ValueError(f"Unsupported optimizer: {cfg.train.optimizer.name}") + optimizer = optimizer_class(model.parameters(), lr=cfg.train.optimizer.lr, weight_decay=cfg.train.optimizer.weight_decay) + if cfg.train.lr_scheduler.name == "MultiStepLR": + scheduler = torch.optim.lr_scheduler.MultiStepLR( + optimizer, milestones=cfg.train.lr_scheduler.milestones, gamma=cfg.train.lr_scheduler.gamma ) + elif cfg.train.lr_scheduler.name == "CosineAnnealingLR": + scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( + optimizer, T_max=cfg.train.lr_scheduler.T_max, eta_min=cfg.train.lr_scheduler.eta_min + ) + else: + raise ValueError(f"Unsupported scheduler: {cfg.train.lr_scheduler.name}") # Initialize the scaler for mixed precision scaler = GradScaler() @@ -586,6 +611,9 @@ def main(cfg: DictConfig) -> None: bounding_box=bounding_box, vol_factors=vol_factors, add_physics_loss=add_physics_loss, + autocast_enabled=cfg.train.amp.enabled, + grad_clip_enabled=cfg.train.amp.clip_grad, + grad_max_norm=cfg.train.amp.grad_max_norm, ) epoch_end_time = time.perf_counter() logger.info( @@ -612,6 +640,7 @@ def main(cfg: DictConfig) -> None: bounding_box=bounding_box, vol_factors=vol_factors, add_physics_loss=add_physics_loss, + autocast_enabled=cfg.train.amp.enabled, ) scheduler.step() diff --git a/examples/cfd/external_aerodynamics/domino/src/utils.py b/examples/cfd/external_aerodynamics/domino/src/utils.py index 9970c186f4..9c144fa0c3 100644 --- a/examples/cfd/external_aerodynamics/domino/src/utils.py +++ b/examples/cfd/external_aerodynamics/domino/src/utils.py @@ -431,10 +431,10 @@ def metrics_fn_volume( l2 = l2_num / l2_denom metrics = { - "l2_vol_pressure": torch.mean(l2[:, 0]), - "l2_velocity_x": torch.mean(l2[:, 1]), - "l2_velocity_y": torch.mean(l2[:, 2]), - "l2_velocity_z": torch.mean(l2[:, 3]), + "l2_vol_pressure": torch.mean(l2[:, 3]), + "l2_velocity_x": torch.mean(l2[:, 0]), + "l2_velocity_y": torch.mean(l2[:, 1]), + "l2_velocity_z": torch.mean(l2[:, 2]), "l2_nut": torch.mean(l2[:, 4]), } diff --git a/physicsnemo/datapipes/cae/cae_dataset.py b/physicsnemo/datapipes/cae/cae_dataset.py index 21fb8c4783..b41e217635 100644 --- a/physicsnemo/datapipes/cae/cae_dataset.py +++ b/physicsnemo/datapipes/cae/cae_dataset.py @@ -1186,7 +1186,10 @@ def compute_mean_std_min_max( global_start = time.perf_counter() start = time.perf_counter() - for i, data in enumerate(dataset): + data_list = np.arange(len(dataset)) + np.random.shuffle(data_list) + for i, j in enumerate(data_list): + data = dataset[j] if i >= max_samples: break @@ -1210,7 +1213,7 @@ def compute_mean_std_min_max( end = time.perf_counter() iteration_time = end - start - print(f"on iteration {i} of {max_samples}, time: {iteration_time:.2f} seconds") + print(f"on iteration {i} of {max_samples}, time: {iteration_time:.2f} seconds for file: {j}") start = time.perf_counter() var = {} @@ -1222,7 +1225,8 @@ def compute_mean_std_min_max( std[field_key] = torch.sqrt(var[field_key]) start = time.perf_counter() - for i, data in enumerate(dataset): + for i, j in enumerate(data_list): + data = dataset[j] if i >= max_samples: break @@ -1235,15 +1239,11 @@ def compute_mean_std_min_max( mean_sample = mean[field_key] std_sample = std[field_key] - # import pdb; pdb.set_trace() mask = torch.ones_like(field_data, dtype=torch.bool) for v in range(field_data.shape[-1]): - idx = (field_data[:, v] < mean_sample[v] - 12 * std_sample[v]) | ( - field_data[:, v] > mean_sample[v] + 12 * std_sample[v] - ) - idx = torch.where(idx) - mask[idx] = False - + outliers = (field_data[:, v] < mean_sample[v] - 9.0 * std_sample[v]) | (field_data[:, v] > mean_sample[v] + 9.0 * std_sample[v]) + mask[:, v] = ~outliers + batch_min = [] batch_max = [] for v in range(field_data.shape[-1]): @@ -1258,7 +1258,7 @@ def compute_mean_std_min_max( end = time.perf_counter() iteration_time = end - start - print(f"on iteration {i} of {max_samples}, time: {iteration_time:.2f} seconds") + print(f"on iteration {i} of {max_samples}, time: {iteration_time:.2f} seconds for file: {j}") start = time.perf_counter() global_end = time.perf_counter() diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py index fb9a920708..eb7b004d18 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe.py +++ b/physicsnemo/datapipes/cae/domino_datapipe.py @@ -851,8 +851,8 @@ def unscale_model_outputs( vol_std = self.config.volume_factors[1] volume_fields = unstandardize(volume_fields, vol_mean, vol_std) elif self.config.scaling_type == "min_max_scaling": - vol_min = self.config.volume_factors[0] - vol_max = self.config.volume_factors[1] + vol_min = self.config.volume_factors[1] + vol_max = self.config.volume_factors[0] volume_fields = unnormalize(volume_fields, vol_max, vol_min) if surface_fields is not None: if self.config.scaling_type == "mean_std_scaling": @@ -860,8 +860,8 @@ def unscale_model_outputs( surf_std = self.config.surface_factors[1] surface_fields = unstandardize(surface_fields, surf_mean, surf_std) elif self.config.scaling_type == "min_max_scaling": - surf_min = self.config.surface_factors[0] - surf_max = self.config.surface_factors[1] + surf_min = self.config.surface_factors[1] + surf_max = self.config.surface_factors[0] surface_fields = unnormalize(surface_fields, surf_max, surf_min) return volume_fields, surface_fields From 6706558fa012b4cd84491ab544e6d4e1842fe709 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Fri, 17 Oct 2025 14:14:05 -0500 Subject: [PATCH 90/98] Update README.md for domino --- docs/img/domino/combined-training-curve.png | Bin 0 -> 88946 bytes docs/img/domino/drag-r2.jpg | Bin 0 -> 31447 bytes docs/img/domino/lift-r2.jpg | Bin 0 -> 31237 bytes docs/img/domino/surface-training-curve.png | Bin 0 -> 71152 bytes .../external_aerodynamics/domino/README.md | 33 ++++++++++++++++++ 5 files changed, 33 insertions(+) create mode 100644 docs/img/domino/combined-training-curve.png create mode 100644 docs/img/domino/drag-r2.jpg create mode 100644 docs/img/domino/lift-r2.jpg create mode 100644 docs/img/domino/surface-training-curve.png diff --git a/docs/img/domino/combined-training-curve.png b/docs/img/domino/combined-training-curve.png new file mode 100644 index 0000000000000000000000000000000000000000..9a56f9d76d9227e3acd9560253467bf00742baf5 GIT binary patch literal 88946 zcmeFZWmuHk`vwXK3L-6F5zy)48(J!T>OZhg-x>)!A8NN%@Zn;CG) z0$$MUBsCln5C|x5{~<~#QT#+e5JHd=6IOOZ+-*kle9@WQdLs=nqSt_U8a_9Cv|dgm zj~9tefZHDb)ubLmeA`$!;Fwt=Ctl;+Tkw5UU;DxW|f4ccV{$4*!R0R1yV_uQI*M~-V zXA%6Tn`K~*|NqqgU)XhKQpOiM(n!RdzJ&CH_D4Ltm4+On*9cSrB0VXuB8 zR#hsd_)@(`7P?c+IYVPU0Y&z$iOO#l!@9(<)E81n!zek z6KSKeAY3ebZr^IlDG!F?(x)%1&E;TOtjJoAA5i}u=Q2a-}WTziTOpaTfHPwc>G)J&gHV+e$(^@<+xze zcp}|gAKdJtv74eywqjfu2#b`eBUw&|=2DwK_^W+Z02)Q*$?CI+CEPV#3P+s>eLI3k z*VhLH1if9>c}H6Xn1P;U`6M<@W$N^E>eCP1(+k)R_>5FxjonwJNOh*PaetfWY{6AeTZhwT-b}03CL&CMa+$OBT#oHvaXm(w zJ7n}EcR%Eaf*3F|LIT*sJWmL(HW(unP=&I}Q6G;bJcHK+-5zdr5|o6+uDw8-D~n-?h}c)Wj+nYlQ9D`poHiY*CH@(sL}Bf5u!Oinf^; zXX|~Oy4Y+O#Xotyw{*?Gro(&`NT~Z^&|YAtxN?`|=8(i+iq!>vu~Rn3y^&^k?*ZmX z-okFv@!(S^@7z=TcMF6JnntD>?s>uM5`q_sCwz0oJtRojH1tSQI<7j}f%t0lJ9^8{ zB*X^MD71Soi&P-3(t`0CnNTeEL8KsaZt=W0qgqjF*WA1P&a zD~AM*Gm?2-ZMu+Ci(PD6&hruviZv=n3-Lnm@S-)EN=pX?E;h*(9>JFhTYm;9E>kz` zU!D2fxH{2$uhHpAB{V6VZ49KEYSmaBV#?L`Qn#Ic!&69VNrYe$+kHi}-|OFK=vHw7 zi)m?TRoQl?-);DQp@iB=CNHzSQ`3V_}C}41LXkb*siS zrob=##+&O)(e4m>Yz78~JhlrJKKk+T@c?mCHk(YZ(?f3^t%`Sn>0YNZPoa(3+3lsR z*B+NkH*menF3y&i3eVo2-8hhZg&urcEw2C!N$VY*1Pyb61ZiY5_-1%Lq?e|^`ROhx zXXNjTJ0xD%i|JKDH;*AFeTAM~PfY_J)_9hJ-v~!PMJ9c#gnaT&&5FsDa>AF500{@F z^O}F3vyH=QnwfVsT2$mgGIB)w&mSFld1zkD9Wll+UY$d{2ThMQX5-TX#8_Ek;{C$- z##%2MTaUk$)0ab@P#V%#)VLp;MD_JjXQnyxJUfG2;lHC(HOXq8M{?bvz`|sh(|0x&8Qr zT=-RMxk-^N-b7iMSK`ja#oQ->eM@+Ja31ysXfv!bu_YYd5|^rQ^#vVR-YoLZuNt#( zg&nX&))Ku{bCJz1sI29pus*Q8cFDW3m)5W8$h>5r<#OK=OYq9>#CLt&r15x=SI%-| z(d$gd(!v5lIM)bQxflApD>7c#s$vmhh~O+u8)VCT5sS7bNQLx7J669gnRjdaC0_2IVs1; z*Q;^HH}EsgHD1Sc*;GNF7K|s%U#a9$j0AtRVetjij6AqUjZVl|wFKXH)%8*>)i%eY zlKq*a?RnT);{mg1Tn?pcqQ2f~y@nPR72Q+l*1d+6`BX36T%YOY9K8_q_O$ZbQ`<(j zk1=MelT?Mp1a|;ypIa5T4<>T?IcXcR<=T3=kmHQXz$~1TC*E>8Yq?l8Yntgbd3|bo z!@-u@^*>)z!e#RYb%SmUiB=o2`MmLqQTgHFwTMx6g!LP=?xRdEULpvhN(?UJgDh zN{K9LnX_$$E9qWcFNZPB-dqjcxGmi)*}$`0dx@T%R;aA1`Ua|;+2b4KmlyMj4>k@u z9ppW5+YmhORmcW$f(jashLtxYNW8`c&R6jitoDHO;c(tozFa(OG3CKRcQ}DKZ7K2o z2*7JP%sQy(O79BGE0Is*8o?XCSmZ+YR2P^DO?_<|$(dKZ?zGV0OlA3bHpl*W@54$~ z#`gGJ1(Q8B=DYL5ZW8c|H&MiU14FIf{m`vutITIC7|(BA7Aj>HyU6wTN%Zp-zYS@; znm5czk(Pjlv-tWESmAQ+cOGH+{PJG_A2cgMBU0D%@?mwmVOO$>#UnE z&V1*qv5QI0y3gU}6c!Z9l>HRAB;Z|^8|#CN*}Q{qfNJ45GR+Y_JI0Ok&Rae{f--@F zyGG!v`A>arlGpdB5`*zOOr%6>l`Z;)Ej*Hwiwyr2O9;U@v7xb&kxZC z;U?*}OOv0$J5Jn`GMRfag%{1;~~vD)BlFy_BcFR-(YcgT|jg zx?2G;Ub``AdNJ4&5quLj=!t$waDtf}W{+1KOCN>X0a5CGMYU6moPUio8dgtl2zk-) zoe(<|PfH^O-{)d0KeYvKnF$J^S9^F7yt`dgb~*@?FjVyu4W6SCol~o;`y9#hDy1eL z50)+#@h#;-d5J-zXB7h`c)cRXR}_t07*N&ZK=p7Nulg(}R{p3(;);Xj-IM zPQ?}JV85<{8h)BQd(~kA4j+lAXY74Dq;gM>9yAYcCi;X>%Q20zNfJ`v#_5hBQ`)hy z%Z{avV)U)}J|O;iG+GJq>*@PUXw$iqtCB6;PMV&@WBF>?Aee~2OLi>F=2R{lYrKn!^Y;6SRsom3BoSC~9R1Lf3q+3Fd>^|03-w!;bKv^Ls^9xSe zL}8{-58-jF&-*v^+6kF^kSl^mYN~~*j}B4g!<27wE}H0~6i@?jHn${@7w8BqZAR3m z*_@eaCdVr@p;Sfw-@^2Y>*c3TgwB?KErJi~H>(7Uh0*EeZblodW*}8Z%I`|{j>6az z+kOO-?8^^imWkzS)#fLHTh$%0HYqDPQyI2VM(jUMbdXRG7{w??C%*Bq1MitX)SZrF z$yM|608#sJw>egO&DYuMRyooyE0D{4+AhIfCt@XB>Cz*S)AHQ>6x>9kae4J_Aa~5i z!@~dzeO6G8Ox92biHyF^JQG4J?PU7ui<@{Rwc0RrDphdCI&vf-;K_;0}yeNoUBTFt@GtuRRefL`BoK53- z*zusHrR84c^;TxnLHJE~OkIGOP!=-UCJMvUyMZVTMiHR~Cj;FsiG*5SUU5AsRPB{;8ikE6IQwq9Z;{rix1Kp!_X_6eADUsN)VW$I#D z;-YwER!t5FuH)qnh&9o>KfEwjM-ou{73}ltfo((fxAYeO6pxbOxr#E91d8$1CP_iI zt>DbtAd00k(__1UWlH-NX6Rw_H6mlcdk1Z{+KFY?NZxN7?O`%&hSko>#hi5GZ@ZtR zfYW; zwJXup_^)XFF)WI$pmLT>8{(}T*FEW6IYzx|C|XTRxK|0R@n<`FGf51EpUMM`Ntn(@ z@gX@q=d~j=L{ul7VJuw_Ul}S6NN16HzL-fE;kgKDDze7~B46s73HiZ(=1ye{IAitv zyFZo(2z@P1Sf1v@q1c(nqhj<&E!Ub$_8rw;AeFi*%cvx|AUvZiH?IV0a;Hh#|ZQ8nc^@|2kK<07Vtpe_gZG7qRGB~A*> zs>6KGaVC0XMWZ|n1lpnyS_@QcRh3o`tIfl+NV{Zc@&1%)aUC+sy;Ha!EXR8)xUv16 z@XPh(xzqk^yhWy6U@uxO15HcF7?w9$fU%SD?M=pD>InD1M|R?7Le@CG4L5>&H` zx?*-*oBD^ZWQ++`E~>Rza$caG1PLi*7w3bUX^R9iX+7GgeDi2sHG7z%Tp`%K&Gp(2 z(CuvUO`*?%yg5w9jfn~q z>cLep;jxvt%(S;X=THpJd2CO5@~J^Ts<4z|xra7#sgw1Tke_QF^S|=pR1}pq^i`7eeg0HunRco3mDf^*iu4ji4$^Qr9O*&E~g@jCsEiYuL;CR@c#~RvMAY0|j_jw+d@&1*54tARy?fTR3##LwkgImkvQb|g?NgE>52=}Iy9!( zbf8Q;xcKx3{yeKsL?fA2AL?c+fCwq?kHO@VzPuo0!Z|OyxhPvaQPY^Dm`!(lx)IQk z3-ejOUEqRwpGm`u@jM{n@38c%@&5_Ngw@hL-xz%Q;Yc-;YF1t6Y=KRbI_Pm?nXszV z1ZOh)aJAIlW-rg|T*bj;lm$jXUUlDbv3J1eqUY?*UTrJfx`ix97kkqcX`hlY6~9;f z=}fUtcGH*+h`@sMW;Y0Wuh=OM*W15#%|C*6l*49I@m3r zs>d1+XiOQQZAp!xj|@0A0=smy3Hsw%3XWj>u@C6(V|FEj2pBuIsnxi+0R%vGw!YtNlSn9-(QG-pWI2 zq`z@qU+>FdPmM;2jjo*)84D)sxz7$m zcjQtR8kqKKDP2PN9!t}k^JH+yPZFyb7)0@(et2VQ$?&N%r}=WfQPK7276MqwD-)-~ z}>xl+b=5X+NwG%$O;# zrXin@!k|@?r0rvAXf1~}A@dzTAWb&X9OX05zK@QJH&gHkZ@wT8u(wmC=WZL;b+_Tk z-p*+m_EvgWSXue3&T~~o>A5a7E|^gVqQGru#|U3|_NumqVWbkyVpC;M#bt($3`?J6 z42iu^^2*=B3OQ963) z*pfTKV%DLToOEi>J$O&qt!p8W(T<<+1D_?lfKqL45k;Q{I6=p$W>|<`0xx0tdWLU~1CcejTSUCsHq)f%EBM{1iGoHcxXk~n7 z8)UwLvvoDW&#SH_nL?Kfu{nrF*Wj_4SKq1>Ax&%+d#RY*^3ixn0D_U&5B`MZG1YLf zUBv$YJ>?_S)cW8yz86g=p6N);gotoE1r4UZ2CTa-WH#HuVC+I$2Ijh-Zu)=+C^k~a_Z&D~ueT+>&E`IC+ z51w7}{j}tW@!_v1KGwd*`2aM$CeUCyZG+n~?=IS@PNIYS=!u-{upn+RB^k7Yo>1#7S9e;d`?OByM zq}Uwtb(2z47}RLQ5DgO@+jn?MA#j2+dcw~wRZvp0SK$_NU+sVx&*ad=C*t{(DrELUpw_D(lC&1D>dZ4R2r0(;vhtxJ`a@WDN|HYZudh( zfzABt{=%;_PzsUg{8=#)#I zW*KRe*bwFO+T=cJ#q&VorPx4i!G_1P3D0e|&ELPqOcMBI}@Z zh+<-PCT=o@*?C(2>Ic-r7O|7|>tQr}N1HdZsWu}{n} zre3{7@Lj|~OIZ~##(IKT8shzl%<=6yBM0DT#l_QtIF<0NgbX!dDV>{$Fp2AHwI zHl-MO>1v%5G?3u*SF*%6oBC@?b@su#XX+xzp<9>X`g9Oqx_lhXl_P(WtFNgSn|Xp4 zspo4Xxv zN+$iDmqW`WCqY+3Yo4qxMvN`_257`8GXJLy_9i}JtI%=g*{j#YK)&W3s=h&3Tibtg zr1N-^Mh1o}yYpu#l>g+k{@OLk5FqnODNFI6+-)=%$e+%?&KLW=5r3z9X>o+ga|1%e zelPm(e5r#1LR$iOJr4CBi2Q%;NhyI|;+SJl?`DjFyv{xv>9oU88t%(~r;~HUfL?H^ zaD{)T-TvyWhaVAHYd;x8_B$Q+x1_`Y*;b`?F|z+mv7Q=8&hk4-2mG^<|7#zO3-ltL zB#Hc=DV`x?Th*J&hx}(nN@VZ#Wzg_K{$BU*->zl=+jZ7Yi}RlqNdvvm8j#`qXNvQ9 zLgi~2-2WfmbYDc;Tr$NtAyd zi=85^m%!D}ju9tNzI+8OD9>Yxh%UuyE_8jfx!UyF^+cQca^CZUEBr(lJbT??X}G`E zA>NTYqzV0e5Ugta?H;wpT9y5$GYC$-#hSEMy+wQk(d(|RaH7-*GHHP!ZpU@EM|pCo zsz>w?#P=*|7=;z?tU-qEMi-$hY3A97=PW@h$wsU}ENN0HKith9ZR~)N{TP~taJ!_R zRM*yO85`#YhLA5z#cZ}fxj^SMttDEu9%M?!rySuSAJEn+0bjrrMl=hx4#$vAS%9oT zOH6b;$XLwto=P$o2JOo{QOR=m1*Y58U+59Fp}EI7Ze-O+Tth6<{i&uIBhkI z$$Nrk5ApHQS%O~cm$4P zg^oCfuZmq;C!`73&^R=PanU#y6i%0u3p^>J9?t))6(HmM@TcdTxH0r6SOc=`PG>DNw7Y=Ai;O}S-UmmK1Kx2mPy*7+d>U}PA z)|>|)^1MvFPMp5#R`~5@iOad{m}KXH>y4ti5^X`^sW41^DD`i!Q% z2&WP~DM}~BCq>OQjlI0QL@YDuifS|0RxIQs)RRsP4Aq?$zg4#`@q4|teI+h_*SbYh z0R}SV>j`?W*i0Ap1`h7qlwWPQUZ@0ZqE^91iR zI5hW4h`hBNU?Xz)_5p{(?)%rIqe^#0_T*}{nN^v^-&Xh5%b=2feOAN#N^03?=3+Hg z!KyP9KOOrnQ~Vx%MmrgH$wTB9h* z!dcMT^L1JW`EQ3k^UeeA^@LjL&mT|mfO9b}bh8`3iQaKLq&|Ri@fabTn3&)&Xh#MT z>B(LMhu8U}VJXYqQ6#p)Ly?g3UY_;ZhOK0$87Zp7a2xMmJ(AIdwgk0-ojIU0eT%eb)uTJE#Cf%(@eot9NjpW;pk_FgFT*8BUtf*Bc$dXACZD}|@$ z_tfvC920=-c>P=^%h#;+6j}a+DZVy9SY$(xmMclRIlIKqVcPM=wU7Glo|6L1CDfWn zLiB<3efQPkMP8ee-~IJbC=i8|HGF1%g?I9JOZ#$2x0`A zmkeQrmL9}RjLkch_u-JVr`Jp(-$_2=OlEHUm*I8Je+M;DiK0RhUu1)?~0cYI&MlA*FB40d`-$v;X@`f7))kpNZTO{;%t8RY-GFOYMrkN58`lJLCck)>a@xQ4W|bS5P=+h^#Bbc~uenMm0-EP^*)qj| zwcknL#P_i*gUlahmEH`QdgS&m6H|XnxJiArD~Fw+_i* zE=c&X66$NhE5)AL)*@7uQ)`Lw3|!8YYk_NMJA)X1Mg~;l9?-SGi3sB4OD+c1&0*0< ziM%q~u0GZGUNZ4_PV^`k&|9C2Y*Nc*zBl(v)ExM-?Pjv^QDzulVcfZv_6oSsRHnQc?_usHw~!GiF)m8uA-iuomzKfs z9*CWb4clfnM`p&K9rOKbl|8q-xIVE8c~eZ^&?t0YiS(E`Zg{Fb-3kAbO(k3_)dK0sdHYkr7K6!pt89%>%L zFQ@gQTx~b5=ad3O2DkqH;_ivmQvs`FKO|#fs(T}=S`!;Q;lX$v*0jZTNA?hqp5A&5 zuF+9wxs{zt)rc^9f{htQsz~W%MRjZIAl(ogEmV?l=wH_JC>_}5CGeN8JNXz@Iz8Ra zRwb2c4cmuzW{lw51c(c?k&$-y;Y1oAcOIh6y}hI!@1|+b@6%P%-Mx~Ub&`JwU!>6n zNcULNNM_pj8#Bdqu)fbZCk9 zCG6v*b&Y{mt=XKh!#gm!iJ#MDnl5{_8fK&X0_Pe12AUh zO@vKJ9xzz+C9mM95enQ9LDIK9z*z;(gWZ+Sj_fOHzBI^L6Rfkak`P6$;=CzD`g5oV zzE1!dc=AHqDD-_Q@z&7L0m~>!X4tXXT+M8mCdaI$bxFjQkUM6M%4||pC zdbUN}UCOm@05BTbh#ff*68;L=&KS}sR#x^=l2sPK$A^N>zf`&_!9YcclaKl(=) z0mGIf3IGY145O!#LOauaLEm^t(RhfHYw8NR`5f$zV{(fe6(1S@7T#M!n00HFz1~RP z_eK(~Jp;_@(c=R17`VCd@l-A6#$1a1-qLd$zI%T*Ct3hdpo@MtGDP2UXDhUir%=rr zG3S@Jtkr*DhjVFQ7G?aPEZD<_Gb|qBLvZLW6WtuW_4hq=Kp-zl-zoSLbWAsD-^Dvf z8-bb3tZ5z9KOl>d zP=wha708C2@Dg*IOg`Wyw05NG)STATRbN~{82ndz31B4m@u!_i_;O)R!Lw|ean0{i zsJh7+)A))f)7MWNCyC)88~Nu_26qR1Zjdi5`f7DE-|X z{$Ru2(y`1|UKPEu{oRuTd~gvJ(-+4H0ysf0;&=q2WA8nQ@RJ}u!-{$;*!HW1S`;JySi0KPO_I-MSx{Bi4 z@#?{B|7nS?&Z^t$)zgKEi(lvBE**qH;^c!wEaRtl!!X-J-7=8LR;1c)(y)a~|=$F)C<)<=pOH z>aKlj=2G0y5(#S}_tvL4#6ryQ_IL@)LUm17R+%&P(V>EMrGvTWD$qCJ>JlicTDP)P za3H|${vEma(gQshKea0`)l((hne^E#iwqTpWZ8rIof#P!iItx-P;JtVwwCTMbd?S? zQ;yrr&s*iQmsG|+pPGp?&YZqs)WRF^$NSSEBC1gs;10BmdO731CL~u}mZ0Q|TL{)z zY0X^WA1frDbnncUI5!HAO+GgAvIP5-xDJxs{r0LK@NL34nY!?!SzlEa_kengDogo{ z7ZT(&vw0XqFA}Nna0W$JJ0}V@Zg%}`=SaMNejK>`A=`Mle(rJ&f;JTzSv2cV&59G> z(V|WnAY^LtO66qOQ7@n5d9L*thSqSF$(KXrp+H^BFnH~42C7M<1#U>pdmV3Xa+_Pc z7E;3E3~_;!`kym%5C_zPZj@46{=9CV-LU<8EyViz>Iv_WFILl55N1qTlo79|!BbtTU>;AEg}(OHCDB|Hm&WuJncPOG;~0rQmbA9O z@O&CUS$vUdGN#BWsuB3gq-;Gf<~0B}!M{BjWHjuub2)$I3jBQfI;axLspr_;e(Pa! zU>D_{=_QOyW~qH0CHhO+W0YCrSAo_u{%>mw2DZtA9S;sNaSN3Er`l2^d-zj3FHoQ> z|5^@E8ACVV9c2jvXv;=_qpw{H&^Kb;p;U7<*)y;5Bg<>fCf|g8NyeLD17CVTSPZ*o ztE3FG&y@Jhz1Q(&Cc0N!HRBiR`3+m+2W+inluJkB^t|Kt1o>Pt>-8^G=+rzCsw)h; zl4iwv_z=&+_N&atSTnh~p8<>W2hdoZE&$^?QrZ6pZu5C$+M07B^R+!e;B`h1sV^cp zLNaO2I82qIDKX@^C>a4qpy>URcZ4Y&RWOQ5pU;tE9m}LPmdGbe^`+Bmfq9V>ke;8AXSy|IjX!H4cYRyU{ug6v^ti)Es*x1 zPz~1b(?G`A6axd}bXXK?2B6*R02NF- z%SYz`A$zZCOuka@=3?q}REBA0vP8?eEBxsx)6Ef+%jt~i900p7XVe2TZU(z9I44?9 zm$v~!b2kDx{=IO3HwRP#>1bbgM$$Z=uXdp~t3R@pE^)R>P9ZoCq_CVQq>7RarK_89 z-FZHvW13hcLh;*v0d9lF0*R7oTD4f+KuyOv9)p-~I!__f2BO=1*!A26Aor_-!Hw{% z4Z&J~y+J{QoSvTUvXz_YLAKXNS9a1w$q$r4+%n3+s^tK|abSHQl`wV);Bc>MEK@vM z^1ifIc+moz)N<*08SDl?<&r*bSBKqlL$WDsy}E8cP!5uKT}!=yqMHitn2l0^0`GI= z#|;E~BYET3c5}-gHYERLvZbSideJ#TvLhogc>(T-RV(~74AXZdN^r5|w7Se)l^!kE z=*L>3UJa#8!t|$LVhhtW`!{U$-zI=`{HhN0Vl$2+&U!OB)S-KPktW;0w z?oeI=WCpXj8s`I<(_6;e;YOxUO9YAFB5zq~DTl^F5GIiYKtRlt1F@o0Du)+^sZQ9| ze{8&ho-ML_92|G->;i#x_S6g?n&#z|F-l$T*^&2sNWyZ`sGb){<5=GoXi(RtyDrY0P8(urJ)LQ_ z97thb%m`Ekc6876t!6NH;dGv@(?SgE1ZwQ*Yn<3Xc|#zUnNRR z_QTi%xT$&r4dYrSQ#bBJJr84n&8(T2Lm5fx$?Ixk_Bb(PlC~W9h(cZ5eiQ?fiIi0> z(&OfIhHQ7(EN zd&cTJW;3YBsCEZec$(}j^1~oZPtu$RCWh9kce4MB53)*US^iLdQ%RrJ&ldQWOnS1c z{QLYiogAbbNWd^jxx3fh$6Q|1z9!ylc1N_2CL9fUqgIWUJ|Q(JlKZJ-&oyhCrx+R0j;eYjD!G^s&nHCF+qklvrr;sllYw5Qd@gbPWpkZVxQN0Db$6n%XX zDH?l-+GW=jO61acCg$jD zezXTJav;qT2)(qDHhHx-9@m$Uz@U3wkTay5GLr6ZU4;?ZE8kwSHgfja-sav;rbLrC z@o?Pyc~4J-96^k)y#cK218VUmM67FJp70=hr8L#9E234X0yz2B{6Xc(vj6tU+)#R-eHqgM4EmJry}9sm0PfAK}m9 zdXP2_^i4G!Ti^dWI3Lo{W%PnT;Hd_dLmYbtR0lCf`m|9WHK+2nVs8mK z+YAJyy4O6W$4ypRiN;LoL3us9F)}nfA&iB|UE@``#i^Pu$vEXoV;M1PI>wEI?S0 z1US={zVzQN6&CelH!;o|jgn#d#37&Ebk_PZORSa)nE;FF)x>HcY0W<_yAHI2ii%k-!Rxi75it zipS4yz}?$vxiCM{VUajqfw{-46*;v@o(Ek>VGGto2Oj^T$TA~!;sul%{KRw>ZM87h zLY(jc=IK0w0^iCc;I@p?96J0!Z{@)EQMR1Y!p(h8Xa}`kLx4jSq3=AzM>-E|G=|8l zxt9&wXI-ykf`#^exP$v3qW(e+7$Q2Rq&Xd1aG4vhTn1Q!1XJUjOP*t4vBp#ycIN?m z{_960RI`kACuObI4V*fIojahh7lOaO-&1&>`;O%jAhE6H3Iau`xE|>z8<`3lxQgr2 zj!VCv1rXwQ+_gDb&TShbok>f011W_mKk!-T+rFKsqFw!OZec|Onjp9BNzo2*ro zxn$Fft+uAKMKJ%T)tb>o$y-n^4o+ zD>1#?cWq6jF&FL%-U~U+w0vg08qUctqgC^4ZT{>m&Zpdl$*Z9ve|7#YLM&8<8g~`f zV?LHQgCp_6fT`tIwp0>Od1XI2IccK6cNoCQpr}e$8&ZwR$3Fwcr}Q=2>k_)Li#os`E*>}0BwlqBgRNw#b=-aga)3{*8h0U3tM&e5Dw)M5_hTy{ zW?9s%1Zp-Qpmpf~WdU4B2f#@h|!EGf%*PT(0#S=&Z6!H%qei$?@vxGQO zciOs?g82iF>~+RScsXsV2YXzQuD7Crl&oDuiePKrw)Ac6Nz!=-cLz|7+cQY*=|ggv zP*IyX*&35$JjSoWfprqZ}tXTe8|q+-7~5|b==p>gn2A36>3pSg|TI#6f0W$QQ& zu&3tcOL@vjx#6qZjP{YQ&sAmDNx@HcJmE)$%!dNX=E)PF!C~QpfvzZLz{?hBcPX%aj`(5{EFx!(Ok3@sK$;e#eVwXR7S1*goZA z^F;*m@o05gz-^smMq(d6ovIrd0}ZnOQ13%E+5I2T`)ZwN98>FAb`Cq!tMA2jrj_i4 z9DbE+rebrCqAgbSd^pAl?At57hPzZ};RA_sr4j`m7%9V_d$5o>fEb`$=p_^x39FXh z#v;x+&nS>V_BPp9p;E<^%PV4OdE&3CT3aXQ66J`=@ozIT+>QC`EJte}0Onk-iN-N& z)Kv^AdMGPe%_mWE%ZF0{KJe>jJ*6DZn3YF6vjE|{JGXAqVY#f;^hl@A-|6*G&3JP7 zSQ|(!6X(RV66iH%27L;B@<_c|%STn9M)2d{Q*OGe#I(Da<$s_?FUl-u=jV+DVaijxmyHrL!{NI8-J*BS5?k_? zarNQRA&T(#OIy$AF^@?snw>>HUNSQHCm_%9pM2D?DF6rykW=_;52d<@ zIgJbp8KQ#0&xm@Pq30hs#$AZ% zRaVDjrlxg8Ymuc78mz1=Scw95aqPU}nsKFhO}p$)FEf98u?0I0oa!vSCz@LMh}Tu2#o&tgQ$1QZctu2me3{YW26k!v7+OYnC+l$5AH4wgiF7Y|nRlv(+3PCp0p~ zD?hc7e$`cX2-r-&!@y}{GZ-+LUOkm&ier`JSZe1`sW++-IXlig{d zy}Ev6#Ix*Z+Z9ecYiKav6JVMszmbpsW19KoTd!9_Uf@NmgMJw)Pl-uvc#0)(pxBb{ zeTbL)5#fCJ4%#@XAsuY-p_Ad z`UiZBY|!&$G1*HJOKtCmJ%{%;ABzT0bam!FAE{e-os$7d-{!Q1K>!%o$#Q#iAoJv| zmZ1WvCiKrD+KbrH&*AU~7(XLJ;_)+Nyh3toP%KxrxekI~?+ph-Dw(vr#B1%8(-R`U zgwQRC_uGCTjMyj6yxW!JZ3dA@GiQ$w30IPb3bXh_LYaAt!(mw#|BUwZhTw)4Ye8 z5mKWSwc~LA;}?sA{n2Vct?bL*{-ilMFNY^L-Hfo&6xUJ5oRGb;U zLXl{aRdBQ3geP=b7ev9!&e}(jf`?j+4BQ-7wb5p1Va1&b8)Un zqR^G8sAjKa8<+h+(B3YId`Q7kN%w)#%~@?@*EiUy2#6IgkV#oJd1^Oc?jlobewrJ|0eRq4Ctd@iyBFV8%jr1k>y^Ol*ui1apAo2%xcg^ zRMyXW|NSdofuy?N9ROCI#HrKc;PQs2@QGU#=Df)20U6o7K470}UX3p+_Gac$V)++{ z;2{iHdtVnI9GF&7(=jF@B3ir;1t>}^XmtRo`KwLckNdp9jTEN61CX2HsLVk)r{JSDJD$3%&yZ_NR-;Wg0ZW(GJ-_?O9 zhfq)6$0D{G%U7r+)bku!bX&n_Tuaa@Zf;({6U4Ra6#=-UIv!iOlf%RMYP#;Z1Ie$u zGkmVy!N3*Lx|_(g>K`0u;1XV`)MAHLo?s;X`M9|i;gk(Q8d1O%j|yGy!DTG(_* zmw+HC-3=n$-6$p94brW2^Ui(FJ@s_y^c8gZb=nVDoh3q4fkZ{Mt& zTCCsapD#*ZZ+^Q|6C~e}5jj=d=iqv|8a$82yT77n1JYBf4nUn*>i`6{Rc7)(TFO6} z4#dwwgB<1oI(U#7fc8XIx!PWy7mc&wIzw4-CRJ#>AY$BwWU6>mtoa_slmX& z2ErH}i>`D^v_G#Nbz2bR%te^p>_=k#=qq=pX_U69WNH)z&A^SEGiwKvN_XeF9wAo& zm;ITMVvcYJfDzQbo20juF}27(hz5~8512Jr=dqBx!Me| zVr#}$xB{;)cQY=Pw4g}$DUNWB{VCf|_irr;122MXauLp1-Fq8)3*b;b+pwCN-&UDa z)z8K-t-(`-0?9mn2Z5aBYtsAmtJ#^p$7>ZlJ3Cv7!**%aVJUF1JO0&$p7J3(-!5MJ zSJV8(ry5;5ke_i(PieJ_75GgDd+~oc;t=H6>Ogs`X{v0KyC*0^C9PvrC@Ea7-0@Kf z-^($64CJ}~?l50St_9UdT1xHX%h5>hXH}>|ui8weu{=uv$~u}Y-uHQP`Hlr!WRHpo zfa7%$K?137O}w1FCRB14Zf*j6Y-S&9s`JAy#$iP))%fvR_a8GyMY}o?9U`W9hc#N0__|bs;Z6S1wX?;IkJ&b){szb$iTqBM%>58 zLQ~m(#XG4rwZ7C=uyX9M6BDPWrIqxGS3Ynew%69@W_~H%78yI=PSk3{W!4lxT+;P5 z^now08T&esPV%J$A9sBbegxS+Y(mUobLTgNh}oP^Xp@aQyLR4_o2prjrd#d`LT6Tcss*X5l6e(W6_CV|N?dxc z`@GAWAW0AigQ2hlS5Uz3D?7b0fB9@_tDz}zeF=7PrFeQD3mcnrCdn|VX3oAmSC5Ro zzhRd9|G+F%^g83*8`ffwD8oIgkyGJ~yunr!MpbK@x{tdNMd$5)k4kI|tuww;?;49q zI>tb%Y&vikgghJ!t^8;SboNoKIAz4}>55rJDGkjsrK|05vy8PsdAxNs8d9|4 z_28^{WwDzLpVFOBReTn!f7Mti8BValOyhn(p`tiDmM1H(^VovWR~hXL=l2xH?=Pen zBpFufHchZ`9=n&LoRIAmT@AnEKiZ}zu~4uUih=Akg5`{{6lK}-M4p)`j*gxwL&Ly} z!$@FGY#t%u{%qQyZiT%+JldcNih&IO#H*F}J+8kypKmE1;=j;MFqQV{4{UQ>6RfE0 zY~`!(_Xo~=*caP#g4=Y*ndm9|$6hX<4lyr!D<@rk6r)3d?Mfx$<3H>mFIb`L-Voo3 zT$$`q%Y}Z-8wX*V3sSdrOk09Ax`Nc7*)57CgecnU3a?k;ZFyGNhWW7d6mz;AJOS$A z`hNI|IG~r_WyUDj?WUOjdOHVXEp35hvt!Olft}nKIh81XVTz+g*X0^;&clHDz-`uZ zn8rp_pShoW)GQkQP+vl6{`AiA#2)J3;ACwg`-S+i#fTkP^~>XYW*yIu!uy4(*`~*@`)Hn74{we z2932RVVn5O(wfAGRO3fGGk)$+w4k)u$8bpd? z<%YN4o+#!b1ZF-o^u^H50~~BU@@I1fa_nC!aLkMQ)=bN2#>m#AZx-6?8k9YGT%=g? zEM8t!&LtvY688TicY>x_!hHU{W?WE;VB?Wc=}(O^$0Q{ARaoQ|&K6DKrexzi^&7|I z^)H5d%gJwe{@uBR$gy9z6;K8)xJ~d2BE?3Q)EwK_sAFn1ZK8w6iWFcT#|Co?&V#jC zl->{ZiDG|6Q2d8Qi)9DbK-yI>d>9_qEwmV4!(2Er8u~6Y?Pm-#_rELy1|gPpTbwp$ z5s^BMTZ>}`%zx@skAZ}f(dQ;8IS$`1C}If9ie>O0yD*^VfM=~EJ`Cf6JnAtlH<>+o zeaQJ0TYQtw($?+wL$8OoHvcOiIRL!;Qv@WF5TTJ4pYp`3@Ra>fH`3YLpzx&%G6(rT zct2m?3>g1G#Q;YnlSHAB-@RPZ)>SG!krD9}4D&cgp6kTSOn2-A!^r<$BN%u9cYJ^C zEA$wAHxY}(yxZ^B+rNdMgOwtS`kgK3{}kD`;6%I$QboxI;3GU&9;LA6(&Q=MGCkaF z(ecau;W_im24YQyP@7!LD;Xg|3&bui!974*uB_g4hj1MX=9Bqtx?Z*A#@)^N6E*y+ zTh9|R6ryAQ%t=H0Mo$kyj2S&LG7`ndLWhO*WubENEvUa+6L=k#3mXE+Jh^X5m!KNr3Qo$VcTO`X+R7U&JW19l+L8hO>7@Z-rIdnEq z8bRzb)4|J{z{brD7iJ_$!J7e&X?M2OkAFVid&_P7`a~{(yOD(`Y&h8JFOe?E#Ok=B z#yp4x1aT8C0|9f-TyW;hNzN%@Z`N=V+m&89gX7liTCqR+Y<{)ChTVlDJ zJVfFLXf=3)m7YmUD5{s}S~6&qR2qo}&m~qEvn6vSIb#BT!j%e?2Lt~vfC3rmR4UU` zw`Au9A?vH;QitP@+YJirjLJqbcZNqXf`4|y|3M9i;ID-;-|D9A;Fp`%NfjmYjrpfo zSZU{qj0`@gSP|U3;fegi()kP#olx6PsrkXxYvHzDD{O-HH?rbd}5;45Q&F$l;_ViPiO}~`*8{e<73`5BLd|=mXhAs zGAll}JlbdN=ADj}#xt=T*N1f2omdfOD{1#g{IM3lAO>eUwqvE8j0lg0uV-u!@tR_k z#lAapB)mLeoC1&CrRl_+Sck-kLhavoaZ~vw^d6L(|Nnq9)s4nGV|gwhUUL5S>ZGKh zVb8SIU)x@VNkI#kT4Hwuy^D6@>QT-8>gqI|CtUA58AJtxB2wR z6%(6Qp_&wkSPnlA^U=JjrntYrl8<|;5H?XQ^&m0VmV8rjXEI^8w@^R%c%PdhQb%g? zkK*+jNO^I|-~WNQaC+aqpRY9iB;C&ctNoEn7b@2bC`(eXwzR6uX0Ns~YzaB7%dxXaEkI;n5ul%0PxT!ej9S|kq&K_^yuyC}UxE{@>E(X0PeD|=t zeE4qu-g~(>9wtNUYDPQ8IL2^hq+*a@j<>{HD9QZWxO1Y=Q-Pj8w6#Lsm;kS+g;p@d zQ0YvS(*a1=)JK-i{Ufz(&p8YG0pt7W4a!yclj~G&=uG#a1D7jmh;@>6gl+Evw>zjA!o6=7i|Hx~)aSnbi z0r;)nqXS7kSk-R$^2iD`#b1!LL~DFv00_1ChTQ8w+Uq=t$G#DRMrk#Oy1T3j)t+%# zu&0cu}<3YY+;1w#rEo?c& zvoANZhMn~ML-sSzo^t6YDLS}=G0{%P5BFJ&GFu3a1Xm+-T>eS>(sE`9C zGg~Qs;#09Qj>(7X@=#2ThP{Mf*-Xw~SfsEOGJUto)z-alnytO~ERphS*39EFhJ&-J zMb=Zgn!nPiX3%zjc252M#21$TmY6S4;iwOs#QrayV|)HNHn>=FEZk6^spS*;=FwC& z!hnlE)cbh?KmSE3$NGGKcBvM|=fV}l?0=dEz+}KIbxUb+F*~=yD7(Cf88S#4_ISR_ z7FK56WL*2;G^UA*X@K*4Z!orln=(keSwMZBSy2A~nX}>MBDi0$1=2-{DH!%Xv3S8I z-0@;d^4vjZacr?pR8^r{gz)O)WBJIZMF#A`%nAf`T;dx$x*4-ilS+L!_q3(lXbHOl zC*nQJ7=Q$ZABalOrpp1$VWfIZ(u04<+a)rTm5ZR0<(vvXdF1opaRZV<4}g5GZg=-x z8S>nKJPBh45DGMHCR^=;^;o9KQg8lw^&Ef(6V9vnq6qvrFpIuMsEV|FD^t?ttenGT z(!=x89j62_H+Of#`>^@Ng-pdOa7*} z1!UOK*v5g|ZPRBcgLAYYU1{+#TG;Y9!zf#KfrL<7=uXt+gqA{#K$6gSszCnIy(8rE z^g9sC&Q=2;xy75jz;UZM*Skum0GD}6wZLRvgQ~=pOgKle*bQ8?5=sYa;@2;dE5Fu! zdq>a|XmKh*KzoR{C^sz_PXEk#Oh0@zo96S?cYw?dPh?O6e)QA#c9KyO~#a3Kv|o`clfuO*QK?@}kMmICv4up5muiqCsjy%iQf1cgMMn zaaD`9o>iInRNbA^n}oc<427*2*YszNn69t%zD^fCijZSJtohb1v0VFTZTInfciK@Xi+Vrx-Vg7-O*ZJ;s<`nm6{)v`{lh(8o$x{$};1(hEk73E6=7Y|T|8Sm1 ziPy@ejm-tx0ixs|>(w@)-)B7QA*?fysQfM3^JI{#rCv4leZ41$-d&(V`!$K-wVapU_YgP|fJO0wq^5mn#440*bY1qEJ~5+XNI$oye8%bbmldIO z$m+uo_0sL?#BV(vg%=7o_caO!3n&Hda0=7F;W&`7fTkTgmp_vx-$%UPZ9-J9Io3GH zDOPOGv*2xaPh~{-KKR>JM)WVy+p7?}f1KJvbkKspWi!d#{mu)3W9}k-&aDfFvSiXQ zX%78#OmjC2Ko(Wq~}szIyHb6D-F*|6-?M@qCrts9)pcd&K{m zJ!1*{KI?bo52bTLl8NoRG0#DYw|ghFI5|mm;R88TOV!;_&GQ1yluVgr2$nY1TjrKi z<_NUKP>prJebDIRmo}n7iQK^LBQ1fOJtb(!8Ae73`o4$ zwT^*e*c_TLJkur?qHZ=?l4OS%tbKI}!Tr%E(LVQEJ`rv~pMH{) zjpnbsRE`N&pTGS$-E(BUtb4cG$ONk)V3MhZ8sh5BW-29bB3Nreo-bC6_ALovXdggQiMC(~V1EyVNMGip~db)6c$PW+TwvP0g zeV{910rAQ#%D63$&|bGgv;0$D8Tku(9Nj`wV68zy?Rm=b?RRV!ELZnUz-OJ!k@Zl1 z_(@H+O0^f$sbCzWtzwe1-$mcV=Ru-AyKzbvD` zjwAce;hwZ3fa9G5P}@zPUZYrZHH3ZZdO7d64?VN|+C-LR3Xr?gM1NXRPFyzg_+7EDQ|;VA@F5oDh#zlsJRd zh?5j?;bY_u)zp;BlbDNsRX)w@jMe6PBv00_Rwt+5KMp)F)jd2a=A*FSp%@aajwtyy zy$~>Sq$tje%k#SrV=!gm2Zl|rZTZ1e)?6zqKW5aTn!5{v zg#Mh|f989@z)=H7oVtUaLck|K-+T#o+*&E)REroktOqCY68SxOi_6KER+kTGQkP$f zZIjdMPv#EK>|}K7IDQg4m?Zc*fl&l~?_A$Sz64$JAsoTNsuS9>%`9%I9-tc=5x4QJ`=Qt*~sO!&RwZ>B~B+_n5 ztmc~JjYgzn_($W5cd=aVUmc&<6}u0~%zH@ZgnQW+x)yn>=uBLFq^d%B^CX#xemKT}WMM;^Ev_<7f`ogIp+CDXO(;Ub?r-S$ zIrU2ZVw0Rb=Z4fTt$uJ)aSli0=h-xGaAzaIN2c?jhHULG25T-dx8kpFj@PMrg^woLDud(q;li8_7Mp7U?sDO=5aaR}@T#SCn zfAN?2KGPO{q1xl(sMKdleA!{s66qNez>QYVi8+FWWZ>T^r0?g&*cE|>C>=*WzFLRU zqjGRAXX>We-m&V=EJ0Yw9xs(*hxv>1xdS0qc{wDxwY`?<@`rlyf}zjDF<;Z$B}t_% zibE}}zGqC!9vZyPw-$H@UPH4O%!i#~bBv^Ap>9AJ=k!fW(aru}r}!xz0KU%g-1-BB z;*n#;^Bi5ofY0Yu7&-s zmS~VxU}T_3Xj;#D6y9xhgjJ%~^e|3bZP+=Y+2cTkgaqgBOGXBLX*UY<)to_FL)T!E z$B_B}gJsh;xsS=St*0{*xnyHWQS2cq)zGy z*{XG1=KQnTQomy1$6J>^?&qYhrAodIPeC~5Q0<*->h8nmle`8Gk2Y-C26%qXm}WV~ zId{*D75shlzfU5}_n|OE)|45XkCtD1P2gSjv|CeUbw|I8jwoZAK}$kB{V^^TjWnCH zVYrUlptAd~QjW&=!fxDt&3YUz_~fV^GrG(>p5pd?vtdhLPK!N*Rq6fRtjun4sGs)G(z<`H`6&3<7?pSalGwx#r)Q&jt%0fb0Z)LN6-rA(s z7AL(UgVeT<#Y?SFSR!&YJa855B+D9P_S6w)k6&;q^$lqgEKX!g&xbsc@m%yHdW%XV zi=pMzpzT2DG29`$@4$`s_aU}hk$P=iFYc^O%=ChNF=|V@^d)fNd7i=Qqnn|1MpF*5 z)=SKl26GzuHE02oCKw)X8|9mL$lF3U>CM?-FUO?`M3D1KMizf0GovkMK z_0V0Evrs%|+_SNukn+&*%-IYz>|T5dxypq$SLPNkk>~v0%n6qlOnCj=+-`FRSSI0* zQp%U??>9|`(Uqvp<9fc=O>C(Cd;Q@3x*o%lTSrGa0MPBU6NPyMSXbznnInOFLe|||-e}65 zm5u;-xC_wsjzfJNJSRWcM9Eu#4jP(QYx!`0H__(fLr+JS1k=f88%F)5bexq+_4k6>smLG+25f3a&-KyJ;QYUp#$+_> zf17NT`mDC>9=|=shP<~GvEUh%oTGgh02^_EJAmW2w>@%SSg+_p#ciV8yM21Jm~xHx z#K@S>1p3hawU!VaBAMLz+=0YC%Sv@!T{(d9<%32g4LVh*5kRhmd3k}*N>F~~^CD8Y z)7`eDVD{Iq^o_vaX9s~bInkh}S31Wnf}AGR=jIUX}t zwNKI7;5FmwTZ`LUtCr${$q@0!o~MWt;>i1|Dy1t!O#9|Alm%O^U{TGd3_{^(WU5W#R1RDMbM^L=pj}9S zdilgBY2*@xpks{AxL>-x1VLgSyp4n(bqw_V4of485PY~Au9*Xd3z1+n;-573H-t9z zek;muz*g5iDeXrF1CQ}OclZ}p%W#rzRwO1r);g+KgtV>}>2QA#XL&V0i|ymZ*Ivy_ zZyqP*i|kSgA#-Z>;3mX-;7H&5SCjiwQAk%#+^>py%{03+<5?Ezol-gcy7MB*O3~gN zZmE55;7KLh1Tjw~wK4h1_9}+u-^GTE1HLl7oKeFjED9=2L*r%&m}E$5C&uc8$axhT z1ti`X4ie}xgYjaLUN`3)la|0i!)`f7HP`CJd$80puhBzG=25=lwMMv{36unO75fN& zxk$df#Wgz(a(72$&yo>*2-9JLr5CDS<)60a8DppONa%nQSD$<%9C@>1^+ zpUlEOY0@vMA=?!AA-0u7j*b60z&_KiEoOs83Hr-8H=dacOuaB^e z4?X*XP>gJVy?-a?dtZLB5?W){_aP*%h_u*Tlq^QC>8&vF<@=d(qoLAO31q+d97at_ z*Kv6+%(?p&M8Wc+4P6YW=de+%3h&Y!)~HYo`vQEc;}%{qe@){p5g|2x=xA8W-k`Be z`Mpi>u723#`R5naKE_vuUx_&)H3h!dgB^Q9rn{_ z60WXJyRQaP5&qt<;QdO4$o2vvfCNKQ(s+NGSwQUdM_ekLPENyin60gktwLk3$&kMX z{=#?3sjOtx>+T*ZM${4&1PDDZ4lZu8n!`&ZlV`BSkO0HwBC1GEot6v%3$n#Fs?Dk1 zNxP&E?HgmUd^PbA*wz8<9F|2ogyHyKTdnA2sCx;@^wwOstAh8sGDq&oU=bM}TM~;| z%Tev&_~m5fg{ZQs)2>q)zmbvrr?L=Ir;M8XfNC{OKuB0~HsFkBr|Gp#6?1*}$0*?^Pj?Fn`^gKS*)|45{N*%TTQj53myLn^|UPsH(nkbYvrE^pi5f{8DyO%%fR1 zQw%rjq%j&1`G{|TX&Ccjd9(jOH7Z7yP&W3)c=Y@<+DYC{5T(oaHc}aV9r2xGM^^OqDfNE=9k-_PN$KZ3RxCA~?91UW5r>R~G($_HQ zDO?^bG&dx>#MF@D>PGsdXNbl$M`mmMb?y7*lTO^SVZFPFv(4*8p|eCp zoSjcnIig!@2fRpLnYM6dLGWH})uma%U1BFlyFI%U#sx`@!73e8#xX}m#LnmD^Ia>_ z9*y8{(0Psek~=R8%8DCK(_OQicRL`>nr#8!yO;C}*-(G_j3AFTo?vX;fNQ zbW~x3SbQvQQN6AH!l;4CDQNFU|501ZE5;@;lLEsFOF}u$yOKbI`qr&ysf;Yr!is>Z4k*lMEO{uP1Wj`!1H;#o;uFUl|wRElkt`Nh` zPhMuaWXx3ls(;r8le5{va&d1ES>_D4LtINa(Nrn7TF1#4h%Lw=R`;_{V;G1JxJjytah(?LrxuSYAt zee?NJ*SW1eK;`&S@VWdKx*p?eIVDbhDA6d>|G=n3C9!~_=@TzEQyKK@KIFsA+dC8E zEdul~q}50cC}2q#hY0T>#qN^b*xxVHdCfab&@6V4%ft;=-fzgj$T+EUjJqh5gH#$O zSJ`}bxG6Cgm!cRjNuoT>8~l#g4Q?xvR3azzF1|26jd5bE4sV0VIwv(qw7QNjW9li> z=-rG@HSfkZNu@jr>z9E*mF{q__Po=Z$LytAmGVdaxrIFcYoT`@7g@WvnweNO5t)Kf zsBT_;`JR_g5iPCxAdz!!J-4naE5fRtU(_ya>qs*WY&CR#xG4MH(WVTM*KWo4 z-eiR-QSLDs(HFH}fey&m{x_6m@+dix-tYH;3s7;(l#t)O8jaJxd+8-nq6y$qc~&og zD%@h>^{_)bPv1EQ2M4|7CDKnf>hXl@Ur5z2G*lMw#NV|TDSpRueQ#vU5YBi$0$-UE*)u)S3 zzExn+o@_cUotQ`EY>u>zqCDwzo^iZW$5~Zs8DuH?Lf7W|`23=@%~=OETs@vCUj9+b zS?*;7WhM1>U0L*AJ?hax#jk8L!$QPlyK5PEq3w$p5wI|0uS$j$Mh}#ezDa~#9A-^!tabH%U#5R%Da>% z97d0HKUO1k1`X0!6J4?V5PTDH{?Xf7_|hALQ!LG7K%2s=daFXXl?m{pD1)zIvFsgscH&+L~mppb#wUjF`&Y5@iu zazcf8t%_LcjJ@3<*j^i@*r$)&_DbruLFIp1XeTI@J=l2-WP}kkZ|w$6Kw^HL^WKyt zTpv>?lt0@6keMmYG70xARG@zi#`DS1ra57SKWF={!A$EfBuVbO9epl1UYR=6Ld0%^ zFY;_+>g6&5L;U9g)ot(Iv3Q!6ofuz1hKV^Y3{HZ2KXd9{M>BZO6`Pp8=&c_D3uf+(9PoYN;j`eE@9g7GEG0+Ukt8 zp5`%Q!nrmd_cNRV3%=Nyj;d-qv=+O*1$bsid>82rz&R(Z#?rm7cPzNkNPrhUP4xy0 zcg?x=WPW*_%I&xf#&){e$iYVo5%NfqmQd`5Ai<^i3Tb7mufL45OzdlLRD+jRdAxKZ zFgQ4zGn+9b_9@zUaG*{dk8xlf_S%dkvSGhulZL0wo!RJNB7c!bxGoAyLh@WMMCdCs zY@7N+;<9Q&SHO@)iO`Z>zWq?vduZh-$0^C zsqhxHw|ojk3`OjzI%8=7??z`J%FdLIEj30}ndb(ByVpWdS*r}G=Py`bp^t;=TY--` z?Z`7Yp4mVFN@AbQv1uNI37pbU8?Anh0{}{IPB+9bwr!9stdgIsk79o2`li!m+9;8J zKdd;q^RE24>tG04-&Bo-@aKRH536neihvw#xo(U?tR7=W*_yK_=%{v8>>C>~=DXvf zh;Nw^%6A9LLh40|7M-z!lH|vDI7u~stX1>GNc>Yt_>zs}VD`Fq1}UQBmh0Fw1cJ$0 za&2Jd=NV$?i!i7%*b68{&%N4vBOl8WIKdi8cXtL9~opl=~x*L;l^qxgh0G z$@gnO>+?Y;OPqF*-bbZ-oof-7VEIWKl8Y)^!{cLO!Z)(PB{@KzHhvH785&n(B-Kzp z+3o*A@Y#y8kCW;}y_D)Kk7n}`%pITdV}G-lN=JpXJgcPMq`_(R>Cxg&O`*8F{z|g} zp14$x{mjG&J=+Xfe+VHa_Yo5OS8Z&`SBD>2@)Z9rKK*`>k$sR~MWS$C3+LK}03j*; znX=7Zf23px=K)#T|Kf~ zICzmtL++O{!pd#E;yYEi!xH*}`PbLq@enPEOMNsDNcvPdWOSz*+90#?C^Szuvb=82 zLc>N!@Sl4yeE!*95WA9FP|7cBC#_8VPXmYqM;zvh!HKd0hCgak|1K!^r^e1jX~$I; zg71RoLY~jqMutlZ6r%Y^FgzJnIa9=*G53{!x-0vn;F3auq;5&Jq2r*`+Y)8g)Y?xJ z?l3Mo)dqB_aYA$1qttQVUgE5h(T2=fmndc3tCW{h?=pN*D5e(fm1_8J({)K6btWu{ z))uK+DuwZy53@II6I}#u@A)VBsW)g03zgD$n&L-B&H>GhWfEl?97eO7FFmExpRgvXH zG`XI;+@-E_VQk-At?CGto-i33lC_2UhryY3kHqJ?d3fOFupouIr_Db5z1cM(O{_0{E+aANdVXi@o<@rtskCy)bY{ zC$>7XxC>X_vE7rTgecECyCW_N(IPo)gpcDL3O+38lUj$Mykpq-_X#CK$_@2}Czi%x z8HHY%F!FYr=yLp>R2zxm78%vCOGt>c(#fvWJ=$@VziEwyp^e zPdr4Yio(WB#`lXq&&cY-nRXq==v$1}y=&1@v1C?cHZ!k{E(BG7MVrMD+SHB>s9Eiq z@jo;~!tbI@WTdFOhtw6s+x(=nbtBra^c~I&i2)|9r&FaV=Y;tO{{Mx4SsY@~| z9BoP(5oylw$%3wUBMEH1j2YlyrVXNhK(R-7zkMOHGur(9cJWeW@OvY^-_qT2-^0w2 z{FfDf%2VM|hiL2fW+g5$3-*&NVyHKwgEK!?gPdS$$=-xK*ZW*zJ<|1iFX7oJh3SU2 zn?~4rmd$K8RvIfz55xKNSVxeV_!!@`%u)`Mns9Z$xDH?TcZgE0kA+s9=jCHMp^?Wb zAIg&~dMm(f4ZgZ&Ybo&62`*q@bh^}gWjpZuw;M&k6kES^Q`!7YzA-VatY*X=H+ds^ zFt5%XK?Ggj5x>Lk;fr5K_38LgnAeh-3U%#g&3!hT{(Yq_erksE9n03y4oPz~eXcoW z5(&3qkLKy8^d90DqCdw8T@Yw4BgT(t5QL|?ZCLq}g8q4kM;I}qGAS32jiaTJ;rwAR zwsWR(&PbLNkD{e`rCzVPJS-&2QaKepxhsUN=kHC1=mn^S>yc=oA4cWZpl*;sQisFO z%7Zr|C>8H7V<1C(HhU#}HVY(lj|~IY!u3d%jzZNfryNQ7-O%y+(RX#S39)B0twdT{ z5O!qTp2CLoKJOBV_!||FM*0|7LhGf?sv%4^fZ?k{Wu!8uz90pe3|D{;PRBG~=n zKJtd1#Yt!d!v)?Z7FE%Q2y(ORGcP_E&GGJq*oQvmbZ&j(nvsoZxycV$h;dZm;cc_!!?N{KUC+RP-k;<^L6Up^faXK3Z*b@ZR$SetxL>W;jl09?-1l=@djjxUo z<9TfcrZ}s-C4B=2Csx-=IL?rf36EPA2l^K+mY=Jq4&hVcNEs2cQ>XdznB8O=^R*{U zM`W@e3*mi;k41swM}d7$khtg5vw1Fk{qw82Ue=FVexzQ8zVl342w2H7$1%3qn#GzV zl1)$px{MDaNGdA@TAz)hWItC0ydZEc91 zQR@wU3H9i_fpcvw<9SCiRz^0WgtU_P;Ul=+x+%#%Fp^-?LE&4QP$3=pu+(44wff&G zoZp#x-CE837(kNA*0bwJnmZegv6$Ru<#C#@n^V@e;2iIvU0)xi8xQU&G5@5ZilDg` zGyzjy8;A){WZ$<)=u#i*WooNVk{SM2tbNE+5s7~2rI{y(VIchECdb>r{yrn;IFFo^@FU@tDMN!1$)r4i5Scq+sDty` zmGNWAu5E+Y4&|Xdvmz-scw;!>-(riTbh@r*v|p2i->kp*{6f&?jvy@s)l?Hb7?tWU z+{b#FDL&ak{h2-9&|McHmch(Cqr7VNeV>MS=e8q-gSmN{>dbR=;xrZs+K$P>E+m_@ zCv{J0kg8E&uUczSTDwXsFlxIFDdSME;L&X1J;eqaRf~6?%X&F=T9(4G_!CkuP|R{v zUKhwxOhQy$ojfmZ2@4_%3QfIPYh-E?3$9-UBxGVo)r{?JDQ)Bpi(Q8uB(@tx3Q=o5 zoUeqfve#uMYS)ouf(iBP4vJpIe$J;~wF_KcOHq7^X)p^YQzKEuN1k=1fh)avc9_=( zx8k%(WcpG#la}1~D0ZhYJRwIfEuo5ci0+3u{RhUUGTD@%AmN9Dg*PlGExn~nZ5ep+ z+BWnFt8&(K-D(D+?i2ZnzGSVQ9&0s**Tb^w5Q%ItdKli<$QcH1JvXWi?#ws_EHM^` zQX)2-AD@?h=#h%IyGhHJXsmBddR+qBw0qYwmGS9NwF?q^^S!m;evP$EIO0FxTq7;q z`_+zzgjZda0@9riE@P&NjugAki@H9PJC6;LcY&=|=t3CAgppYIRJvZNajwffp~yc* z_f3_CWatOAa7OD*O`@#Ib97SW9siu2l1+)0P1Fw&j;$s524D4F47$9Q5Q^p9HCC_7 z>!)T7n~8rE@zUaf4W!}e;xTLOq#R?Pm|YHybN_xgkuM1DhFITXS-CE2e@C7Us7CrI zGCgVA)WodK%Jx_J*@zC9uf}0>ayIA_pdjJqFdrcaO+VQI~FlG@U|1Yd*_j1LBQv4kDeF}{6sfk(!=O|4DTNI zrF(w0!EY3f8#ELv6A^D~U5YsF|h2PPPgD#u)l4a1^G$aAmYP2)5}t;AwYW^FzTuZJ~BhQe&O>s79P z^kbLBY}*W>9)(}DEjqa>E?!1uDzqUsO__-6ou~{QpoF_(~U|K*Nq&slqS%8#{ z*n|X0m3+CZPfU#;X%y2_$0RF2sM|Hj_mcw2Cs~gW(YDVEGQ4@xT;?o?ya@;hwmT3> zbJdE}M?vbMC6ME=gIuFV&{Gsf8rz!zt_^TYj;GUY_i>yy3sTh7)b3#nrS<+nAdZem z^a+BWwImGp{Qige_FPv%Q?E}*FQ3}sbqAamcM=hQgr7pVtv`xfcPNqAevq-jY<`&h z5i+6d5p{3s^GQ2rN{sgFhlQ^5!x7Xd-<#cpTCqj40QXfgD!J`beM~N#uVN+PpK7k# z+Ae9cM+2N;i`9$~iPP5>g&NYKBa^99YS0}LP#ms7$ol7)O>j4>EwaVZs-f;tRURiB|SE=^2`B^Y>fw*VD& zV}?;z(CGEq-Wm7i_5AXEyaM;evt@h-3M3O=I5Jbnw{V3{Gh^e&h98L;YH`b32W=ux z2*h`TP8rCQ=Qjn;RFzdFid3&scU$KAYu}1UD^~`2N8IE}*uGYvO1VEr=4UNOw-@UJ zEoPk5qB+0xm&A^KrzMNWC737k8c#7XHYNh~Lekjv@%V4-NeW(L zg4of~L9S&v2KC}h;INI;2by;RRud#Z$hGCfG2D%g9yXsS$WILkK6nBfYveW{G*MbW zDk;195Rp5OEABv@mZ%;;$!6)`0+^{0U{5G=hN1~v3w8nNYhW~~=+~kXkqi}^=WFNc33g49`rd*jCo25}`NgPB_sD(Y`v~oT?^g3Go4wKGZo`m(Y&nOP2J_0G z%F^55#4x#%Qilft{u)cX4xW$vwwq^X)oV^j?-ZLP99u6J1_z5{jQ?8b;Qh}3V&ig= z;yLP{4W%NRQz`kgwOr4mxOP-1Uth?I9Wj~v_u3S81JGnzhBercs=U}^8d@-t~84c-uU9T7L>H_|Cx zz$b@Bgc5b@J}%N=_2Z@bFrRp>+4Y#^rX!i7UVkWsEAsmM!0PKbP?=kRjEQQm8<)8j zkGfJT8=I-zV<-`kX4i=a?Q|QQSC=MiHOi;xx0hLFJ8$WPPNx={Hb`(GBs;P@&#R5n36aY?Ye8A`sF&$Xx zSn9$7J_BqT^-}NhbW}e%mxECL55HKZvKl~j9!1RCT({BC(BK4;(=5jTb~6Ax08XRO zpbd>aD2VU^-q2LGXY6~2l+!(HTnuC~aqPwF9{)=LDj$mS+5XKjzoJ~(WTq4ak21dR zYIy^4Z-e0=70T$lFYQ=TTPuy8$2K`56vJQv`)$j9s3QMDV!i7iqb|cJv~af;fs%Xe;TWU%H)o%Ge!-3U z=Zefceu}@+>@$KVMSw`vQIA2=WuvODp8qLvX7I5lh=awLf01o;qpApuz11hk7{{PYoW<40eE^q+(Hi+Yv(yUJF@NLDZE#lmOOK0b?J*098Er zZs(;ZB*g<#8u}#3AoEceRa3of#P$)Dwd2Wg;Yu^zyAyW{&|UKg`2QO4cY@0q%D_69 zqj5=vBl~JdoETD@KwnvJXdw8`8bVGV;nPuFvkSw)$tC9T!jea<;uXFAZ@&YN!Yuat zx(It;HCt^;(eNadS@D2} z=8_z%>*|*dC*y{Ts~O`JBbw1v-5IK}EpGS&*CMN8w$k5k2EFM!TjmsyPk4z=wGLG? zs${EMm7<6E>{~X?`!C`~A(6b+!&_~%$2y@m2?j4pr_ zE_A`_xoTB-$s73~@;Y=T-4E{eHY9tQa~QGtZCmz`LFBGqxz;O8O-#UR?|;@KeHkz~ z$Vg?i@|ir&+bg_(AXq+k4P*Q3OubU+2P;xHGVx)?JSLw%I zZnbE$m_%XH`dL#m!K@x3N@^zSRLA3qnERTZ<)#AGoD8e%<<-pLv0H&(2rIk>yzhuI zlp{h%bgLmMr+oWIa*N5UTliPdxZT z+MV`!CrnbNVhWdYWj_`jW1=rUcH;PjBUKfDS69iYiradXc(6+M^@={}??Th}-*FxE z$HRB5bZj>XZ;tiN*|i7-y$$~yc;JV#@?ox7V^`@6G_i0lz=PN>$s=tGDUC_bMh{wRRME3*||fS{npmOo!g?m!ItamV#?KF?y zYA5=4Ra#w~LHD1=AW)P;lk;u!rQm7fNguXlBIwT3Mw0V3RHn((|6$^ZZ9*v9?)y}A z^M&J$55F&~U@p{ms@vHQv);jT)pH`S`P?l`_MlIdNX*M_dn8Vw`qbw}P*ntZ3$qXL zdN8$7;_)bLn2gIho(wUd8;CzGwn8SH|D@m41%m;}4tS(X8_S4vM;pw1x57V&C-tKA z{PvetZTp(k?XROIE~L(cbC5X{qe*eRNp(x4Lwff@n<$2CL=}G0Ivkm`Pp#RDg}dr5 zgw{C&bRiep>6T{`?bLWZX3W4zZNs|h88oHc#M&a3x>#8ENx0HIhEl&)WZi1uF{iOP zG}8^zlEbaUm}g@(Stbx`_R8u${AV;9J~&p20*bQs`)Zea2+(0YNFb3N$iWCq8BQVq zolG;E@gbCEmAUdS0Gx|IlR^Paw7EkY2IJ=8fl6Uc_Ct;Yfg$iKw%2angWrGq_Yc;XvUmY9nwjMn9}x*H_Zwn-S?fW;2? zMXHK2roCrr6cZml?2{cPhL?zF&N6H*e4%ila0PQ5(R)TFmhnW|zs!Z5Os~qk!4aOm z4Jgp9-^ZD_g0n#A>#8s0Bxe=aZR0n4`)c>KhaIPm7yIaF3)8jo+>LoF;{4Xe?MxrA zV^OYB?Ezp3hy&=^nd0;{UoyIZco%^{zh}@VZsxg3@?75N5hsPdIS-(@N!Bp!B1U%n ze%ktLm}lrRN~;f){&`+IQT*B?0D6j)Ww;j(Q}9QUZ64qzROO}8-V@zyvRe`V2eg0K?RPfRakkBUHAP?P1yH~0rX!$u7HP8>WJwz@rLm} zs)0;%ME#OY*}tU|DY~-#w53xs=l4xncS9bRVQRxN;%PcZv--MiaJ@54@kE+wD9x5? zc$dQ&uC@K}6dRs*z?VA3o;zmYa$?xIbz5bl#jjl#nK!Flwgm~Ll0McG#^OO1tFg@D zBMyu>_THS;?hqW?iWaU8t_)YW4=9>^7d;2_`<+x%G{Zvz%M*p&eCz9vY3iJ=!k7nQ z%1;B-U^2GE@YEqK8v0%|qG!b9J5_A+#Q=qXi|>^Y3had%D6pjAq7Ymj3N*B3mgaTw zf8N!9h6SQimy~tj*Vh#NV}=P|CXl12a>esYw0}hSihc&{7z`i^9^r-;xeml~<^RDE zHMlHA*8e|C%(1|V}FyR$K-aK%`w{%zZV96vBrxa_y3ANTq3!{(YQS5lPtHlW@L?zqu(=Xh)qW-*e1noL zypA^UZl0#oa8kcMoIif0jF$0d{kC}2-4}~NLzPmAKZ6rp&ZktkV8qP4h>aol*oe+t zs+R6PQ*+$iCy@=YU8+1^<<`w}F6CDT&k{b18~sazh|@T)&%2VbkrXpY0n?Mpm zP&>r3*>|w$Gf)-6QvttLD5E!c#IfJ>o;=7Ja%E-{h6;w4CmJDyG}iXag*V!b9C}E< zz&ysC>=cSgVLU$?N0m=?Fo z-iT=3P);xE0ToC;PACKrZ3;FI-y6}?Zwez~OE@J-m`~cxH#-_tv6)XwNX7zlxzJ!* z#OiJd4aHlB(MzhN(SMeUcf04g$Jg7>{}5m0bL0YCrpoQ|nt~o=CjEbo)nB$pg?5Ij zqT+*yVRt@vbIlZyXa^278K$| z&hsAb{e1db?Ki&CF3{E)j)#3qcBg|Cd$6>c`dKj~Cv|!i64_jxX}0VfYG@f|pEv0v zzFhZbJ`MCw?6U8bM|x1|`8>&<_2jb!=-9~$ItM*%_>zjkDEn1wF2U_6c2WZ34a zc_(bo9aqTyg`fR!c<_Y6z$}uHYKJM>_W*N2xK+Z_TJRO<@+jX^*qn_0( zEl)0B^FKYm66!Z-8ZmW+v^d%;JX!-GUOiO9hfbkpwJdh@hmi@$yBE6M;XSm>+QXVH z;t1p@ln0p&<|lvawz_kt{o2WrdEUH)+8OIv0ZP4B@fsF$Ft{$*X<4tXmDJ(x;KkU8 z*h$WuyYUV%^^U;9ign|8G-&%W`=w#i;nY1#{`e+iv1cTUG- zfG6!gMVBn(O@0oo-V5v;;__Eqa?D0UFkdgf)< zmbSW>RU>2^%gMy{`Vu0Y6U%t*Gz#}aEGRFNY za#vLMNLSw8Bj1DW^a~ZmrOD1TY6w-68~Nfc>Ix}XnvwcY?EG7}&)ny|fY6E2(Sjlt zMOA<8mc)eU(mHpi`LSuY(FfsYi3_nuFpHX%@L(tAj?a6u03CoivlrN?RNXBZGp z>BBN30pH$5vcD>;0pQ?Mm!`n3)*f#&7N2giD~XM4z=9Wq{|X+>`QQ9i(3?tE3R`43 z*au?!D(icr_a_-&gBli3tCzl39X&Is4ti$d$*SpLiNPhbKMt0*xkTMcfBU`&PaeEZ zLv8fo;NS_Fcpb~W_9oJ~lQ5Tz<^ySkem;`)ce4-I7IoqLS^6B?#1-r8>Ufo;Cc*m? zBoXw&BKphC=N~ZWqYlc@`VxlGQOn3Yn9rr}LqvVCdUPSaYx80s8qul#gD3O09?W3FiOvdghG^*?_ip$7{($GVm^F## z&mq107=B(q*|g>j;RpI4hVVt5{~a8J;y)h10Smr$1OepRXTEx%IWfV#AZBeG( z4WDQM#coFYnYYwVR`B;Q;~K|er_NXgf$cyzks2Hzqx_G*hzOD1veU?ew$-wmRHv)M+$4B8}B^5HPBd@>|0) zS1u;_`C7P7X6g^47ouLqJK|`iDX9>ukp`Vcxsk#;<#1o%6{`NgzG72oa|q|C`&VZn z{o6I9!U!3m|81Kyi5?4gv=hKzvm8gRCDj?hq@~cfPsW?A8N*t@0*8ho5mO-BM-^IC zS`-HKQ>Ce8T>es+Km4`i?j>~rsxJN0wJgv|tjPQB@W~RAojorm@+2mhhRy@~pH$xJ zB!9N-prql-2ajgL=4zSfV$j6|I!o0_wSXF9~g zAfLXLRDL9_FxHAZCTDLIUj_nrHL*7^VIedIXjUqL$H@_mE+zAx={(Ol7zIrHSI7;>-Lgtl7(b+$x77{G-h7~*bIy!e(izQbd3 z{`#*ZX(VY#!dq`MGZGjE==}JWXpZwQoOamMI&+S?x?H(r3r4)!rFLgM0*3824Sm>) z&3M6lI%MoEBtE4ES;$I*qakLf7ZHaaE%6?+e(F~PFZv8sOI z!|=<;dnWfIMst&;=+FMM)sX5BDHm=k`q-SZb}U>3LSo@-5)2?Es+5TC(EiX?BXF`CT1=r016#8kES(=(5vmbF zB+C0m79z44zbp1ClR`fIE`FL~oD_Ps=B~$(!%^4vd-P3eXQ|%{dD@hSS2z&FH}u@TE`Yb?LVQd-7HW9tVMDbO2$ycllk5 z_yLz!0)q1CZV{1_&dLOj-`hINF$KvU3b+gZJjQ^BTpc_isq6F1tSuTyW7KwJVqY@( zdj(L=xPKzj#nBJM%QQR%a3ifv-QHJe)xidb$96d zewDJh<)Kc5&Bs$uu|xF&M!xB-84o7!Uvf)#t17B!Piv2}<6_Zd4-tY3h#7!g1Xu_u z$&(e-|J!R4$mWRy+h+_lu5jMOUyLOG__*bYOtxHpl80Eq~jjSy(;HXv40DF7-#RgrOv#v&Hs5J|J8JF zDS{{{NvsR2ia){$J+{w4v#3Dprzu*%)jL0j?ERTZdu{n{_LTN85|xoZ@SBbVI5z{9 zX)QTP3Yz}|&k79AfbL5@O#Ot|GyWE}F{-a{UmP}=BPHMWQ|mAnKr$=@MlX*l5t zQc%`%kH?9uND+L$Dw{CdadaiAEroq96Rxs2~OVH z$@!2a6Vs<&GEm(heH>5UOVPGwHLk`Bb4ai>ac#GLHC>rV}aqvvG~QbxcU?ig#sS^`F`e ziD1`wi-Jz`u*0?7&gfESFjhP_a(i;G{q6MHL!(to!>TO@D#&QSo|9QtAe%*IG^k2&h+Xw)z`xsQ04V<565$;y0q9jYjBx9Y zidvyv!_DbpM_7r@#7Kn(Dt#jK|139yD6oiv87)lE5&?TVTKU{Q?Okq#(Mq6yf-U(o zyWG0|d2)5^xsR`i|JBZ{MiHay-=~7oc3Rv|TEwgZ*WIbAIcB7!`W$hD>7PM}gONI} zinuvjZb7N%_W4;YaKpkIZ;+iFvk#BrqUA_g6EdAJ9u6Y2J)Ax_BEY*mUeZh~I+*8V z?QyS!y3ipQeICB zi=srMUD7V9seqSy00#%#f1BCo6D-(|66HZA<(9Azby%-dW8;9@N_mRvmf+?H$U@9OCXOe80Uu+Zep#442vl%WiG^#{h8vbg5%&i zD_V4O@?)4(qqSS(@*O)(p>g_T{2Dsf8=RVkk1|V z;08#A`n|M0qUl-E$sO`2+q>yiwg}IXIrv))G9pL_48DDI-X7^?bnmY|%IqxRxro&@ zT*mQOTsHA}Aqw|*yO(IQ3p*r>y6GNbLzi$C|HvYMf>$+muT8KVG621;tlsB(1CvF8 zRO$}h`SFClRYEy5vE~{`xG`daDGnH#FdxC@<^LJD-DqLMswfDz$@|1+j1wvt85g00 zPJ%wo1V6EtVDce_u)(6g>F+%`U)2luQ%bw zbJOv)CG27@g~qdFY2z&Nj;juvB8qNvY=&Nn>Ep8}LmvrWZuxn#+rP3Tl)~wl>3a#& z=T)q${T10y<+pQWrg_U8@OLhM{j-29f1miCkFf<)roWED+{ASa1KL<}$0(Q7{ytPL zSiroku5mv;)+g%Tg0HiVQM8}c{VW4(-aYkc9=glF+y@~8WrC&zJy+0RKavCPfPcsM zzmottILMF_Uwm4_L~AUQa_}p18nbso4-JW8;`|$9N7NA_8o8MXIrr1C<6;A(l%lt4 zPGgZr0mE5oP_vuqF;J>4+Ug>k_e!&(i}^{$K+HJc_dW;OPK!zRkMNGiuuY-Nl>dY|WLNjP=v@(;iN|pdJ58(`s~KgsAfG+^8^?&S_0u zz*GGXx$zzdSBL4yhtpBmKP4lW0=*=A=TOPAKnFHPz;F67c(?kd*sD6$&SJ;K*XXv3 z%5A>m_DyL$3+T@<2DKd!0tQuVeTtC8JhRIWCvl}PDGqv{p~O(z z%COZ?(v;|I)|;5_H$(GK^)}m^F+Q12Q%;2bwBoy!p?4Xl^ba1{jtTNKx|KVo5?lCG zgha%uwgz}53l}2jA4vWc|E|JJ8D;+!{~NHuK=8JQ+9^;$5-{p6JCZKt`QRN6CNyd= zdxLu@>1E@CO7Zl)N%qVAIbir;Rr?W_q>{3KvgkE9O{6l@`B;Nc)6Ax6FoyqeI-c;m zjLX$PViWmc?mo!LeIY#QaU0z_)5FW5Jtnznul~_Wa>D-Vc)dR1iZu)x)gD+{7$QnZ z#wCK_&`S1<5WMjw^W&+{&mXItO!WKkrKnygcc?~)?q$wnC-Jd92^jRA@IS1IwO4tQ znQySWpPJ2Q9*k9_o$6v6Bfw}=Xakud)AkjmXl=DgX!FJA`q(%F80R)$ujfS+W^_+zAO$w{ON2X_~Qj! z<%_59WF#;jv`^jDe5#KSeV+!!(iv6;*c7r9j(@eAIG8*sa#<0`x+lzm&7usaKU2J> zXiT>wX8d2Xkyq3haC@J5AXOk0%C)+-!ecIZE%OvDnvgJk>|5gvd?_s*x0rCROGDLH z!;azk6PhJ-4F^qR=m>}z|5dF?Zfs*#K?J=flNf2Jq%@_?VUMUNc;aQv6qjUb8gA8; z!mA7|UrV(a&(nG9DWk3cYdsx$rxF{)5MN=mr-ny((^$oIi8Q%@&m`wl==>n4+PFVK zgLP4E?d~0hz@OcUt}tIln-bw00y*`* zHgzg&ArsERX(s{0GS!Se%F7c*@ti&`&V1DIa#ET@EzS>fm3<5ID8}}5G9-aDx$9r# zjUp#FMjG+U-kxXha8NIWC@IK0^XG)LWF{ES4p&kq`tlWWLbf-hnC&8-J}@tR_^+WG zj$P`pX9_J!6g7h*6gL;JE0d{7eE#I#qC>U9Gthu~#m~FBhHekdG?+dFFFVKMCI`Q- z{SxEuHJ!>NJiKZ6A)>CfM6oct>@|pi-svP77BeZcPsxJKX|Jq?<`VmH?`dG5xoWa`T@ z4uDRZPjRPhemnhR4>u-Wq7J(^S31_#Lf0vGQ}FXDY2TSi*UL`dx3ZtO$fdWIqO*q} zg%s#-!M1f$TG|BdAH`l3xSfBZ+2je$TLwazNZ@MQM*=$a@iTIlrLInoW|9jbFXtav z+PxU8-#hV!AGy^vwH@NYu);O4j-pty*ElIESyi9tL%aBR_bU23TuV}*9qymbp9Y=Z zMN39%GQR)rvH_1W@Vfv{IO|+ssRI0J>&a&E@)5xS)Rc186t4R?9t+3SxJkX?74I*z zF9rIdO|F(lz9%F3eg!pcqK~`1+bV}+`0V@c8K4N(RjXm9aJGhg(>nbD*J|%JKb7O$ zAG&V9uBjvS3*X8F-hxI^noL;T>m^TqYeD{SxgkFJ?zo`J>9^8O80tkvN7hUK7yN;5 zMjc>QH>Zh^M+pe0l4FqfY23P#He6WOWTT2xiiZvyQYqi(Q(rXiQ5+pT0a*F2gQ?YB z+DJkT_}zeVha3|Wx}FC$*c1<~j0cVKoK`jG!c}SCI|I#bC>6+TZOv1-SE`O%1wBxaX z3RRLf5ltG{-W6tUQnsWL5X+^n{$F;&6Fy_|FrWg6-Cl=xt#2_aWHAB0{D zD8@8MRSk2J&)sX0($HwLv-q|-M~W1jV5&mJ;!Z$dt>zE6k+ddEdrwyf z`Nh7-lMh#?j7y(g=|Nl8*LQ!OsJP>rRN*y&zhbiCanK&5%cm4g&^on;A~0IV5Xn^9 z^h%PuBGE$&ox3+anfcucJIBoGSlG*6!?zvGs%aE134K&`-HzjEC!xVQPi%Ut$>y(y zH~(eP0c|Hs!T3df$$p-@=}#Zk%Cbl2@mzEo`ZUDbj@*3JkI%FT&1IY0ogeyX#J;|M zNF;km_9e^dlR>9`^nc}2i0BxwoU~LnmJww~nP;bF@LW3il|GHQNLcH0zr>G$h^0Ww z!4$DnVPmJs72 zXN{o?j?iY)dYR{K&^rV#@j~CA1&_sk17owE@gCK5Hj1@>@-UYe_z_`)%d3y0_u*`u z^8$CFC`q^HeBOO(VC>6b9aZLed^L}yj>=YU*rdk@x84xB6%i&DhpRui-Vqqddm!A5 zH7-xpPnNOms%nF_DeQMm6uRBHQkyD67eU$j=Zsu_Vd}NJ)#vJ__hnx=ggktoaQ|O4 zi4f5U>K=!vvbXRYYF|$8pFI(j#I{U^Oqj9=Z_W1L2pE~7tnl8RWX-=zUouAGk*WTH2Hpj{Mr|E{ir4V zaN0JCz;m3gchPw#($4pX8i83s+Y_lSywQasJY9Nzbz{BLHSSJOo5|*z%~ePR6(l1J zT{f=Cu4-2pR|gJ!{}#5woWmOswInNF@>Er_G+@ip;o*jxXne84zYG=ajtyEpK0>Gy|skN4k=*o&J7+?#Yfg{qHdfWN_Yu5E@UuC94d9 z2c=I$c<9v}*BvwZ1x_f=Rx9t5IAnChvTEs!eq8f;3H%f6~@D z(LH+Y^vz*eCQaO+<@<$<8^r!KQQ&oF&^BiE?bn%mWV}j$9$7Pk&6~$|bDv7b^GU9^ zmaX3F&(1};pF?#@V@(X6w`zP+KQ{^1S$roqH^}3zVfgG>IxJ)o-Pwd8CZaSEbNmz2 zk0)xtAxY-p%RW9K1f4ZSl##AhhAW`>Wa1V>%s0!O{ydPNz70C&Kw-sFs_y<`Y2iubDV7e>KA+jU*ShDCF5 zANeP^eUdwzVb<+;a3nwA`9E2DN;@Qd*Z4w+&23`RZ>bBN!Hm>aWF z-y{J`nznpXUu9>RC8RY;r;T)0xYHCCvqs^Bny)Ty{4vzeQ=QpP_{6>zHzsQ-24vay z%>!KCTmE~XgHRP(v_Vd#bREv$BFn?JhQB&z`H*&+ExW77$gaR|YKHUfc*cXxy-Hrr z{Ew`ag9kBZXQ;ta#^JqBv$@%ax@5#g1crV~oEeG@iT&g6Ii(^CSJ7w?y)$j-u>0tJ zu_XIhqtD5Ua=P!d#pK{~dKx%CJHej+7&Rs(MiOEmCzMw} zOAJx@D+bG9-6q_6s~?OVqE9+2ObB2ltFw-pAA`XkVT@siUB%f^*^uQX2B+v!iz{d& ziODWQAgu^hy%m9fpsPet&^IvLQSS~+lX$X4yq1V`oeq)som*qM{s5%L?#_l3O~ZN( z$5P(4?PVJCqlfvxywDuw7DcKwb|W}NyJYz(MjN-4(C=F}rXGx@-We7TO zbR%0xVtnhLqD5Au3mygz9pxd8#V7sM2weS_CHy?=C%=>;gdEFmvBP`vMK(MK46vYZ z_H{{P>BX$b-_8<=!W*2(O+s?O4s|b(=RR}B;5#ryA7f;i$pKpJ4^S-%J@X5=MX0ma z(H)J1eG+vdqPZc;BGb&1e}2{B|5yM^mOD5r1|ozRY5yJWIS&gSV&h?$Q{G!Md-Gg}H|r zCu9;a!%|bN+0=+?uPrz^O+O@xuzy2(V1%1lebEi`uSDy>WKNKqJavO@2U0i0V6|8w z5Y42ndRdLlkU_wlP5bANh0PKtQxjgT>`-H=P_VDFO+%)lW-Tj2fIH2~i-_b6#gQv-{T|}I&CUsx z`T5@-5PAor)Ri+O0zC=QCn}LKiccSH)`=MI?MD>xax!4_rIW7aIr8oO`ct?Mv9m^U zgs2(CC6sHP1YF(9OAelNq4OzNxEAn$!h7$ULBck%EQ+|6GA8|4@$Gt=5}gHF^f;V% zI8IHRIT3n|zrF5$vTfgauiNQCOeiQ>LAgVBY-eAqX~CEiBrnRLyU_Lte~NqQ@LR@L zb0k^;TN{2YsryxyTE9|ZxE1JwfhU(DU52)DBnXj=;_LfeJzFyqwXwtd&7tTggpc`= zCZBN6BSy*MuWavtK#6l`J*lrUR1Hnqt;B`@i%l2VLW#Xm8*CHZ@J1UBuDS7Y)k?_Cr6 z^A&a3np|5V#+cx+5RmDClDWNACa9@O^I{o2O_mm2QC+KcaLQEvc?XXGNKe5qneTUi zcv=vJC9?p`0a6YGGK+n`O5Iag^rQg1JQ*+-(!T+idIQX%&Ib^oiCuS9j6l-l@7)2QFFgIy;Sa z%c1G^gz4*&qi%WwpK;eKj4hzdy@HcH41%ml+*x}o&};3jbywqetgZ>%)w@Eg#!M|7 zQi;aFoCxqA5}1f<(moTj&6$4(JBu_1{*G7Bl3rt}1#p|l#0Trw&fooZAsSwi=n;c(aiuT5J7OE*yo-c`XHth{ zW{v!S@mpeP-=BX_*;H z1>`R>Nex+!T^X8{+CQs|*}9)O&H6mrm^Vu>6 zfo)21K|!!K6`gNeH0U2Fq4j$$nGA``usz{Y-XlHe%ilUbqc`v3o18z7xr<^um|fUc zgc#&a@g*tNvoOaLM-$X6%D?*oUx?05L+0Nj$-7xjeCd&7$`~KTc#H8P+{g~JU;qEJW zU8sw;$t12c-#$-Ht@IeYU&Q4~;mseKOtaBy-IIr7;h4}U?|dH1oKCVSjQOq2Kh@TD zvtQ9PIltpIV7%S(?WzGIZHGzX5J&1+p`24rEJ`luyZ-W>`jV!!0!jUR%mrQGIey`V zE--kU8iUIl0R%6emcAG;c;P#S$Nh@cJTPSY^V7pc$^Xv~*0!zud@9&~6xI%PF_M2@ z5U5d_%v%EYW43W@obY3*NAUALTcGUz{w5|!Ru;v;dp~8B6bByntI|K8G?40(`0;QV zFhDGW9Fjp+|I4oTxAy8R!f*dS~{@ytB zYc1@xV4wb$kS#g^Jbv2`Q97;Hdz#0~@!b}^&eX}KVrSY&l9TinO7e|lod?yyi%l~C zua;VCjxUskFfQ3=98*u5U@-w1{4*L?#_ghg|u~Tr&s(Q%-t3f-n93n{qjI&TF(#M`$_f|}SvKB>C zAW0!wsA|`=ybGid&+9rU_k#@sOD~O4*2r(K`i9o;%d9#mYT>HC>PTDOhdEC)+FFu! zV1c_XcW9aJHY?$5>Wxg(@2Yp%CB~Yjf>k(F`vtjsHGK7cyT19e5UgG|v6BjfJ6m1M zzGgcQ5#D!Vv-~eqcIN~Mv`k?sqFs+TPFiUJ5M>wdo~{1hkg{j3)Io3WPZs5IIq5)^ zaYAu0Aq@YV{UM?}RB!qQiAS{Q1{wqr?vP0BkhMb^$(cR$JZ8UQNcX_=>GFHS^3Faf zXu})-fkRxjwZe?ig_*o_R5I|*UG2E{Y`*~oJp zp)6(j7Q(XAV$X8i{cdq+61o16_F=7GcVv(Aa5k=iUk3YNLbKQ0-$_VKZ`o8T&wQL1 zYK>Fuq(~=uaUoNjs40XDqvwW`p!&MG99npCZ-pz|-mNipMOZ||BP&}}RTX{lHqcSj zwp+-9o|0Xb|GJr_y};i&&*ae=!3|}83Ig-T6d2d?$rB|_Ix36G-A>HyxJ0d~|4N)P z(XVrOEjFASH$U8bHr*|bqvR4l?B+wK?p znh%@^`gr!vm*z4_jL2qCX;Vtxd&=?1}c@)%j^^oQOvwcyWTJ3tnHK+SgXQ9}N2m=#DOQT`b+~oq% zJx}VZ2(_@MWaD1j{i%t`)8@d>+vQlN^O^(gY+D$A@SxrF%J58Jc@VXyqjmQe?h(o4 z+8OA{_bpLZ2eDy=uF#X#4gg{593a+G-BiAn_c8{hknvIyyW99FSb;(e38TEG>igaP zwg2SvhYSi+>cVE9tX-UTHprn%J9>?cJP%JujB$*)-|g2i^oH# zTDm~OQ@IA^0cQC-miVsUc?-*;KU|>AKK^FR<&i=vwu3Z*Gt-Y}#Eq#NWXI+Z$2*bu z#dj5HUukX8u&_AdxkniH0|qfqeov*4P87VpJ3nhAwA^UKKa@zhsG6!1$79`6LV&bY zH|>rc*Yoecn8K@*=^BaR#pRS3dL^xq3)BE3-&% zCy?Wg+N*yWiX_2rWYySj$bKNbmT7}*qe$NCX_Oa!!)AoF*=x7>;@itj6-Ij^P$io& z*LWx>-O9czfV+U%_8STEs|*w=`~A{6jnu|FDPZ$kieT>>K}yys7->t0MpV zC&isK<)fsUwR58fla2l+C9PZ_MGTeu#mRkN6L81m^K{mH42;j5u|b|YYyR;=6zP-L z`KN?fu^*~M_C0iS&d4J#YtGC0R;>}iusD*wzJh?Qihy%Y%aC%v;hkzUs(4C0i1^O- zk__OyKw$fCi-wh6k;F=WKN&akLR(7cxRKP|3Pny)e?)&{_A|r9 z?3c>oC`#mfhRH|H>VnVqUhFa0JV)tYQx0KWbh~%GSKYQ+EqQ01TfqVMPK@fBU+OW} z5R%<%rRAtzbhI(sDLxkWgb>l$@Kvfi88NGD!QGBO6rbHMm9KyMLuO3s+JSgSMf$*x z%hKLihQ@p~H(-HRuF9q`5isH5XD97#4&_`ji=$w`P4@rM_+y0GgN(WqrAB1EuuGt6 z5FY{5l0fg+c1%~#xW7wkTeY74u>gprePn1( zL5#u2u@D%J}xskvwN#Sq32btm10+tmvl4H+kF871J#8P z>OK{k#g=gJqw=*#8HBvHa?3;+&k$Ui;B~tHt`8 zx13`$*DT66FVP)lyVWq~80Bu8ZDz(v5s8BFachH`-+vEJ<#e4k-;MG0$-^2}t*ahG zrLik@4;sw^=xi!%ge8)SmGcd?16#@rYq%w_a51JGag=l4t+N_=J%09)+@7oX8Kt}M zIN<19wAxQ3gYBt3M83ExR~piFnJJcRvVlJ$@LGe;#I`v5qB4b}Y${X2C?mBR5Br9u zKtf1aF~9hz)23$svBLE7LGs*ey&D91+(m!BNl03Fpo{BU&yYNM!l40M zgvI%5YDla&d87YjcG_=We3Ug}{US1f+D6SL_~gUuligu?zbDM>C(6fwUrOJFhSD`M zp{JO($S1JD6=g9SBITGfjR-GC_lOsbGtKX|v$z&~SDa49WDCap z8LL4z3=&Eh7-X_|MK6Bce5{>di~GN?9g}0wswvYXS^m%DV;|erzC97Nt|{@>!)BcWT=|3d_iK5h;geAgH^pndxUoWt%HZXJNRT5CT=b>KiAKX5QdNpsuFUnQgx+p>{=&#M7|An*LAw_^-qzN zj|%X;muXvfBHeqfMUpK85cs3op3P-!v+1gEk-`eEqS3n`#Q0KT>ydK?fYR-ua%oSPs)($}l$G*O7-tNo^yt{Ny(I&=|agDVd0 zBzYA2?lC2m@GGB^bJZ|+(Rbyoi{*sgz|4YFh^ZXKZ{M>88j$Zwx^+Lp+8*yW3wJH9 zA5mK`mHY1BKQ0U96AQfhSs2T5ePcY|UYEPOta9>12{byX`oT9NX+WMVZ8)}8IC$#l zS>@ubv4ffjOTgc`PV;O3RV{2gGd!Ie8`P&nqLPp6optUB!fcw0qJCt6f~hXEKwQ|n z7h!w^OSPoz2>ftVR|J>n`Xw&u2vLOk2>f8G5H3#V@co-Yo)lOK|G}xJ z3j3>m7^on8Pohl4+ua5({3^>8NKd)*f=R?hS50W$u$8KepQEy*l?rrCpm|8hPE=@X zB|$U3R_3?s{uuR{;B7O}WT^mOJ6xXF=aD_e+2NsCve`72|HAk#|Aq0br_C^lxJJ&E zN|1g7oT-X#`jJH*SmD)wD4TzMkO&r7k|213ND3M=`lcy(z9?;QZh?XjfQ8jhJ0a_p zu;o9Ck+GpvruwZPyTVC{-7@=4#b2zeT*AyNU{AJGL20j(x}3vj z_xjudtdC@nr?+45De&ne{2b1W+RZs8HLGK=NIQzb&C^x$3*2#ppPRTc-Fa*{p$Z_9 zF~0H-kp14wmg~$a-5i#v4&J9<4X9w6rc#fl@lsjDz^5S}BHvYbA^~>cbU3}A>F3SozRgQAR<)| z2!s|o(n9a>KfGUi%egprXPhzqn~V`-!ruGYd#yRwUTZGDlb~pwLvh6km32{nnV8ml z12G;Kx0@%FO4dHe%8f=0tueL#N(ctDuM1$PGBsMoBI{R2;ZFr>nZ1@Vcv>P{k0G^v zK!%^|;uLot`{1dpqMUpYr@CI-6JOW8_2Mr(N`j?7ue}INbbSA&C2SNHRWkFSfVMFC zj`9=5CiU3PoQ*5*+(m*{c)OmnXtA7hY{qUh_!V4FrfiiG0#A8u${Ia@%|20f7lE83}2K8Iq*`+vI`z9dag&&;C6m!?>-_!u;e&l;N z`jhu&YekI)hv}l5>JrJ;Tko&8ZNBxAIEIn^W_r()YLg;Wb~tvlT#}x0=syB+h~pOs zEzIJ*E2S7|DLZ;@xPV9OPje0L@y!R%8h;cgeZLpD&#uti>}K^!Kq9uWRbD1_?G%zx zLt(FOrl&pSoS6xx(7ms{ap4jg9TNJGGrtA6;^RrB-;!JfE^=%YSjJ~O^?i$T0=TL4 zf3H2{$d?0(qRi~szfaXbFJf80*+osM8STI$!&83X;hB*XJ<1PK&$6#pc7YhSt2Nf_ z6G-c(joMb1$4JtFrGBavB+?8;4e8+Fz7x8Cce(aNYMF6pMQ~E=`$y}1l`>_}vFIkh z2%5s>1uDI;*v|O6%7y`XhKt$R4TALzfW#;(G5y`kCW=WTmJ#fOyI zH`1o7Kl!YvUc@vCUPTmr?VFr*!Ji!FoQ&+18M=cP_4fYP&!1-^pXI_H2TOnmO~{%+ z6xq*m*}EckZghN0zQ*t%CxAoc3emNj1nolCAK>*4Jaku}+u+_?WLhJ zSpwF%IB)vecwl&8crtNOjxrY*fJ!AUAeX* z*}JR4?1bEG_-Zq;E@lc)X=!09G2QiBdV!H)=tj~LttdBvyLRG)R;RVMYSbs>%BOQU ztny6u-&e9LTSS4G6tx^1*z`IBvo%qeu^A?1QYf`|w-;S#f3^&KiNxXOY>Hc#Ge}`* zcE?odIOUc4$-dp(6ik&W!)xWIn4YH518LLT9Xncw>^BixT_C9K>2GU4JB}}nt?PN6 zsrN}MyHXN!$yureOr6}PT8bm&x^~?1&iM-on>DY_QLBGX;mhd-)JR6ii+NMDk_+oR z+%`Pwn5VK;Be1twbUWQ_EgYJ!llxwmfV8evhTf)?+#(>pV~zM$V#J2=tvo01+aBDg zoOCBiY3$7AKQI$)^3n~NgZhAq>UYcvRcea1eMVPmr^FpkL|BBbzV-vT4uj$l4sJzX zt4FM4s(H@%->W9^5%Xj?KCb=!wQx`+0*LlIWt2?c5NO;TbOFyY0hcqCFi}l`q<+Pm z;3sQ9&n)O~N9hFy>x`RndtWpOKvsfYW8H&0FhCaLbuCbBkDTp@K$4mqKCV~ZYXI76 zIiBr}h60j&(yzSMt8A;+^3$OtKsRRt&yjV02M<-!bCylGBK*>kx3PN#ZTf6_t&|c7 z4fsz!BC3+RlZjO3CTxF8e*>R7!vCkr$afN|Fs(0fku9_czgnH}lfI7{>5Pgj&9BUd zEI=XpnNBseY3W8G7#{w-xz-}4YK7&OxSsd?N*Y-{zvZx`FbC0FAbz{KVm%baTc>_X z^3R6%Sgl{pH0e9lq$XJrzGn|4>X|tMs)v<7QSI0{Hw z+!cP6ZQN-w?M#}()QwT5P}G$xNow?nphmerJ_vm!gQBj#Fm%<%-` zBYOLz3br`^5DOYhNp<9Qscjs9=0q{H)N}?fl*JO8Pc}Uh*quX_)aMk`Abgr*!-vP* zpszIwYu3MZ8QeYE*w+u^6*%YOd^ z8aU8Q?UlHH#PGr2(u>$C{jQzmOjJUf0@5W*!4BSYOv;f@Hh?}FQZy>I@ijN#mu)3xvf=WO;Z@@bGDCoE~6qdnne} zc;o$Kqt0-9jr_XzQC?0#Ox%}ehX7&FP^wRNTmzI=wqFYkMnKNHn>VRr7^MWl{m(Oo zSCL%KbnWWDz%i8lfq5P7=50y5=$^U+wRxQJqF)c1*?RbFy;bf-5GBa|n`?Y2_0k5* zR$bd3ELf{&KqH298p34lrAh$v&3bYB>RH^%sI9phv`ZkWD zEA}*Wr7@j$Z{vMb&|X@#Qk&fFN6I{T$isJeyYAz*1x0iqQ@93sea|v0nJNnPnSabli`o~-+KO}p^i~RV?rk<-oF{xbdJ>Diynj+3Hh=gpBU=CC3bYE~ zF6!x2`y}AJB`rr%kb~cZbNV;RDelyrX((kab5kZ+*W~GS9Mi^3QV(LYwmH*QJCx?) zcY+SL?SG!+F8W$eJo5cAsxNwjq-&Rs&&I`!s=|i_Yp$ z9|sw);LKJd#U3)yfFRb!&n!;`m>ul)P%)ocqxz8`APTs>82zt?I3kPIVA9 z{^dIQ^S55f3hTD#oXJkKlTjR%$YG=O?y4qDp3zmGq6)JFdXETqr8f*GyzsKsc28wt zjY04^PwZbS*lcjd1ld0|S=}6SrIKlIpas%G2iIe)PVkop$&|T;AI%=`*8UVD-A~@Z zFr(+gB>GauFzTs}xYP%~R&+a399{2;eQZO(+|*>!-8@gX&y<%yz26wdBB|ZU5%Q-K zxORD?(F%2ZswQOgIPc`tl67IE2g2iluFw-J;pZyE)O+i{HAD*5A(BAzqS*exF;Mr( zqkxdJZHOdaR*c=;v<;0H#-h(!>VSKTJKrdvXY|>c&2YvRQ<%I=_jd-d=I2p+80gQN zp!cO4c9T=xhG60W(Ji^P*HsU&yhf6!<><8hFPwk98WSPJ(`|dKWYrL z)-XI;G0Pe+HuEmez96l7Vu-wp$?B+N;(^FS{iLZ=S_64p|8FxGEvvj0HotK$MMw}D z>!VAbaTiH8yoh^J?=#T%jKyURM2n(_#JiSgGW7ld&hO`OHUi@28#s5<4u_S9Ix|`PYOhwwqt6dBbl;%3u!;?61F_2N zy(YHM=EFl*+mXKh60@}?k*w4{(&)@C#u(ettur&;i7kZ6SIbpctXn#-&{SUfirCPW zGOj)He&9PV&qW#FTZjCeHSg~vkr*r5$~2GPF&58Cd_sPn?H@FDt7OpjNn$$sUhDKT z@K`P0jdF#O^1Pr)g|YyT{zngxBhCd+h(P}W0_Gpw>05e6CpGO=Fa)2#Ocoe!lSv6J zkRD%(fyA$(5Y(s8V-44FP60W+&;ZNdSq0|Oa{GaaymjcSx-{+lFV=tfv3fj&x4PDl z`~PaxfgHbAsChs?>+-P7B{X$~LCj@+_HIhvaZei0kkgS;O3LRgH>J&((xL9qA=fiK(kc2c0FI@Alk+ghK=1~Skl zf?7aAc~{ELTIQm>`5)mCk?(h>rz@)-f2-c*uW5lPyG&bvZS8-%#= zFtZfxnnvj%NY*#t3R4*nZHkYrUaXQFzW}_V`09ey*oSIm2I|C#%a*2)F*FDrv$#-&xU&6(XkPlc2gQOt&|7U$Ky#iufyJ+`0z8IHuv1Y_L*a z^QhR~8em;#q(#re2Fvw2;YSF+NvU6z?um6iWF|9P$-Mf%*lV!v z6w+z;x$SUW`vxM`d9qcp5@R~dk^;KHm0UGpI|d>l0)bG6yvqGLu{hGm-Z9yc_S@yH zTa9~@frV=KKdLKV_fJi$SzKsxVLP5|FodokRo?ryQk7J^!%%OHZX5~9%TQ5)f}3#N zzEEn%nAaEN*=IEAcU$vf@&_J3oNU_p)n(#?8HB!Z|EdeB?lAp;Kdp1R1ZnI!X6RZR z(}%PRN(2nw_3_x-I6BBhmB%ECyvMnR3(uldiy5l7obp{V{^Q5nTRw}lbV$yit0iNN ze#RB5-u{A58`kox7WJyW_deq%eqtN?RqT{hnr=92xkEiVjXP3ob=a?9JE=h?SdYI!g3k!WH6BIhg2gXDB zq1&#Q)MIEs6iOuJIHKraf_^Z|4F|JVw$Sp7lRi$Ziin-y#-6f0JX%f&|) z>9G%kMlGoMF>d)|0!KR{;3>h#q4n54Qme-3i2if`7@cPOo(34LLwfEGZ~C0~bUlW3 zf%KomK$YORFg`L{uNNEZ3de|@x=ct}^gi7#J=Z!(8RsVSw% ztV0$m-12CPjIEnMn3#iB#2Cz<7EgQs(d`ZfAvETxp$+)!8N6k|vJ&+IxKglPP%6>k)-JcXRAVH`X0_L#U44$aj~?V(BiY`$HtJ9UEYo~JYKhYD1=x{ zZgLQgW&!EGBAeF~Y!)=#qTEe=F8sxR@^=JN4b*9FWs=PxGcJ`571T?cEyVHV@#8lR zftHWotfTi!iIo)q5A3k)v!$zcY!UT|SFh}=#TRyA#kf%Na!HE1kL3HENRux!d-J<) ziCyZ#l5JL4+s(ST+<=MxvOIex5`DpaM5}&zoa!+rR*=SPjZ01N2e3X7)=yFYF}T3` zkl@$uIgRDowH?3iZu^0sl~sMWWj*Om9Y1Lq9ycwhyuWc+_D0VSmXYA4F*vgl#fx)T ztFKw_Z&CY$gk1HK&Z5o|X$!lp+EYL8B+4*ji@hgPA=MTV8zY@+bXGf%*1;n;1Fu5f zs)D|#Z>T4D0MfpD_g%VD=`mW=KxBD~z`K)|g3N94n)0nUv4(*#O{L`<{qcED+rGE* zED6)~oC(QDhXfV-i)vvB@z4P}F1#|sJNmQrgV`YSw)uu!r^l|9e_&w6i zjBmcn5biaD8=5!^>+u^V?-aeF-^$CL>u(J&#Qc0PoIJVx5N>Eiy+<3n3UAh!)HdRR z$hu0o2>th1KCGcfTkM6G^+!kfq{Mz-pb`Vw87pQ?Ur$GM$*#Di5}By z_&irDgT5nXBC%rD!1xr@zVzH{{B-WIl!@&1>TLX?Wfmtk!1B3lx$8%saF-$G-i66l z_m+p9VZ6lcQxTVh6VE;qw&XbG@9$HNK3zjdt1;qEF}o7!$XN%YPv2g0+NQgovMTpp zyJo>kZC#%NKek-^J^Fx(7NnO+5+{1=LTV=7S90j7iP+6ge0rF%$E7AG3#*Ue+P-3l zO}*~HH%U_=KA_eww`zGP!ixQj)W*wHc&Np^PK4|YYA}_I^m6X-t_gw%#0Su6x}?r2 zz@Unw9tIqS-r4b*5REbq3@b`3pM(>_MjtRm%!SBju$^8pwblyTOf)10mu3*Ng#H%} z4d@VBLC4Xv*){z6*X(Di6)RtSu*eCK6Cau%J(%<3W*Ds_Ap1j2BT|PHO1q zDK{z_&Ch<}5TCwW^k*r6+7u~jO>gT>&n1;VZ>xR{AG!0tJr*O}GEWxz8Y82|{W?W- z2SKDXMUgJHYxm2Mma!|!M^nh_ChqO4D2sYw?iZ-ddY{3WI&r^@k!xLa*$Bk;J$o7a z8Whql?WC;=(Iu;(D5Dp?%Vg-xO}=+55g+p8wPD8CqgG?jGm+7Pr9Pq6B-JO&Y@*o> zZR@^rW90}PrI!@Jk6zJG6>3(n8+grUst&Hh;YphY7zm37{5ZU9fB%Bxll{7N?v-b8 z@u~wVVeH2EREFul0@5qrDS3-UFML^LQxbuQIKbJ93@S)FR>V53tUvglt!LRm&Ds~& zl`5T3m<%7LvZ}B<@vJi9@2WL6i@PDIe3(0eumTcMOo`L+PLgXm(k6e%?sQS*)c%!(Ku zSXlI*OR>pizxo)v`tWv;e8OPjhaa2qZ>CUu|EqZQulK^CY zr%o30JutI4hw^Z~++3H^udZ#>+*PS0_7+enWsr4h&zi|?u|UoAZ48#Yx6lq{2aSas#N7_ZgYqOSTZzDZ~Q7$x7+C;Z6V z-ahqu-|51AUTF1}T87D8s{ULUr99sInG;ZRWwNntqQ)^}cYT9idb#&&M)1EvBY*6O z-1aL}4?vH4uL;PNR&$Y3-Q|8G0a#46Us?fTLOYArXYal^6|KjKj&s@~*{r&rW7O-8 z2OdCM1Y(?^`btxcvYbziS{M&67Q1u-kR z3|QSpfdsgeUqoX#Mzdj0x^KwHWzWOhB4;Y;7^L$%o%Zq=*aa_6yBhK>x*Ih>UElI< zZJceHTH!t*o0u2dEjyE6^E{w^1td9d;RQDS){2Xiitc{ggs$1cix)DKq`r(l4Y?QfN^{~$CS^21gUu9|7`bTjS?Oek@ zt%W(e@C^0G@(;lXPl==>5hh|z-TWum_4M=F+7HT!mOU8V6;4ScAJaj8p}7JXHOKDz zTZc>I-jClyBO+tOI9{|Cby<(dFaVc9^|;_AQQhRMZaZyjR1kk(rCE%S z3`VrSrw5k*4)O9^Cgpa6In@|?`K_X)zD3XKLF$m&m9ih4Bzh9cBU#7rPQAF>@stBJRS_Q9*_Nm(|ruekhr(MLd-nDl>NwZ%tB!mPbOjM zw|qOSpk!JKV$6`u8OiMLPRnhwd;wkfeb}5RTUbDDC$Rfd0EV$=W<2ZIxN-yR=v9zf#GIYp%t;thCd!C#wFaNP${$-S7>A!PDNv7tJhB1qsmsFH8 zL+dJe%?#z&$C1h-<3~GEpiBv0Zg>XEN&3|plMaT2AzfKUx!v7HWUF*PsWy?7yx#f) z-U(Uz!nfpz7ZTd-Cnz2<=kKpwTLE@?@4_An>k(q zo9dqrsX_zB)RKG%vnYGGQd-bkEyU1A6F#Ptjj1rTweF>OVQy1qeAktIg=LAMVT!xJf zg`gHq)U8CHOA&_4U{YuniBBQ^}$G1m&=%%?z#+V?@JqTzy0c4qN@iT zXC2RS9kU-CKE5q7*k)1fo8JR%WTBSo6QWw8z729Q4zb>aY$<0CFO2Vt4OHHRPbiT0 zBe8-@cI()vM3J-g!V&RmDVv(*h>mC-un5GW<6GySrt#3<-vu)_V;)QJQdF4g2FM~K zDxK}jT>7onqsO6mOc2{xUZwM|6IQm_;O?W3WXd!Sd%1jioT)aUb9R;0DF$up2^k*) zDem``-HZq-QL%N8;DkoJx^W@K=XJk#8TPbiXfASO%za{d_IyUt2U}F@OJ-FF>FOOE zv#8Op7XOB2;7D0GzpEIm#0niqz)?EA5Mzew~B&E?#Y9q$2B^p3t7=7VV*nfj} zKx8VI9v4PNd+%r~esyS#CGH-#i_+rPTT}Td$nju}8TVR(t{6cbYH5dLRe0Gl@xqR- zgH3fK|FJ77*rEB`c{AUb;6$CT^Z3N+2u&dpD(unfY=UlQB#kdK-Fw#at9;_@yJF~2c6o?j8^C5N_eKuCaGCnpxz<1w#s}3~EW2M$|+QWD{vFm5TP;#;`iB%77r=kbB z*7gp`B@;=K#`WX7HzQI8Zr1E$YC?jiJ&dlq%OpHe`T<|flNE~$1%t~ENrhC<-#@(h zPf+0Ul==%>v)jKIFsoASsWx*}>C1;%@yjyNiBt9`4!YqfO|4nY<&8CD^xwOmen+=e z!~lP!x)ANw){=eT@hf1=Y}KEkyJKf|bEhY_(ZS zkEKYLq(_Y-Qm$~YCw01?!oqngKF>s!t4`589CKTCBqoZ$*v8IPNlwqW>H%4PO`zdg zYbb$Q>;tItE>yYvP3O#g0e$pBVfLAQ9OqA&6;WHv?js`Ei9r#FthbYk&3_T#Tl!^M zeBTj%aSAFCZd0Xq1HKzA0W^8mSSMe0Bz(U#D3Do1p1QaQjjs1I9v&UNdoxqYeU-zj zL-+;wB19pCY^ckm0jyBxe>AixW_1My23h4py;tPEbecwi$(VR`W=Ed%gu{|3YvQwZ z$KfYdALsWYI1@^37kTNlAAIUW+pdP_JrLu&u+QkKBw$M6G*IZZri+24JA%DSD_vDB z|BCO~2|W|8dUqZ4=jIty*Wz(?S9h~fNTY%!>J6M~u9^1>-SYF+p&A@4KThqKsI6q_ zvd_*a&k_9P=GOb;=RU$M7EAks2EU)>_M^wQ11iqlyOC-5HJa-`k3+Xg4Ziv4!!*AQ z5Ab`2SrU`J$Bk7tzw58}uURN%^bU#cXWy0-?Q84x#C*h-@1^7IM^HIq5@pj0k)z&#f@wHBb+noV3Zn7G= z9JMH3yA7x9CNsNuQ;nGPPxztokcBb;_5=k}j>AUC5mtQYhv~|j#f5|ik-5GyBf#im ztkqF`e;PfjDf{Fsz4%s>nf+?9wGKs8jUhK5RVd3zg_SL#or3Q0ZRRdDy@*`&7peT3 zLQD;>zB!f}^vuiMF~A;wQQclH&bbk^e~1az(_gdCzI!uD zBoxW-%Lg$wzL(Lc5Vf!)o#;ik8(d7=tL!&KVbk-xH3XrTxmGO4?^t2umcn=uTQoL& zmtSD?yxlj7p)`Zyep5*ts}tz8T2B`Giw~8}*#P1FX2))>m?~(z>3VDiR#K`#G|(Ol z5v(H0txS$qLEZk}a$Y*N`(&msB6SLh^e!;Yt6$7i)V_u6^f1!lbrq-BTBpVKz!rI& zl~|s-qi96e*OOStt8cLP_8o!QF`(rAvZNli3HFH}m@14cX$rI>QifxzXT1tF)^^v~ zT#HHXD@3o!bEyLNwb2biWGk6X$+x_LNVcc^6re>6CWyOs^MCI3Ej_(7VanP}U4;wg zRYsI~Dx>v}{oiG>SIcUPDKRAM<;Z|rYB+Z5bh#Hhsj?$X1)9V0N$x0Y{wy)`HZ(tUE#pF)Xs zv?`CR<14f<>EP)?w`J0Up7ftVJ=rz(aHtgrEuJG7WId8n^eA*c3-h2UK*6Mj-G4Pv zOC>=Cg|d{EEv<3!X(<}3oLu@84Q3Bu|^N> z6U4MXkvrbp%LytT?T?Mt{41!9ZE%IDzCcW#GMF_;&}S(s8?#m~R2!@T*F!NfC3)vBB*tb6BBXZY&N`6zeQc+yIi-N+?EY z>i+3`4L4OY4C}_@RbV1Ey!heIhrVl4H@WsU@wS7Jqzk3k#G5cM$5JPQ^ex=ez5%^9 zLfV&q0?B_6&(A$qsM1{1#X@Lcr46a+hT=sfYo7>(dQElE2hjq6Bg*1va6=yBs)I4X z=?59A!=Ha;$ga(=c{Hrwn5u#~A5Kgb*E}|q2HD9eBaV&jDg6S&jD4Q|@9N_}K9UMQ z_oyc=?-pqbU*=S;0cC$I2W)QegmyG){}!YY`eTmic*r@JvLmSYrQ76d-)@K@@o0q8 zB-PkogQ9nv)QDJi4og**p!iNTs#v@8jKJ`})#mBd#HTBKaGt74f=YS(IA57s8!QA8LtrfZ@Y0+uXnnajerQh|V7XDo;{?@pK41B7nY_9T) z8McT4%PBrhLtSlvt(0}EjD?Nrwux3`D01C@9Tg2tfK z%flFCes>ZsSDREtfpBHEQKM_rtsOhZZKt(bpiU$NOh1;H*1MHT{2wg<9m+3-wdJx6 zMn1Mq0)Mrs?GIT9V@(&J>OqtZ=A7{&+~&*4REme~i!irdDKGx~)_zA143Pq50U{nvR%Y7;^#62Z4V?N*ptQ#VsV zC7k$Nv{c50N{p@(oWD0ML8H+WrV?}Z%1m4kaGZS<*Wcy^T-BITE2h_Oji?tc&G3qe zSrLtG4qrw=vl#w#Dg5uiOd`WWtq}6Y7mgS%604soT$7!3A3v?L)KOJWNzzGC{Tb3; z&hc5x6=+?%v+Fl|#3ZXrd!%VCzq#QQu|ot|>7&2yipv}NT-3vjebmpLrUlu%K&DNG z-IKrnp6A=Ea&sg2p;|K6xFAp8eEQd|{vTxSk1KxD+af{}jWg9C9u;&o2*arQ*Tn+P z%k#jiTtJIQs+WKJ#6JxAuz~5@Rf5t~C1oV|Gcv*krxl zxS0O;fB!piLCuXYp-g@M`oJpPpZzN^#t+sE&;RcqBJ}*fOZWfp&5-~0?YTa;^v(Fa zJrg)KcqMNWXsT>BUTM4HP~r5mc(l=WD8I~OXHm%Y_ph>HU9hWNit@TUGK73(PcvK2 zM{8%Gn68w5gBB6!OM?u2<&v96zd=}7EJuSZ_KtMZ6qXr}q}U6kH?FYkdfq{sz-Q7V z6D5Xl&~^URlq!<1X1D(<B3XK4IiFlWt%%AR2)Lo*0wyZVny!&>XI(tYQvm^R3^6z+w}8O8NoTOQ%JAf@I6Gbg>(xx%S^!M zqOkaEhb^=iht-@20TQ}bj*mQPfDY6{iDl$--anrFb!xl!@awsp=&cDw41xCB3h-7^ z<5;V)rjyA{!qfGtQR6SqW~;e=0D0-mO}JpePl-hDCEjQ+^0(G^{>|irFGnU?*SW@M zHI%QOP77#>90W6nSb-=T)+$sF(I~m3`HOAA3#zxPZjWYbVRN01huF{pSUF#F_&*#& z=+#LogPm9BK#OfnS|V1sX+6zgnnE>vYt|9P;vNM23;S9%2CB=F1vK%)?POoboXG;s z##icU=$a2ZzCf?)E}t$d%9At$vI|Jtg3P4-gX76f-vWkel5fO+<>o#&*T1)xhW(^KR^cV-KktH zX~@S=mfyE5>B*LtUr81;nR;0Yt`YE_=VT-1)hi*h&h~MW*lMSarwGGLpY~3FnqN)r z_qIwAcQ78xH^T24PZPBpoaM$q8|+okjuH7()ow+~|I3p21MqgT;j5tEWFZ<4rlcW= zUNN(=iq}^od`*LuXnWGIVo&+51Y8Rz5V1L!@^W~n)oDpCH6iCQ0Ye2}oNr!GTxNd5 zCs&OX`jly zRuHj~QFf%fSYgEvpWbjRq~D+@l`IX%GV^jK-wjdhGt;3A$9`2CM&1os-4gvd5NOcx z#Ot+@r>xRQ)=tJ!P+@74ls6!ALvfj6jw~`j4BAIu9G6}&IyT%+K3?Jg?m@pBZ^tD+ z|5cVi=S;tzLv`4+u$|W;oyFxL*|}r0@njxEjk?kc$;m~;dmgq z(E*ql^0}fJgUV1=&pTYU)>$4_X)uNR!B3ynE`M(eCQZ-U1a?kBS*G9l5jK7L)Ji*= zP%h&=#B9!PV)4-7V&8$^(mDWCY$`fc+vE1(GU|JE4taVh$pjry@b>sRu3Ykbd+%P!7O zLg!-S<-7r7q2bvRQ0_QlU(lB93L(BC6IbTkCmCwh>)9GWfZfM9Xd*oIZ7hL8V9Lwp z;X}XX#w0)xee^Xa8B#sQLfd)9j5lmue3{N)Hm=~SeR>fwy%7?8xK7awm}Okr&R6=r z^7~Gy=J0uD-MtU=B|l!uW|-HqnoyG$2id1s--_^I_LzUa5+i^kW1?k}oOz2ssPaC6 zlfz4>uk3Us``orA2eeL7R<}LJb|E24cW^itQ;^6lOGu2-(KAXskCxp>;}*r6#}t13 z9pRZ~2!9B&zom(;0W^>v1a1!|H&u4$?U38@3&=Qx1QP)u;I)H-F za!4M|1cGTFfB9`9@eg|&16Z|OM-8D>*DkK)ARqyoYm#y0z(#uIQ@mEiejwW^`wC2z zlQb(BiLfp8Uf@LndEAe-Vi^%`c>(J6fW%MVTN*xqkuOuJ{X3)ne|EEz0XUi=9&Kg9 z`|eFUnU$6DRANQrftJK!$$}?q2u(wS?KHa*VnE=@Siizj+0$yh>10E-KLhLQFqOV$ zU$|=)1HSD#4X3FN`b${g2L4>=rJIn^)Bm{z)M6^=rcdPon!4Q}@Ojh?P8zhgL44d!MFGMrRevj1t)BY+WEILcY~q};?6{LKUdjGd^+5lgLx1XTRU_>}l0#0Ynp>59XEbFj z;Ic!`J=gu~j{BeoP)^;Qp0)bb5_|qw8QJacoVL}4{EoL0n8cxEfN8Kvk@NA~7JL08 z;RWs<-Cw^AWOk4MM&j9)+U~(YC{GsrHl%TH#B5`tE_YdSlcb9g+BgCDHGHe{>hU*) zGSC0E?SOUX`s3@Bml#wnQ%bZaRfK1FY7cPH+UCLYZvd!fYgJTQS}LiqxV3(4H&R5L zVwMizix%M8&J05H*If)0?os^lbFXgSL`IGR$0r2-^l|V~Z;C!gm*?*<1#QQBt3!`D zM67z~&9$l6w~%c-a19C(4;zbDToA5K(BG`}T>PbVb(tzt;fxak*`1F(nYb7BiNvX= z37e#Rt0z|WXg>ZfOkrC`6x*i8O05t{EPYt?MqWkG0Qz@Bv3&n>U}2^p6a7>C{`ez2 zmbkmisYO?jOZ^{E*~1=eYZf6C#Ozk{wdm_qk1Bm~l+y3*T_R`e6R{vb$E|_nRSF{+ zd+Rd~8384PK;PG|E-_aDEKmzzfzZ(my$;9d#P{8|keo|pUx_*#b@G2>Zz1;%(`;-m z{lha@#BHnkSjWUTZjiNF*;;HuD=PtUs-V*TXKQntc=JB**_s&q`=Z z-SUJOb0_l&$T$y7#Ze%T9NSuLRPmIBNN5jeTSspb#sU1i zBUz}Jtms|8fVrE!?<;nzsnerkmAz-dSI=`AMvc9q!sR7xUq2L{_@OB78IFaPNiAy& z%Y1)YV+gzv#^3j`I{_;&i7gfucjO#2R|1d)>A|HZ!MRNWh?2{tkOGY?(9*1)LEwJ~N9tWK4 zoygCE|M&p-O9^Fr-@^vX{(7->3F~EmT_?l&`=4|23<9>ctkZ25`u|Mje>=ASEtOdR zyW@X+;LmDiY8b;DN?{Snu)MiR37kU;?R+(2?SgxNIGfb@7TbmJ1VBTc#*6gJh^zr+ zVf@Dt0`+%JPnYB=FAH?hW}z%?twe0hQ`<>3K>+`+n&Gx1@(#!abR315VeL1cGL|&% zkK+L9D1Qpn(UXE58eJupKLobf8SkVywp=B$v>z!kVrl~PKSBZNLdgk$sG4my9#C9X zo%)@k{FpDzOi}NXTO}sVa;KwyXL^Ud4!+cVawluEs>~XYQCXgjR@BGYOArodhx-0e zVG2+WCZOp!J*}WIvxfZHZkcIoz#8w@?orDav-Y)1pymi*oDkR5kzk-6QREvyqbLNu zv7`tfYJjM`75uLBvaAss!UVleDT8~62D+NJ0W(dK%eLyvNUhQ)v$nlU;s<_U{8QlneF`^HUnG-smaOsXd8gkz`LUFX*A`IELECfKV!S`ek{oy)ZudI z8t_g z-fw^usCpP<6M8m6peYtR=Q>hk5S9)Q`fm$~!wYs5DBK@`hde=_yD$>ptt`LYpNiB4 zO5<`sukvbKM6@~lI@i;yzLxN_Rg-KgQEym>HI?NZ>GewO+QZohB%n<>VO~lP{g`V( zBz)Q-=qucT16p+tEscKFDU6%+n%c?2H61iMyrb;E-B5W$e92(Sta{USGfB51HfyUP zfWyrs0y(9W(XT2;T!JHH`j~j3M~aV#^(SwBE{M|jOKx%etq2FdQ`cg7Zpt%ZkMs%4 zU+}#xy4~}`r$zF<41MPvPd=@zW2VpD{L}`IO#oK7ixPhw7kJQo($K8BEzv#Fu94rl zB!6*qE+L&H@lN^41xF}z+_?N%=xjw7>9e@4m1t5;{ zaNA?VzY^nJF!8feugTRo8Q+_gIO5@Gd6M1c83IA2QtGWx_)~I&1}&cIP-l(t zDA_ERtA*Wnl+UQO;z(YTu@sLt$%3uIfnK(d|3;y%NA+xOc1 zq(^$V9j3J$2`iA&* zxsM$)j+B{EU=L+8#5urE(p+kz(K!Gpu5>_w)u=<2C~#b(N$?2IL9F8algWp|5QEl z8bO7XKcACtOP|`!Z`UlK^Eq=JnARo(S0}}N8^`C3B+>vJHAZ$983<0 zguspGu%T?wpXv#H0Ij@fu60xHs7o{Ca1~mw**s(i9Ce(|L52&XpUldZ%&-^+LY;NC zW=)B8Ygn(gYnq;mx!IenlU&-xiiDTYFmw9)v{q3Q^ndyHvdp=a9R_L~{wk15gkq~~ zUdcOVM#|d|OeA&Z&~sA{|EAWE;DAM1=lir;Q>$cDL0;Sq0WW_ZzUqD`&~Ubo5pTwL z61}s%u(p|vpq~Ki>oReoDR5*M_S9*~ zwPhgm?BS>NC#75Lj+xs756u%*6M)^1n*Xu=ol;ijAdYwD{4xO1PrWTO?>MIuB3SQA zI+y5O8DW1|;-&GMRa0b=)%?PCa;LT-r(X-giT~UfD&hu3ruw9rMW_6YcgWFrL z-q5NB7A7#X@RQAa_dTkz*|zg&7i_=t%Rpv)c1naQttM1AY#o~s|B+KehL}pUh$LdH zs`<3L8Pe2(#pTgM2S*3r^_S=Cyf!)etyi%AqNt*8&qHFVJzDhrc&!g(%@W7I$nXQPjA=h`ydZa#&jMS_98|H)Zb_NlUJMTi2*XBl2b-#m4Ch?3VR z4awNXGx7}cjftg#R{ZcddCbGQPR2|(v|`D*L6rLy3Yq)nGCP_T-};{0m~_*NZ{}_4 ze(ilo%UaCkR`0}(qcw#~NK$EUxAJ?XbnHF_IQy3o6ohi$IG$V)*mf`;A}rFKRs&fw*U*iVskKA$aN(GMo*Mh)Y<2YGDdjqK}YV!~4RJJ;jZVhT3vRnUh02cI_YniaC* z!I5)#twt-2T=7?aY^gG+OeBZpB&9`=tN|e{eka$E`vcLcQ#<{FwPn!I&@8c8R!GpU znJ!PY*-*5_wp~9lgxmZGuU)*R193zW&}Grl2gChv#(S z&JL#KEysN0jv}pPI7Q2qZbkslizl|pRoNA~I$ zhm+#2p1avp)vU6@7OwhahJ;)CEGRg`0RYKh68mV2p~j{2lsUpuY`b$NU9a| zCNFek4U0|Q+NZuH6Z3H}%im{zjOXfFGLhlwc5<_tdN=1*?b9e>4R4@Eznwg42=T7Y z9`9W`#5EI5C_Z2NTQc>Xx9_IcG%4E&8r22_zF20takf?H|I^-=hBcXViQ3wNwjix2 z1XVva`Bz|{5wvm{7|Vs zYl(iDse5i)#e1K3PeGTc-mhlW8HK1|Y{CuJ39sTSK=kR4)WlusHhw}knEY(^$Qx<= zD2Vl4!U!jCTp!d{fqeOydZkS58w6(lfZCVL+`=zhV}4*^(6g%{$Z+$nJ(Rh(7)T{U zeRmq_?k|yms^aoWZTXmDM5X5u0v0e>F4#0$T_12*)yIo@apBT~3?>FOIuGQ;-}3oK zhd4^}_enlHJ?iu}jrVLIzdrb8I59A90oXn}tdD%h(Y8-?10u&CvnJd0;YTnp)w>H$ zZl`lM_NG?C{W2?8e>6CRI_YMnd+p^!XXE|e${uI)L?;Olp!WN`|Ek6}okKzPy#vy= zg!g$lTnBc>>fQ4mhyymX6x|Xf&a7M2Q_<=w{=RtS?bYw+i`jq&cO)Xt+pzHEoU}d3 zc)~L4K{CfO_NxxDn`ZjXrY;yX^Q+Y_G5Qw1H%%Tk>Ols|HQNnHs-Wi7jWB+lxh$c4 zs3>=O>m|RlaYuJ1Mf|+;k`cq3qf=u{GIY*nbu@-M9js{9>uddG_l!Ox2IB7#cEtSt z?CMJ#!s>8>8u3Dx^d9X^3P`w4qziNJ$!D~0e71Z00805Vw zfBR3}zbsD~Xj7=(C`PgY$L{8D?D=ppuvap%IFb&Zh-jD}p98*m;He+8E$)EMe>Z~Xte2jiTAYanzD-Y@2U#jLa7F0-Sys|t&SxiCW(h>95X+M1~Nuz-iQU_OF zeo3*;xy}YwjBC;mF^n7Ioaz1!0D9}pX2-x2Xl-ZqB<){LdovQRJC>d(Gk*Ccf?+@| z1QXm79(T<=`ZQJ*Hj#z7M*KM(dQmTix6cO|3QY&H3Avp1F6A4RB0Lvz_fo)=pBtZ| z+WCofS*3j9^HmK;)LYQq@a}o;+9d6RSC^Ss-hZ&Nx#)=GfC*gVD3CEe4JscA2gFs0 z!z3n7F}1QvdOto_x2SKQs7PE_`mmyY@_^Nrx@QzkW4+6JUw3j=^0}KFHN_S&S!%cV zg0R75uFkg5T;7q2)U2TXc&T{rcP4E`mD|EqczEf>DN@V$W&hv=T*crD61yf7y=|@h z-fa=ad)eG|Z_YIdF-}qJ1`)k%IUM)f4;2qy?0vDRg&uYfM3NX$`#G0Ia^3r{qDavZ zPT*&J7!X95xbdzwV#kVE^%l&#DN*}Jp_6{gZ?r?R1CLVj4+5+>8Kgq_=rL@kepWK^M*?XHQA8%KTYpp(eruk@3zQj3tBXfOfBt4p9O1gnw?d}x1g!ef~ zv<%vTJcB$P{AwW2`fgmf1jxAcyob~DNI;rF%QJQ*8L(SKrpr6659V#+R*-ei|c@Ermb z1D$Q9OvGE4`^LK=(&I$vGp~uDs*TtiP_TKHIX!>|diuA9ZMaby^#}HiYKCph3g*}%Fk=GU(l zsp$UJ;}gVwpGy%ppI{!=@Ij-;bUhzZkHc!)_N9)73{w#49$pq9CZ2BNdYT3|?#CMD zZZ@$a-i@^o-t{*+yCYx%?V5`D2a^BiJfUms&O6t9&baBp+Pcr8k!y*gQy{w#d|B6M zZwWZ~JbRoJ#!FAjy1tu!lW8CEybK2R`%Fm1jBZ%s zaY}xJWh#1UbE_m96^jhozKtrJO zfrDm6%1J7{0Zm+V*HXm(2}d^z!A~)R2mWG&j)knX53Cp%sn$i*EqOPM!@mslL zG;Sif$JYt!6&<9Q-%J&WQ1fZ*`mm`hQ%f#1N_0Be>~n zrxuMBjzL6php)SbTj(nUlm5$uh5h}{e)VX7=y`P+U^YbpA{dR@q5PO*k`VvL&;o*K z`FJ2XCyw4KfsDtgrW8no`y8}hg+T|R*)60ONNr;wzAOjr&s{3!=4Nj^gT1UEGBm5u z2?H@}|3ta);jMCmI0&(qy=-9OE2(E{Cj!k!xl>0K{w~bz!*nON5WHm(deV1K688~f z;T&_U^Iywa$r7Z+V^Rcysn-(Y4KoN0Mr#{qj>7m<-@Rxyg=Cq^D=6AcZynm?tOqEC zT;lITlx#GxcGXxjs_iaY+<>-$5#Lp38!~}cC@QYQS2r|t(su*aojv!#1MIaZ`E5(- zt*Z6Od0jDP%aWd_X)_n+Lb>-zt-gimD+`C}Df+6cm6W>#^y!_-O><`=&(;!o-UY`A zV%#wHuZk%}sTe$Zy^WHJzH(#dbhP4wFpb#wV4*tOd~yRePGxftg&K${eeg7?svNpV zW$!OM{;vCxLuuu2&W;+k!iF&8A^V66KSgq-{l6=Q(mM`jB!p)Nl z_wKkKEcmOCZ>3!4nWahD$Tl@g%W@=$6PNgQ5x_8Uqp&$>fDY!&h;Yllsd=lMRL@O0 zw9aJyZ8P_PxAOhg+-=b;{9yN-SLM<mv4F-*t5l%DNco?ua48pQ>Md&)4a*)dnU>$(0va}5yNW6%w%>&EzAj9WBUjfkI0FHOSkgtwn@u7&gd&+H$i(I9hApLSLlEm$_ncrJN{#z#x@k_L zq{^46nTmOT3}1+jFA%OaBDNBBaC)J##vgT@Zd6yIf%t#&);veiNZ+@9ZT29>0L1BL z;jQbbkc3ls-U@fuu^sdtl3UNx_v!(p!7{581Jl!t+J**6R6ktfI!`0n{DuDvojhbP zw4=(a=I7p^s&ZcXE)PAKQ4PU&AuQL5b_5)Ze^m@t&*GYN9mDk@u`ePwzt&=!p7_!KR>}=U-q7t*)@`W!5MV^$I#WhYu7KVg}E$M zIZkV&qEEC=$nEy6&<_64J>qn5t;d;mWDk7xD}EbxR{#icgZzZCGtwsxHZHLU5-R&- zR{B<1Hub7SCA&H$qIuwPfgR$y<9nB&!7+MgXf&a0s7 z&&)VhAaHgzyoBY&d`;+iW`wCy6Y-FEebWIxAWR4@_;vasHw3{}4*`f>I|$suY^)>l z(4}L=%fceGUr1^S#vM(sjCrc(Ov`%vjI3jN2U3ej+XRqF$!`nB*BR57o`x-@7$?5o zUQ!4P+@t&&dx3gS0?rDd=v}-^%z+xYcRt;E=tow<4z2K_y>PReQmVs`ysUq+E30St z8O~9MZdqrRT7`Kyg>ut&)9E-dg10A$ef5HLOIndbho8R*AjcK4P`bA~6H8fvNUP2@ z?C|T$aqKu|uGD~bwd*($>%K`wc2fhkR}^ul&o-$phnOd5;V1AC&&%m^Xs4uBRbVqx zdQ3jGM8nK8)?vQ{;V|^!oJe}r)||-xT8SVtr?DJ}L+k`nFmkU^PMl+&fL|2+bRHQ| zyTUXl=6P8csf=*^Q4&zlOB@?>GEg-3Onah7foG#fp#Ii-XHDycp@1dhLP#+&HKVmp zQklMZw64cLp8xOjqpi~l$PccL&uueCe;}emeJ-|WI1l+5UAAzF^*#u#Fnq0Jc+cqO zTE|tw$zrr{rkI2tI%N^k_~a;3wz$vY;`CriAt1s1Gi!Xm9(!KeBbq8sM#)QyNL7`I zf#*Z{dY-s`pKnd6fh_Kz2b$R$r6nqZP6wu4g#f9?8llr4gmI!{OnaV_hHNo8-+6!t z10S-U&yBlWUzFHS@`}%Cf+RjJSsOFc=doT(UTkuPfCN{g`-%`1F!1viDaNwJTi>rx zJ@;UILtn)K&GczroDQV^1O0yv=0G?FQO(9zKCfHr`1N=7S@cr&lU$=B65Wj7Vt`8u z%xs2mw`HmFR~vqBbo20&mrvb1( zB?Cko_riOF15bdPrX@_+s!zh}sI%>>57-<(Vr_r?W-B(! z9PH70jFumRuRn87Qfzr~wDhSv(jH^^-sF4jCrBb-$=*-ce~G%X;aauQE{VP0J?WT# z65fcnzH}50=ny!2II%OOzM%rmg5Mn!Km6ohJB(G^f-aKlKh9}3U>BvuG;sVE^p&zI z#z~6Kyu}H?yjE-0N}CpY0u;EJa_KNV0N7Yf1MXZ1yScifX}*T=yEYB!2n&Cwp<-j~ z&Y()oAkB%uyZpwToTg(m$VG0HV4}MlGDKR2B+~V%PmZv4$^(5yvp%1ymz!e|BHi&_?VgravpN8zXrYl(|f0R6COF z8hW6?$=4&AHbpYG_s|O$TTdA=)ieHoQ1}}=*EW<8_vuxB`>7tQklK92ern7oezN&S z$>=}bvXz~)3mQnXwBhQKwr{aNtQl+rN|^57rZZ7FDbQr3^5>;3XMue0-fPSXFCU-1O~0G{i34`% znSEsE67fR-k~aP5WBTif5pC1Y-e#M8FD@+4aWD^mcISW-+*KigjJ2)@jFPa@&1U#E zEPKthKdD|;2yw=B-p6O5VF_SN@P$`2aH4WPD$k}VKT=KC0|#ZZ1!?V@!crC)@hmWx zi&ZtU&&pq*ercv;{5q zuC_@QCvOfmjCU@qGWr2Q8~v#Uf-j^>8)~ZKxSEa|CZ!xI>ZeWA{e?o0_-TtGbS6Xc zRw?lfni+K5H{)B7ZI~-a1oFiLxpQEF7Co|x7!!_cm_A(w&SnKJlh5nOD z-Q|>}LV+ba&Og_fCapHiz7w>E-SolfVz6(#*6v^RmV%%s`*bd&`g&v}P^PI#9AIs) z)7)XdYt!u~Dg%$+3NM#7ID2`B-&PO2n}^z?rea29>59|cj#qQgL(+lQ-=F}STPNc8 zVD{6^21~ez@3om#K584uH*n1Qpsx({L0IZg4*i9G2{?jtGya@5XZ@1;0@H#>(& zdla=Rb2x?7)T{|96=S*DP@{6ivfrCm95n1xk!bc8<^0Cv1AIm59u-&wxDAc%>zxk5 z<>pG1Yen`ZtWq%{%X)EIJ6VStf7&hr|JkO#I-CqYwitNUE{k0S;5X`D4 zS&%YIo8Ia^f1{Z)oh3~Q>4LkTi)s^GUX!UqS#OWW)`d!~R;OI~8 zK}RdP^CbmEfRFMdJD5LHyxp5!bEyO=-T$cI7UA#80spFXMP|0XdIA8&`Ta%G&YFl6 zdxH6iO9Y#&oul}*_D-t1PtHB^by+*K7&;!KHQ-|aUgrS(+vvvNJ(Sb zcxGb%*daMrpvi9xW?!G%*>SW|+f_*Fe$yfC9NB=?-Kw;Hm5ThB<1#x?wbPzv&JH<> zJ?#TCMqK9yq06)|p(*xjm|HWTLnY46CA2+<6M7vw49zlsSerAn$;bP&%Na9fK~a*E z_u3T(5u3>TC+`Kw+Nstv-mrithzryR7vmZ3w4L&+AzbbNI7t?IrhAXo+aBIy+(FPB4opQ?aw%vdI=ET-bGV<9c-+u;JcuLh*NYuTEaKMC5V zdr2UXrG|(E(^3pUn0hdy&{tcBrzMo%v#2N9G%!n*nD=O+^XLmCEBJ9`VsM>yL@nDa z58yiA?zo4O&D}}@Utr@|6e-_FJwAT=l-3LiF3(hbj?asDDlsg`Cb~{37=e#Fs#}9W zC!ZVm=bemq3uZDM4qlSi{2f-;gNwX~Q?Ajb@EF%XH2JK10fix>QPzw$>%B5>yWQ4E z7kS#Z?o@8T+6SPTDWD6xoR@$1!_`!M&rR+UjxCKG= ztxkBjvNP>$)RI{p`6Jiu8Y?%`9T8{}Lf=2#nm{B?WR9=A?Y#c%_}z6anU+ia()@wr z+OG0}2t3rhG;HakB*oTnjKUau`D-Rtz3kp92e|lhataQaryt-SYx#+j3&DbPpXC#e zzZ6Pe(cp>7X@Nc??QX@$)yB-#(1b`uftL6A6p~fNNLpUopuKfr!pSBH@dNOZ!1)Zx z$Fr;`(wXu4-&PijSw}T40})f8`)9Bi4D`ry6GBZ=m}DWxK>?U*ji>@0OF_Di?+>vcKQ0M_Chk1{gdWluHcd zo3GEAhVO5fk=@u$@BKg-1#DdzjZ9P5s$<1FWC+Q~=0QN!70 z8Cca2S7Ml&0mjoq%Z1z0PYUvn(u;>j z2bC(`7j&4>!N~6e&2aL#D-CiL{5)vhjNG_;{ikv;aiD0#$;s60C%0vTUq1b2w}_gv z*jY9?(byc8FfGi_t(1|IpbM@#oUxgLknjg})z&uk6PmP(Uh$=jgxILSH@ua)S5$7Q zqWfJ3PSB3fPQ-QJchF2_23SXM!Z-wuY1FMxwtbcN0tDmT$s;8FBcG*ADL1zL^jTro zBI?<{;!wsY;{tZ;`?=Y=`A^xwhmAm&;+tAkFaamadBVs9T45S-bnSs7wCR>M9eT;- zrQlGuLuefx=FN~aE!ac@&+2YC=}esMxiyHhO(6?vXykgVNLeUOmF+7KcjNgbv|*@~ zSub*;?&>(A=uyMKQ0)Cu$|H!?J91KYD@!4kUr~yr4s+V#6p76&j_P&~HaA=ke9r!T z!((Uh%9VF$19G1nf)w=fqx&8W^U*F<=aHR!!y|JX z|Bp~Du>F5!0vU?D_w%n8^4@!hxCAmJDX=r_BLB`~sNJeTsC;YdG3e)Axb64D`XXXf zZrI8|?-*c8YQ!`FHjCRJG&&|e<1AfQ^7SE}F%_<%-)({+KS4ETa zx)LJ4n0L1fGH~}9xH;f{Qp7MkQPIuj5DJ9w2I&&ZglxL|0L0ZM>VZ?8h2~7tL%W_7 zxeL7L7l!YG1oIf6tSTfYDtQWlobr>HJxrD@v7%ZteSm6Zy0xOE{D{&CM5j{@pYb=`FSl~H z$&o$tFXoaK@Q>{4Ur@g&t|)E6eh*hi-5NrD|47jHw;qJ*#5rmwOuxgqa!b6zrX#MX zU_chVG&2D)&NWFoW_hcEwCi6`&rm(LGNb4`>$wPMGi`ETTj@}rF_SV#WR3oSn z+^?;OI~Z7#Zl20;GCqPD?qT)7{~a=LlyYH(zvuz9!j1J@n#4fS&F%2yn-u3Uig4}eh;{qj0pd06$^up()YdfLAOeS#$+_kwb2lll zT7!=Oh+3w#qaNIG0#ET2m15XLqk0riYd?nlU0q_AtW?Abkv5hLpFiQ%J z;FmCp0Cv$*>yYPrvK}6G<@;x5I@HIgMYjEzn|su`u3twpSf@_cO;rQjVN`COGsw3^ zl&puz>7%1wghFy5xkx*f)>~iz+|*E?`F;*h5(>9Y_zbU?j$jMi+BDXBe=;EEG$n9$MuNeg0O)|4^G&CNm!g{`OcET(16)5p0mw3%rr(cstv0mq`TM1psK2E z92=TO-WqXLL8XbC>xbG*$Fe3|efU+LGG00HthhG$ArKCOT>Y0~VBJ2vpEIFxF6nEA zbk(lWDh;ftc{K@`9b|xWzTpFDvf0GeH|c$w27vX8i=8@uDR4rw=t1t+E#9H? zNUMTC7c3a7EQo$8P|Xv5{myti)_N=>Qs?|qS4uQ?L6+>5n^RJpAZ7_1)?_Rck~ynq z1YvVv@uYg(ePVWHIiXKq8Lm;rbVSif19lcwu3mectEMJNb#4uNjzH`2W8f(Ds&v89%w{2~qB$F!xN{f^XlO32K{*isL^ee-f^=z~)-O|V zl2lTHMA@fGvjl`xp$NF}R zMj$(Rh{I-?RR1l8Gu!KQSf+{1AB&rG`O;F##+Fa8wXgy}Hcn7D;6zzVo5-0L-FynL zfFa*;#uI!vTdP6WlOi#rr_Jzo4iMEO6TFMWxIoaQ=YLTNS36p99yxH@$ z7at0<ht@)YK3yWTN zZe?d$+tF(73k6^=q5p9E9LH_cN`uJwyOexGL4%71p1m{NE0v&&Q2*@dqO{b6k|p>m2K<@*e(Zw}xDPz!jS z`@?$BA5i^a{~;e8dXhCUzCDFjEhEcsCdSVclulX{7TSK&9|@kX#q_3kr({e;=d96H zKgOsM<+i%#4$e3wXLPGP>IUbWrQa-_MpQX`oznu0Crf|zmxPAtUaNzLAICdShF$GXA`^P8YSW^TDT z37FoDr9GHzT*aLt4-bgzL{>*L$u@E|$3niy zI11G{OC1JMrBuT4ZYE4|?tS6=Bsrf7$X?k#t)>;BKczRPd?20P{`-2KN;*;YGiZrN7}X@1Y2OP-5rNq>@F_#>sb&_%JK(wYhrzH`Bo-YpYI^! zBnz#;^N6o#Pywx%&DF4yWHV{tB#i5?IIOZwpbAsS{D1zhTGEwR Zamg0~$~wEwF9ZG#9q|6Se1G88e+K^%PtX7W literal 0 HcmV?d00001 diff --git a/docs/img/domino/drag-r2.jpg b/docs/img/domino/drag-r2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1411bd02772a012bef2563ff31285f5dc912e518 GIT binary patch literal 31447 zcmdSB2UJsA*Dk#2Do6+EQEAepinQ41A~py;A|g#d1f&ImAiW7l5u}NLh!E+$Hvs{q z6Qm^co=^jX*}hA5;rDCE z0U*QoLz05 zpPrSKya*^>)7GQ>**mHH))xG&$vDItIUWO)P%itQOkTrX^CLC^Oee@@$X`&9od?L7 z$S9b|h;0CfbQe@)zm?yYK~7FV zML|J9MNLI2)F-HamJ_E={8mo=Ug&-+^gj#3KMRr62^pymN=ix^(jO!3Nm|DL&xJTi znwBC&BtS<&Mw(0%OaK@-#=i@{0Q^5*DTv?xe{!GS|Neb`0yBF?XkiiqebG?HTaIPx z3TM*MSE7Wze6x-oqU)siOgYjlK(47sgGGEH0*odOP__nY*b`hQx-pciFZI*XXw7j@ zVNUsxdXC**&10)*ktbRgyC2=DN2)(o4j!mtB|EG}t^4R;4PWGXW?V~M=W;4`I6eQV z)>!3>B%?OB;{y(BBOb;R8?vTs2bRuOXebA+f6o@@ZiSw-x`Npy0{)Lu?^O;+r4LPd zu~%rwOK_71>H6-r1_|(!JGi2~J!9IY6h~oHmXdVF9=EE>Duw%6ix zl|#(?+S71H8#j{`g@M~N)Au25&~h8Rpm$OXfflFVI(1>F?vew-p|tsfTI$o00sSWx zak3pQv3)Y{1IxB9HzV$0I!e(}C+rz81vfArVT+&3nFZANhc*ilg>S|^XuO2$<91(5 zclMsCoXli+_W|)$Dr5_njq-?rU3NUW0*=havey>Kmlu^&*E!c+{-7@&Y58)jQ$Zp& z9uPDAK*38 zNcTdhRN*>Zb8X>#&z zID)R&!7Lwt6;`b@4{A4R=&G5R&hfDEZA*rs|v*?&3@mP6%6<8a>R|VX9HH$~f-oRFQbxssL zh_^Gx-pu-5@tM+EOYtnzR9OKE#E8F%Mfj_6z~n+&YJPY>G`Cd{x=mmmuDhu#QTG6b zG%FTYOO6-V%SM4aro;I}u_I|&t5fHL_xhe6&xv7(K%2L;Wpu`qy0n>ym6<@KziML# zv~0TBnand0e#$eo?KIVHdVhIxp=8n7Qs#_TlV;ahTAdiI#6BRNm(`XxmWS_kKT4l+ zgG6hM$tYd?V$C0LVvY%#0X_-Uh(K+wQ>#3BXs+j8_+-Vx)4*ALf(T?M3V+xlZ1Oxu zWP%@!Hbm?_!+B9x%|ROyn9g56>ackJh|h0fy2W4IGimPw?hFdVYVLX66w4WX*z%~d zHcnYLe{EeTenZpMgH_L&;w`2`D-T(-84BiBU)>?hMBkiECsO-! z@iTtXF-GCDMvdi)#Wk|mR1yPZSld2ih-rLXPxnZfFUQrnqlf@9301$@!KlD*E^fG&p)&ilu%=zhMwnomki_8>ezb_1-!q->8us@mKLN7d35I zdrOZ$IqoCKj!WDSVKo^oo6KfQhYL}lfSD2*2X+?II;EhmOrO2(at{i`-ssDwZ5Vsy zed%nB)~MQLPkt1ld6Ulbt#QAr>nCHAiKQAPN1vnd6v3^`D=u*~)2>?|dbofMn$G?p z229J*tN_=_4^Le^4WHDihlv_7Rxhn>4d`kULaClR(Uf08Ep|GS;cqt*`Z1qOx~oI_ zA&uY4Rh4KWcSKYt%kC)nP(5^LsskNevRB${%HYQ7qa|}wENZdWwj6cuIh^BewG`MA z58$|ZVD#?8-OhmVC(l2qTq=a=;AFA=&5bAA;au9YM1UTLxK+Isw=poneXKYpx(=7= zQ@k_t;Q4&~bDwWC?>tNQ8gUXc217zvHzXm|`F*u+Y7(ZeeDD6bip73*M^4IDWTyki zM&YyIm_}MWg-y1(XZ-yH0W77TdsKOSPVeL#cKvjZx2?whiK)W0qX+k)h(@9R~p4vtZlJz(P#xJzOVy{Q?f6uI%i@AK;Nl zJypEs<9_|((={^l)DRa4fe-JBxzx5gn};*xeV`bsw*X!8PE4gew{+LE$M()Pp-UbM zZ-Iu~hTX+kVi7abk@7+%Si$MgDODfil5xTM_K7-(i2SEpj2gA~rCv{jzYXXeI)uM0 zM}$F762Lg;nU307P3*=CwD0+a6&4RW2j>rVy$|m|By4!3>m<2BGES6Ma%Afk7mjoM z7(F%H265N#4(HVji;v%3P^P7=zv}p=52%V85n*P@I9Te5-p!tEj3EMaemwRHHQ+Er z5t6X=!#t-xI>YP3&9eeA(gOpWI#M)=ojUjioQNy7wG~vl-v*9kPX|Zb7*dffT!q9h zZH<}g@~*x5QS3e>_?kOyiy=Wl#pgbv+hIquW4eqzVFnRQ;Kd5|r65BhRE+7w-Y>l( z+e_`K=09D26)b*dQAX>>`DZ@lWGh@A?L;8B2GP2#COU=-7{-gx>B%L{SG0w`RX83K zZNl95AD`t!S5Jq)%NSZ3!%gZAX##ic-VZsVOamRgtlceTn-*!s?MUz^U{jhY>cfTe zL2I^5`kJR)IaID_=0ZcG<*zqk*8MCO|VWfvD;_tO{x3+S#2AHw~+k zEiA100)4>eG1krd^@`ag)q?MMckDLGxivLSvb9~QEc^CIrbDVOx2|&P>bT>Obp@<#AR9fxWV0I+zHQ?iNc}+JW(cN=tX8_Z+Av?3h$#_CX3GD=b+> zHfZz4Ncn!`2sb#ea>RlUtCoQB?8x!>bY=88g#BLsx!?a^8J9lVXVNWgO`naUf+ZGjhnIN?JRYANOdZGE9jeB-%Vkss=mwH!X|aE;}CW< z5t76E##1`Z9z80aH)zQ`Vx&u=GTg(?#_x`{b03L;k)0@~Y*hPp(I>oeQIDU*edUIf z+#hXOng-jKo>0WOM(>dAT#8YG zw)dy)kFXZy&|o+dL2SvpemY3)f+rEs8O_8d=FQfNqIK=cBp^M5a<%H!M&gbO4)>!W z-1{}H@!~;mJjkGxHNHD0IgwmJ6)4^ov7+4zW0vU#{-VC2UovTT?x+K-f1|>Q1ApZu zMC}8{-u|AMCodqP{rFbkWf^Vt{I{_dsk2Z9+YwEpku}HlT+Dj!TW>ubMaF&x6V@FA~@x!J5b2m0)~TCPVfT;w>D-c$`lJmU3`bMBt=p2EptO z5jYXDu1^HirCLEaogYMC3+=BK{jR}<2)w+$mq!HNC81*Qf==)Q7tNo&f{$4n>EI|a zS0X^NN)MI74>}1<49E;ZI5ZfHNvI|Q^X(u^sNWC~*tv_BCiPO-PXr_{P%N#8ErM`2 z!1nM%5kC$h&|?cE$5Ez5rOZzm`(!o?{`|T=*&jwh66Ciq7{pCSgDt@HFYCT zo&Bq;!>5-0o4g9q36@^KGXFW3@n9GhkQKAMAd_E1>Q> zb1c7;CW0T=AODnghwCius{^AU>()!Wt|{|)$0J2NCoTeQh=oVgDM`s2Te5HKBwbQK zquKiStF#28gs-Nv8$3`IqxcKiLkPdfE{q1Pt)kxeF^%ITa4slSc3hpzF&39;2w$`JP`1aykBokNDG+7Gqp2a$XsFC2ut~VufQ%x+ufCt zePkk;Q*u+gs`|}%$-oy@)P1+*iC;!C`;Sq8{PW2G-iu)>WGL_%`$P76G&hqw9F-be zJ2u@kkNyf?gp>FniS`VK)K>F zR_tH%N6OP<&m6~#a`fwIJJb&`M(m8MOz4+QTTESd6yBc+SnxU*6k^3C!dnoEYd<=N z7sf^8dKTOnl%us(xr65R0#ZyPlGo;H$IFG%kqQ@;5%?Wx+31cM#IMsQ1M}-}ldlr` z2mluPLWK=A9cE%;Z<6t{Y*kmQXx+(4LNHp(QtSOzUZtlsQ?&);?8TMxEUTkSFy=v+ z7{XI@uqCnUoo9-R`v;SxgaNxDD(0v@9mOW)SNW6MzH2Ehe>DKVtcoE_$XYK(x2fTL zB^Dm6a(U0!3CZa=u;)-)TM*CMQ~rIfDn;X!)r-1{@B1$1`@s#IIPMk;)}*FG_IMr> z)?Q_8yK{DEcv-R6Rp!Jmj~{BOxcW=CdpZBI{=mPi=iAhXAR_SCIeN3-yk<4ujkz6#}vIXYofgQ%IrtG zvx|`Qg0?eH0j+Ly+>|A3%g)e5D1kAcM?XN>lb^EU z%e&j5)ep5>{AIM&%Cmx&{|Ygre(4?Ndaijp_z%oQU!WK1VHe~1bKCOIGd8smPJoV*KiEmi0LTxY^P4^d--J*Cj zs5v7zgY9lR%r#pAUt=sZ;+dN=u<#?DA;IMd_N)K?sae(4fWI|mR@`F7k7fu#GrhV^ zT$&;0E55sjY&%;SKbAG?Ap)v`)iRuu9g5FieO|L?ASd)S(lqd_PGb^0A=lhTZaAKr znwpwifiQJUd+{gk*`>&<4xb8~s-DJOCj$Q?-qNgqO5jcAP{SmnVU`0Ml3|BHJe8$#mAah8rVB-(*YK4EtuEMH47MXcBKPF<4sES03Bq(Hig z6<8pmUoL}z2-uKD5>8v58pDKBzJAeuU;04G$HU$AgE>i?R0-E5+8Kby>CPal;lIMf zFW8vLU>Jz%4}5?f9&P)uGfe6^ZfihUxXw<%bvuSM3Qf^I#$d!|er8Pk$kAZTnFq(< z8ia9%Wu)|15%2yw+Vq?1xJx81 zZ~6#l@S}i<;7s#>tXiDUT)`r+IUm=93Xj1>=`A>kCDzCZ@BrdC#%$ReCrIL|V|^zH#ScDeDy(KXhb# ze7sp_oBj*%DT3$*)Nrud8Zr?|Q37#&c=;p^`7fMSl0`Sf8_X{4%=#)bszczyR;fNEIrSKA-0M zPIM2MUyUGMDJ@y*ycgt^*r#WN_19@x6T#hYx0E1{sKpi;eqAt)|A!{rhyNFiO*Wo% zmgA31v?SxR2h-UNUNWfpX|4jkg6dwNCMd^hBO`&SEI_ZGpb zphpkF=aVl;bZ8%@>~;ee-i7~1B@X_(4c{@kdkkthl-~@MKl-xfwyq|Wg7z{J)#Rcp zl~y4|7oU8$&_ppr`B7mDgR?&_$N1P-=Bn?7Ik}6`7OGL4?Aqp!TTmR)!knCXS#-Hy zryu8WbOkSk%gXP{Ci&qhy5(hOu$}I9E_QpV=O@&LpExm5VyK&BG^?-j3&*}G%!sni$Nc0X>S+~GvH zkT`zT4-|_Gg_Ngnqhys_x09XTB%$8gdv3C@7O1PVjE-x|^Nl4AZMpdoWJb&gW{2QV@E zb}x_0>)9K_lt{1YTeC+p+k^lQ@Bs;PibMc6sePRA z9KIkza%WzVK87Fk`kf~XlJr@ewqyUX2FW18r5sW;Ba_0)q4daU&suGB7K3TAhW=cli3#;KN-+@Oo# zS; zv*bpv1M(=-zszB*Gayo~W1{ zc;bD5o0aCmvkObo9xCqSR(Ul*bF$VFs;+sD$b0`jtKl%EAvNgi4V_zaZ zjM|fxY_D-oZ~B-@61)W>0^He0LZkxhaj_@&(Xr5106 z0^dBNb~|Oby_xp;evY&O=O1zJc^8(d$tdTePaCQ4fAAE<&G%)9Hx$=L<-T!&beIa$uxk3M@ zjh8O(Aecu9=W#iBJzta*CP=N}tGCvCw&8c; zU?m`fBJk`Kbl>9ButN-PKk0b1K;VD~?2%Sz`~#c;5qQ`aPXyiH0z%E0 z0z!M5FcHwULcFv6bL5$VIP@=EMUcMgm_hFu)1FbPAF%_ceZOUa7;`dq9I7jHcJQIw zKNWuBM+fWEGhpchpV1N4WLB~R{g0;8q$PD7S2=_5Pd__cXNl5-G$oyztFKPqk!<8I zQKw|-i4#cSKby#=eEVZm!CQzAi5O7}!7id9tp{q1wJ$N1A@W%tdgtn{>-!wp7Ot;7 zo}EkT<^Brw^t!P}-KbHKfBCG<@~x&m5*a<8(RvKmA-Q~wHu^6cI4crCXJ9II#qn(l zx$Mz;B63km4yF}V;ydYT06Mb zk9R=k>mB2gYa5C@k9HB+JJ3|E(#ie}(%4;P+a>~QT|_{D7Jk?D3X z>U_T{TK|~ZU!F$C#P)0?5ipTag`0?z&v@URx%Z6JbU#Vf?b+Jw$Au+?NQJ0C9y_}o z7@Ktn#IZAQ(nXfs__EU7#Axt>rrs+jW^E9o)2TaL7KkdZ3o(mUbmrSzSU5x*p&YRY zq?2{rQ>KPT7nK3?BS(7L)!x4ubFh(;!F?{f;Ia-mG?3FZf zQ(I(E5?)+xD1Aii1!LUUa+s`6X;7@caMy!ck7pV#Mh(oz2m;8n6%`WYCp2a^ifNxQ zE_zsBoWQS}XZqNh`3CLUqt^vb1cRTyor%0nGvWs6_Y*kIbeoiFXY%B4@#7xCTcH_> z+dr6@*uA4V5-gi`S%^a$Wt(>fnNOtB05hYGm`ZeBNQ1a7gsUMm-o(tvtjpTR+AZC@ zH)qI5f;E3N;KbR4vjHbr$;Ow6z$$`Nt4V4~-7z{LESm@vHInM$&zW1Hbel|_^}kfv zC8h0ipEqH|w-#eI0nT^Ep@#Kb-;)Hp6l~dQiQpfz51Y(@B1f70ufY zU49fz`T_5U<~ic;)Yc?tm~}VVVA$3RhkPD4K!QX>-?(2A#O6uiPgWjgsJF8`{VIB) z{=pC&!3CH2ikWJz>d1fpD#1HI84)^tIve}-xd;2cV;z5ig{`H@3VaZLwm?K~oR2Lr+J7 z>0w~3tUlH-u&_D5^s2bhTLq_5rB04+g`qBqbfDK)ct)Rlf&{nM2vg;Q2)QrD<7`q{ z7rUSPbITl_unNK_>Br96RHYcq>vQX9e=Kgu_*#}k&_jIP zv@h?(`}7c2hf@kkb{pRo5vbYSv5ABcfg=HU*Jqe05h%9NCISOSt4k>d6MI^}=odNQ zjymhVS2E)8zf{Q|2~N3uL;$+1|Bu5JkUZgQ#+H^;F-c(vN08`iA^^9@D)7{UnWEN} zhIhGp6>sDVt!hU~-fF25aOYX%t3q5nPVU%z2|;=FhvLnoiGa(|;l4T7#>A1*ltO~X zdC||pudXz+^+dE@E5D@ku?}jb!x2%W&7&_zm1t)Bk4!eVh zIf6T55O*~`h4bM~tDW>fvc*49JE5d=)QIKlB#me- zycn~F>Js*fKB||ABrQGnaLKH%&Xiq0-?Egxk75o#o4yYO~v#!kmaf{cv;BL!9X zzlVhTQR6MJkVtn1TKk5xnEhzCV&>u+ zLV?(j4j?8{SPF5=mmV6*RbydlH$Vgs0aHk}%BWnxMIcp|ZH1a_4lKop7m(25 zl-jr*$(w@^4q3_4NJv`vPb95UGGvdu5#=Mw^FF@#t||Z!%4@GsvEKQwRHnFRSK8VD*xaQ%UQ5NwZ><1;%>I?(C-oFYmpubdndD%i56w@G%cNi)%MTr-C zy0#?NW|{h{75FUoa0W19JBj>MmtwCKv~-CMo+C@P^bU%w#2@E_}D&nvY7_T8eq| z)g?hz++3#^R(3G&fk!qr9lTK^_asrG+%R)2#JDaJW7w^@1{aDEDwlv;gI_+pk#?8rjwl0KZ24-WTmQhT&MKQ&+^AWp*^T;eDN4#b;{pY{>8i>Yqt(n_U5TxV^|KaiqI=dFr{{ndAI-%w1blLKdU57tV>R@;VHFXG@q$l!m;PbE*IR}DG~9m$*1v1GkNl5ldzsL~gBPpm zKc<_KAOb@Hg!(-_v6~4Is_$>Q4!t8e%${e~NKh{NoQg(*+*wcZbyb9d^6Uh>~`50U+tex zv-~`LfpnTc`Z8b*e`retw&xj+x4^ zEC)Z>BK;@`Me3OvvL_Tn1k^}?HqDQ}Kxq3;LYQvQ(HuO;{`(asb-IgZqbZHPxt+QQ z)=B*DCD)SkkFue_d`_&kHIi@F*p=&V{m@NMziXYU_5)I#)rE)xJ7h2^M7%fZ8SDzc z;#RnBFm_zor8}T0Mr5FM_$E$$kUH?XKpFz_6iq~|;hw>9Eh!pnPtrL}52u&hrc0vU zCRv;wQL%fLYJEXn2R!5vpS%VnkkG2JftE+^9GH zI10jUN*y6?GGve(_Lu_&h9Kyk_%irVm)DmCs_fSV+Ij*y)wpIn0N+VeIu!Chb>ZEA zR4{S$-)Gylek6!RZZmND^loDqSNUsXP*E6T!uoo*c!_m@f9|!9^!m@J0B>`)t*P>( z8`C%&R7RNJ1stLof^>`yIJ<=+<-x7!W~-zd@_P!$$&2bNeidu(eV!uKq3KNJ8Pb`q zjKNPa58y95jo$5&n9o`1qSNZ<=Qr-Ta6rDr5%BEd1&SXcA&sMlkp4#c2CxZkx}BZC zID(hW$7$XjTI0SyXf<@hsJ=8R_R%A0*&t8+*nZoQ8TH&#mz|TFyM)(d+j+lE{Lwy8 zNhdz87IDN`XG#R}sg8X?{}s&spRk`Ptsq)#Mj!c@6R(FgZLv6A(V8UvcNyj@+lZIY6=wcc;(Y{%BsDcxWZ~mnyS;WAxvJi!JLj{=xE#{_2kGko_c6V*kuwIV?Jl^Aq#}QIO*ZC4E9% zC-la*KRd28#6PwLWGCompRX8bij1DxJxD{ZD|h%Y;JVJdsI$iIxAF0OoBs~E9k8`z z{L#X^ZmU(C>5Jv5;7Kd(6PG0xqMN?aEElqEA=cuHa4}{WNEp1b&cI{VQyg~P%Q0{D zlFNJOlM=103*W`qT-_>(NOeT5wh5xJdgqGvPiz3(dDgMBg2Hu9z4KH2Ao)_tb z-!;*mMPVBBuFwmF@5s8tuZN(PN#1_sZcixwhdUEX0DR`GENI`Mvk&_$E5xrK?X8K| zcdmuUE?aXL4Yvv2Lz(p8rH7A?U|+%hYOF(bE@-jMC_l-&y-)kuljA71I^W7=t5qQz zTBA?Lp(ULY%gQwlppb?o#U-gk2Jt&jaw zZ;4xo8m#Di+J`4OC+VNJRA+_Qz52uXH28xCl{c9ssLm4s_azDvq4DQAGTJx&XHffP zoEiSe6GQZak+o>*d@C%i7N+ocBO`ao#I1p=W~-dm$cg%cyWVqEwLL+vt>VJMVrI2( zMh71~PdV(!G=gEiXatXEExI!de`yx$jo zbDE-O7AemAtG&e0%yPo4>}D@Zq;u{_2)9Alxl~5JhoVxt#T;w|rMF5tm3vQE&m6!Y zjgVj%O-zS0&J_~ht_F%|F_f{t>E_5Md-8}rrQ+M!D16K-Su^p02hUwn_LY8%p8cjI z?=AmSmA7g7;J@fMsG?sFXiI0KaqThInehJ1?vC$&m52X#zFvJIUhnZlTqk*EO?CLd znX0sk_YX>0#^`hFM5UtBd0yAIrTAqgT8Bo@+^`VtbcuGcxMh`Q`MF5C?beS8xXz*< zjc6PCe$KTsmAjLChpf9o@B8hsB7RD^bC!lBDW>D88||z&``~mgxDkoXb~u-RC20c# zdxhS2L=p4k#PP6j3W~(KUc6=YdSQ{kI)A_$5sVF289vuV5XwF%o#rzLmG z9PP@W%_Nz}vDlb4Fq2b~Z*&+T=&`=OwEvGQYuBtY_M&V&kM=^gV8Z!E`nmN9bJ*lW zgOtmR?7S6m&on8bbL&ZD_)CTW0-s&YlWwZ@Zr|LweObM21$~$e#h9UyccXdxV4U8y zl!knS*LuwoVn4+oXZNP~tkB!V+$gZSbLq-jgJ!Y+g{B(+tVrY5@n#$X}}+6TuR!Q!M%6wn`E8pObqjl3X}5a7%wz!{!Y*+ zpWzT!Gs<5~N9eKJ@zU$_*wB_Z8EvrfXdfv*Lkx-A*QqWYhB;>95GK)v3AaLxtxO$1 z0vCvY^n;@kO^&mWJJ?P1tpZFu3KZIKZmHsA{<;4}g zPIVZ>@krM}A>(x2_1QulN18T|L=SJjv^?XbGqQ<^B(^@;w3&_+n!X_&5@&?Da#UL{ zrrv1V3vaUOX6mxv@|DLKi(R_LN*1B1ph1fU25$kWvSa{j_+r9LD4;XZo z_sOfNtC!Gr{WA$Y`L~|pW-S9%t8*mET#b|#LwEG)##7hrmnF{iPr1G@qnzeZ)HI5SL>Mm?f*OhS$%kFT_J>MHl&czqUp@JB zOZ`dSzFqmVtEzf-Km5KaZHDJ4!JGR}PC1m!1$sTt9qE?c=%?ib`8j?Sg3s!g6kniMZ8g1tiW4@U<%0Do4pNSAe}<+_OFxiw6$Yh$=>UfePC z-rS^8a@WyK9lu_fv@dLWsfJHFm(Z6%<(8U^c%mX~!}?9p4W`W4nst!8gRsAU<92uz z#f!UlAsI>~866wmLm%$tVQ$2QU-;BZGV}kX<$m!7uYkaWKYM-+&woXdUo-;zkDwtU zgm8J;Bq^vtei_cp|E&ysrnzwWsqIBOWMOZ8;gD?3k2vSDOF+S!uU{gS0>Ywm8)guF z)5z2&KADjQ0aF~JMU7jZOV5!ZxO+lf?%}o$mqlGoW1+i!mBPtd+HJIyy?pO&3ch3q%4H|Ot90qKx{MN+$xm$Qz2f&6%C~& zCCEtUnOuKbnXYBWht_>#u6FMY?DL1q$UIetO&X1qtGdFZVGBo&6en#^fJsUEFk|N=1srOzB4ag7Gu*E;o#%UF|IoI z{7FBDZrD0TtGO%aTSiAcc`_;dnofe5OKTCi<4e=j8Zmk2q6a$L)I(+I@*q#QnLa5+u{&m~O4$9mDz$6E5I{)?Wn zEYDe|ib0RrpLu?^$3uESjJ?;L&peDnA}RIVFL*A4sYC> zfMP#iC(?6ce@oYpFdK+zY+XfgFYWuO99lUG#duVGX&T+iz~=D$&z zq=9{ZR})PuWSdUwCbdu#T@d}EezjjN>K+=>;UG7l#+!%SxgGb2B7c0{DUcH}X+o?7eRn#$i zZCB(}YQGh~=~3C;*}q_5ZzbwFoI1Zuxz15EVv67J@(6A?iI>DQaBE?P-|990z@9Ya zZ-Bb=dcRf}+4~;qh&FoVW^g;xk33z?qO3Mpti*j$ezR0b(fjGZ2{Q(ri>e~FpVpi> zSReewz7DoOEc}(;f7(F6{~3q+d&COyl?a5_u=V?^DLd30;F8eQyBRFYBhhlb47YU0 z3&kahHu>0K42g_`HSO_n*VBg&A^e|g2QIfD1P;ld>C;>=>s&i9s~ag{N{t0qXg4$N z-gtWaLxAIXQE`pA|_0QKHs3_ zvFVn+NcqaBV(l3R{qW{L;&JyF;E0{P!JqG$3~69%_@9^t(vnUf0{>dmQmacVzZiyp zc;#$8ZSD|1R`xCS zQk(NBPrcp-gg5(AOd=)Kob$WzZN2O51bOTlza}O)$@RU-kBV`5+St#Ze7mL0(ubj_ zHYLR)NR*1Tt0R5Rr!UrFlICpLA@3aNpUJ3-jTF_n%=B>d^cRhFGSkBc_`l#~p~Ar} zv)f8{3KUQ$9XSfkVAHyV>sCuxqaXCcMJXYjFo!SfQT0|BS? zors7^#LNzDTyA%n;`vs-8N(-4Rg259WUI)?lnMGAtS=)J|AB6HlW)$K%EtiF%*(j0 z2Z0PBC?ALxn3Y7*2a?x=TKQPMtAHLokuy?Bo`1Ne-DS0+z2Xp14>OdxUyWIL%`f?Q zDAmK%Pbm#&Z}ddmzQ%phSV!d}_D)p#p|`C&dtzdR;dc|{jnA#jK{pIevC{jK-CKuV z)ls@T`u&n`zo02W@zevYx{e5Uvn@hd!|h|v~!8Ki{> zaNq{f{L^^JA-UQ#-*uR>JmJj;*=)gzkWXnkBerw;HKkf{>Sw{8RQw5<48<^cgr}>y zxel0Kk2PZ)5@&2K?oioX9V(`Dv1!pkWS^32d6mRO_;a0}iplk?*tQs2cL%XJc<<~Z z2aL%j)}>64Q>+z{mwH3j--QTlwIvZm56S<*0$TngsSkg9|4CJovNrxRMg!>smo)w- zRn7a$fc%Hk_&GK%-Du>z{vi1_lxFh4n5 zXMR6QDp2`TS*U)J7vf9R6(t$d7_{?VK{jn*0>u421iAFoO8R?F_K3diu38+HArjAz z(tTcor7v`R5E~5}2Na2bM?St_K86>jZfkvrxI#DNsZ&bPAiK&KWMt!?afY*XI(aFrF* zluR+0a?r=0cB=eb5^7pGv7{j)!L4#XJv~Xa4-_r*S}+-67Otd$Z15N%0+S|(W3y$T zacGm+qd44LFWT22;Ej{yqbHA^t5Z_l8H4E3kaiepa2h(})FsU0bJu(Ey*^`A&}IX$T*F3kk)##v37TBIn&1`G-SJQGH3g~ONG()5M-PU171HpGf9iq?9heN zFR77LVxf_4Xf7q#>7;F7twC=zNRBj<>dUtajuqS<^3T2bIct4SM-HBRUO717U1Kq1g0Y>m4gN7dz~7A zV@OWop^t<~`$h`(?(x|dJ({x2#(ZbOl5)wKIeUGeQFyC&IGWZ=1hLWitwM1ti6E+U zO%(y#>_m?zq8H|h0ryP$ig5f4h!bw7i#P$Z6>(<7dIT+gpU^BWFVmhHk+>z7kmWAS z(#7K#Y}GL{hOXrm-h0uANotu6^OL|*H+rE#jGm&M>&`hVDUVF_hPbm|H8&_~Gw^KG zUJq8uXC>Qr|J~ca)J(XDYoF;4Z{WarV66;FNt@s02R0?M5|QJMCLR0wTP8+;g+N%G z;klTvr+h}Mahqt;4l=HZi65Q~yKC9Cf>t8VsvSDfXx3+7rYn5bM`EPRZ!uExd~s@f zRo&RyZ{8~iL`fFHg;UWC>?@b@j*1&!(;RP`gIiTe z`#EE%TO~8QDsk?6<+;q#Wj*BOfT~pKx(bwin=9A(oDm&lkTkI-_!) zLtj&Is(O$1J%R}yXX)v)zs&)A<<9r8?N8kB;hbjeBon!QYRC%YE|=Xaybm=b?XsS! zg@R!E&LLBDb#;#X8rKeo*9Oa01}?T-d2{fLL8$%+wNWueULAsJ43`+_ddU4m@(#JW zk~fc={W^gacDWemfYNd^;_k0aeuI!?kWy{k+URUl70bjrzbwl*vs_|#_{sdT_L&fk zwflSWqbZNvFq^5u;USXPaecgnZ^rc`(Y}?@7)cjlHRp1NDtJgw&FYHRtxvsJ%}%?_ zCJ#?|>srZ=3N>S!mHhM>4@Bm>DE>%IuAtih;C*Tq&xYa!H*<;FvEPKo_403R=s%Yq zeIM?#kgonBO;HIKbTqyyB_%jlA>X%s%3bKgH+W5O?DocV>W|_Vx-Zn(c7!AUoq7GA z+W!{}>)-!>6ygQ!UOw&;I9g zhjuy>MIt4QHQ&&NF_paMDF8H8)iq~8A$$xt%lCSjp8Wb;jxV?6TW@^&sxo45ZTy-3 zIVz9)WY~;AuysBue+1DvrHE5^wa&Aj5@MIO>+vk@KmCbO{2KalFupSRi%!W_HDAIz z_~S{_PADd0rIBMd)#n(}=`C(TE%(KQ+d=sPZ1En)tJ1oLM;jAw-&$Tl6fI>kB*SPh zaI~BsctGte-V7_4TkC`JRhZQq80emyF`G<=9lhC)yIJ%(i0o;m>;qTm&60vgpJ_m- zyT4odZ%KR4p3<}wyix>V;{OOMsx>_Q5C5HH>mUAa+4_s_llBu=2At&!wO&?~1kFG5 zY`U6)qq(Xd*2sxi2bbeDRddp72BunVL&=RN_|Y_ralMiL4r6fp{xhA zI@Xo2MMuFyRD;FaRk(&W)?e^7oSq5PwM!iE-%{hs?AZWn7z5&i@ZZ`@Mhud*?s;?*BU4_LpoXtbYroinL*QJJg7tn;TEWlTWPGj89s5UNnuW^HUo*9#v# zDC1hgc=%)H6DtiOS=-LbGY-R3t9_h#j4yH{t;{?7bn}9=TuF5a3 z0Laq8CFu8VFZ@S9{D+k9_bK3?-ydOok}=n83~#O(aZlYOI6Mlc-C{OWHmJgHY^zhkNk6kWR|R4zAf#*5v0A;5}Lt{Q-$a~Nqc0$xkY zG>`x+3{`XhtWUg7baxT&K*n>KpOgBeyAlP7+!2kyyE;~XHoN}JM+E-K1ho?&@mBls z)fKOcpK}UZcM>au(ttfu>o8U@8mYqhLZ$|iysmvbtv~3g_dQ$U3T~@s%7+zUjwXpM z0_lVL1!VE-=t(GNjr=vDkRlxEv6oggu;s8GnA|Kx?nKbtq?7q{PXsl8xb@1j+-W1+aP>|-T?A>47>i?|q!OR0NN8fKa9}W^b(}}( zNjWlCldaFGuw7blQ8D?tm6xX>i0bSB@;a$_tvZ-udS zp{g>O>zw(*MGKABFVU_E92SOx$?&Xy!=prfjC<{l{N1C8G4ts+6ASzN>N>0+6Mb8G zl1SQY3S8LT1cM~)MYrcton6e=eU=D53Xi~MmL<+eKShD?qqzRJvyWB-o*Ty+164^! zMlb&a!0xg6#}jv?w`Or-jEUFGo343^ZSX9n1hiot!aFM`?o%p@Y`8sBSA46MknCAqsmKQ?y6U!| z8fNK3VHvZy3JQ>Xnfg@D&nR<_8D8NiSi3?;Oy_eVi7h`Gwe7jM13yM7Ovfz^*j-^; zil$fPL%@i3NqAtp0T<-aE3jB=1LOX~OPc+M29kT-h{u)8DaOiDW08##C|L|N467rp zr&VKytZerPITo-1eM`M&vxSSH$nq_j5Q77DYy48ggUP(k{inXd+`+7x&bfxm$T+{g zAZTc7FK=Ip@!`~h7;?i>%zmrMW_O56RjK4c~=r4 zo_IbNX;?=Z3#Xc7{K|5f)Z`*$u|Ay#48q!`^0OHclL}l>r7H{^$#06?S!_1MF5fH- zv?zp#;+RU$q|WJv3X5)a%E$y|bAZ~?nVO_#dmP?s)q-R|%|d1N5P#YSIs%_J^{(|V z9V8cOsrF|VQR*+E9b9svkI9tb}&Y#FF&)VPdX+sv%G(=3~fwtKK-j^l0=mkCo5Y zovR&KhZjYwpQkq#mX)Yhv}h>4_6gVV;1$Z!^$(1K-?Iz;)cL=(800U6`D}_ZcA!-3`Fy6j!d3hmX=R!|A(}8!yDY)CSQddtUp~Z^U?vN0>15wZ7XUJ*XG{1 zlP`c>nO~ZsD7IQ!*|)8h(@e(9)7omK5T@tu4jwztMP8lQym`(`_UMc+n@#@QzzewC z;k(P^^I&iN@vSCfP###6%<3+_^I1oPF=~-XmJEWi&q# z`?b}y_Ui=E$_y`y;(nEv$j7Q=&fU8=avh*Dk->cBhqdEgBtZk7XiSuBcvV|Dp^Sc^ zWGtpGcf>p@b?1O~21t8)D%J}mD){Xq zliz;ouKP#kC7V;Tl)B*K5*>mp;IvNL5tGM6=7+FP^Fc~xAMs*@h-4OMZ;u3t#+5=M zS(fwO^(v->j_ICIBHK`qy$lgo*IBrc%`4ud=gh}!ktwH2+z80|igcl#x+fQNzSYrD z4!~3|0{15_lsV6E!*N?LMH{0ft{r4_o}BAXV3owM$;% z7t>US%JqohYtd?3)l`-<-&;Lz-6QWiNOjk3`X*FBUrA@*NZZ&t?R^ z|J=X2#_xB;)sksr?cthUt@1JPv(2>+j>p)uo;eqfv^Py+r`kT;UC+cd5P$+7%bVYV z@$0#Et`#kWt1HXT=xXf^@0P?1O)V_}#=^o6WWi4qfgG@y9k&4<8zNHQT7a-*$f7lO zTQg3)TK7oSS!%J0i&rfwbx)fOvm4S4q%)DJT4xb#C?XPw*UFzeVSIB0tv4bx`U$u! zQ+f4T=24yCoL603eu9^;2fuS{rhqvVa;feuWio;yvFZJXhl`R}{Zp};deorjO!kvw z$3?k{Of8ebGVJvf-z-VKlhbB2+~*Tut|;(1*Z0v(tfnWP!#s%&@fF5FgyB6vRIUoD z;ngSjN$Rk*TUZ921)V2dT)jG*BpN=m>ORqUwvX!^hMe;2bVB2;Gt5H+U7#q--KZUD z$H8w{cX7!MZ$2ky+t@9cx%f%$vRjUJd}pKT{od}{32ZfPTUB6itnDA~?$sOZ=3egY z?OTsQ-op!ST=B`dx8{2x3vAieFPqZs*z^I_w1Ik*a_+;534|9HiA~PW)GSb+JBVgQ z8;h)D)YO^8DNR*zGmD;!k4f5KCD9RmM1Uu3N9lF6Qln~Qx9h+dP z{Yhq~g!0_n|3`?;KXd)xfbRTbq928N@?Bja1oJ~^_wDySg32VN96TVs^j?}6o=?0f zw|-wEpm;j1mJlD^zwXZ#^L%t%0&jrLO%Jh6)ETyq-f=WqABKN%&+ccetb6!`e+>zd z!iUeMhg{Ab6&}mQ`@Srb|ByO7V-?;*=IR7ckI=${a_@P_l$58~OMoH*&&@Y@`#o$R zD4Smrp?~LT@oqgAhb(D2DA)~vhgZOYn*!$?^wRm=Kn_l#eFTOx3s0F9ZPdTAD)5=A zRA44lm{(5{tJ*4bMZ!!TjFz@eJ#jj89#9PPQ$6&jWRt^k1}zMdX45%7<|7Ekv4^Pz z)({FgJF0Ykh{S*!ztg~Wq*AZsK_@%om9H0iCHK8=kQ5qVV$y{jd?$7l24V$Io*fS2x#WNJU`t7_c5E1xAoV=D zq8F?q4~VebM=m}8*-`xS2+7|=Pky`huS8M)6kqC}n6y-Mc)syC@K#ryOkL#c09{5i zo@TkV>%zluvZYRd=OA<^7$^YlG7CFYIx+jD>CkIWuR4FzvR$7scIyzm&3ppe*e5nC zf$Z(G`K+IbwYC^Lma^^82!FevIUwpRD#uUjwkT%OK8F`HY*{&lH>i8uw9$xC$*<$G z#c@Glb3SDA-F0*l!K!xd$ zHkcYkB;Mf)E=#JC_U76n7ii zbcxGPrsV34GqGpbHz5l?(g>DS#1S|@$(y$AGAbz}fr_2HX}B$S{E!hFE8;qEYcBeP^v} zp4NgTa*xJ2ik~sQviC;!nE}U+L98_NrNS-FKHOJivcTEo>uo;p(e&Hqqs*_u^sKVF zCY|>O=~8|))Ex(B^$G@X_YF#{x?G^5FO-bNPj;M1mRnTV*?X)@pXp=T&1m8!-?9SK zA3Vb7_&jFkCYd~PyC#7^uzz-}(|QV{wks{(gr91!Y;PTEad417$`Yr(dBSm3`|YbT zGq(s@1L>DoWe+qmReYuj?8#rmxlaMDUzV0j?uounqR>8~?V&GhV zhYDBduCxa=Q-qS{mL?LBgNMV-x(5~3larfM-?ksJ!YsU3Xts0-dQb!?r3#mQGnrrI zpvxaOQQ%hQVO}Sq=q1Gy2v}^YYK=F*k6=p>tXt|TEV!I#+Gy@py~rB50NA4sX96Cd zi=#vpoj(CMW{7X^R1lBPGGwII)*e}9yY5#}Zm85L6gcKEOFcQ51j}j?!=y>H&Ipv_ z#-1OeC^5c#ppK@O7uprtHt=#=uDn&_0OnFxow*HK_>7RPRB@91^xi4vU8(CM*R_Ex z8(n^d*jIrU9-Y6p!Mzq?+5YYNZzk{GdHh$J)8Fk7QfAjdVe2V@FbC<3YcrmejpskP zFD0W4ANa)6>oEwu&^Rw|O55Onp1P$3;fM%L4N5C9*QCuB?rf-C^Mzh63=qjZi|Scy zQG78N@$w-x0V&+ptSr(XbUz2R6tXUQ0I6MZPPDNe@Dt)J@MQq=VCE%}jp>(_ zi{qH&IM%IW_cn}vsg=rL{yBH@9U<#)uEyIxKkjvZFl;Sr80}E|j^nXyT+4VLTqpn* zEL`I*W?tt`HMuwfD&rJYN3VD6A4q_LlD^SJUbEI{=b<6Q=dF4>kIUXPQY^T#*bZ&V z`6fLF=Q&KJ07biWCgYi}J1lE`n-?q2G{%)LKS6XK+o5M}_9M;+wmtX>5MXH(HHZ7C z%DT5co`}dSW!&Np3+jww&mdn0h2zG^u49{E&Y;LWmo zvN-L93!2~hy-gviMa2A028j?A8kf3M;j|g1Vyv>wsTePVJM_BG`n5GCCrbl6PJ~7o z=6FtO6+HzUfvFFdW);ng0M-E=rWQ$<@)7gY3PRG@aB`u)hP}Q+Ogq+g$641nV>im= z?M~w7_YSyTjA)n&m${Oqk#yhTK%Y;biooK+fN&aqYJM(XdNn(rnw{~!w?0>gzqFfv z&fcQ8VJ7{f`KQ^r>~{^Da7`FAMpL0iQ7BX z24z4v!vJ5kuwG}nBxxDjBI$BT+3YEF7AnvPK3 z(iPprLx%|Sq5cU_Uj*2fiK5?w8jnC-r&Vt^P;>}F23m)utG(p!V4BrDP_jPcFCHH{ zpS&}Z$Bo=QEDkzLk!wW6j&!_dssOUOB>TtJsql>oNk(=Xfh3@Ww#~^#Zj@Bi?BfjP za@=_s<OEb`rW zW>+S8Oo9hN5}7%%_f&QJ(klN?fX%BEmF<18CDnC7R2MlMN=retr3#?ii?QD~JXMLx zf_sx&4@;oSzBmw>O(C43(e3<01NHL9|EcaS5p&DPpH)vNF%*F(NOxurZ@4_WdS8Ga z5$Ttp8LLQ<{zQAoI^{on(9Cg70@V?3MP`H2Zc|ZGu1*vx{z4xhkY5wBbol&$LTwz} znnjaDe3;4ihQ4nYq#Jr=)(`!nS`&N7L_R?&eE;=!!5>-xf!zOd9d@L%OQR_qsJiD8 zO83bZa6ws@7Hh(Le*QZ=>p#SxpZo;a@a;rP`RaQ{G1V*=X9kU3qL9SeK(jO2hYY`t z?k@`L|0m{#7%}~qxzQv<76Y+^RSj8YRtQ0cxDC=h^o)iR0!nC{vwi#I%2zuO$MT%| zVTlgi&$ENFb~F0bL{O~K{|1Bb+lmOX_x-e`Y*m3qwI;nHQmmQmq28|pd<6Ofu{4E( zWC;&Ysz*arScsm!;M9;Ap5cvai}f*5tbGLYWN{3G%JignbeBNQ$`7D8)o+y3zYz#g zhvT5a_GXbFYsnbsx%sDd{Zbml|jShf`|Q1ji7z#&k{o8!p+^tJ(hw=SnJh&1-a zrjNzVDs6Yh+SeyFJx|*l(NwCax`F}B1Z}7v$gj%SND$NT-dNR8dn?58wYpS$(^Esw z2>}xu;u9W@!SMBX(ool;1Y@_Fs3S$AbN~EW_aC(*=wmE6gE44zW zo)%rzT|ZCVMUub=V6-~|rlD=AC-zspmLm`-(_PLgBu{oSy#ea%jd-qNK(~cHv-GJD zsIy?mdpW!YSPIFrh4N8zG#}b1J00Qu&{MKD)p=Mf_R@1ikMHF3MlR`XMnO(8H)Zdo zcwz-+;5f1CX1e*fl9X+px#o!}gG+UTyseima%|g2l}|Nm`IY&$z%SzJqOVbC!)qAy z^B^b(@$4$MbT4~!@{+Eem|58RbnKqPBHTppeZD zC2Y=g4hKwjv8X?c7K-t|c)CY)nZ~lnGIS#^#m>7B*nOs^I(i{#a8z1v_j$eQQ$W*$4 z2>S-!DorLDEkqd(SQn3Q>BR@EkNVrn(%t13Dl6arb;!)y3xSTblqLy3Arg zgK@euHQtiW;ZqEXP-S@zk;IRa#4msz2l{he5+9zfpW9a7*uDT$8LutnmD+`xlV>Iic66_AN9| z&r_;*zf)=`v+8um#$&{-o0v|6P6j3e%OkUH2)dvk2uYZtubtcyJ5|?hyaC3vYA(%k|q8l@w>EK^w1E z*!1In{=K51ymopXBV9aGTD*XsMfKg!IJ)F&uL*xO)$IKi0Rc2rtA0E?QiE)ztM^RhB>1uDVlS8+Coqo zEGTHab3OPABfSs%Z#Vw_NOi2P&UB{YJjgKn+CqfzgVm>3R@LT5U;U^}s1Aq{P^V<4 zLd8qx+i>xiXjsKZ;?g@$uRbB3cct>j)w zt^1`)u4FZt?wtDWPEFfoib9`?3cL{4XolxZ$6zRGIt@=l&(uV$+kCNVxE4?Q>eJAt z^$dp)XZCsbbE$xX%?Q%BVW{&~jCdAB_q0DfgPC3f>pLh^6(&|upv#O>3jJV_>KJXI zf80R4jxnuw7#?|;qohx%8Q4afDBO;z<~13y-IhKEA!?^RDqf`N-WySjql_HgAiLRO z{ss+R3X6^o;wRlxIaNJ`!TE;f2c89&B`+Q5Z|bBdtz@OxpKLE`x8)J(UTv8*Tb6iM z3E|C19AD>bR)7{+0#*u6vpm;ir{d9Iw0|>nH(iZ}fn7)MERAR(HFx3PMv{-B8td*m zY+^$Cr#^?&4wlbg2BKhUQ>#ZLN}{(K0e_raoGT*YPU z#-m@JAJlxM3kjkElc5ptWAW86mSr*v5n3<>w7T3_v{LsZzK}55^t>Hz+}28=yhZ5^cV7_uhzTmuy9M+ z4rZnL`0-0NL3Ljk=_CXLQy(IKtYOt(nLz6BOPf< z5$V!N=slr?03rFa_dREyv$y{Do_n8jpXdK4nPl?h`)0m1v(}n-l_C8g%>b-7HMKPX z3JL&tOnw0*6cDWewSNKty1Kww008I!8VYuRirk_gzW@q8fado$0NkYD|D$a{A^x{( zC;=eM9-#i)HKye6-wL_s-|qg~H&rIZKYGlh{QK3Eftgf)Z&S>WuLel#fYKd1XAft0 zJ7<@Z7iG=?N>{aYsekL8Tz+qh{@!FA6pS840HxH+f%LD}@K*y!O#mAm#SCR26~$?Q zl8u6jje^t)fXGilL-Bk0{WkIk1tk?V%`sX!dIm=F1r@9SB?T1~B{dZd&2Pg{_>=z! zsM%=PPo2MVj6?q+?P*ue3qf&j>4dMAG;R7aN%_N@{8bGr!W#2=mQpa5J;cx33EZUiiQvBe z!1D6@^KX8fOoy8vy$#^D6S`P~p~2;;ru006NTbb6CMQ~K`GY&1oK8icLeA0)S?9OG z_%W*%BXZ8cjT8_2cKK7O2Lnz`m{~~I^|GeZ4i@ieoV=>|cBANpzobyh%wm`m!Tr$4 zk_12?%L-|y&p#iLr_oYWxNxK0=zhtW&#po(w) z%ZunV#~34PDgonk!sR|y4J<9L(TcJcz3zPjGbqFkS&dQA9uzG(yx!#zp4Y8q5c5sv z`rR-4+Rbtld+S_Vm04)L$Oaj_4;n#U4=b9z-q2ZkPVG>|yrdTzS+t1WDZhpJ1*i(e%LTgD- z`N>Bv0J?+#o+Sa1L;(zVoffldHGHP3788&lI2qDbtW#+IL)(>u`^x(+uJSdb=O-`3 zXG~wpWErlJ+zRQ?#WJF7o8h4~l?XJP3yLtuzmh>%7g(Oj7Ftk}Id$K}q{CCnD{-rF zf}PvI=h0l+hz6=2GK*|cGiyB<2_>kY@v0!6Fr<@^EYC_}O5yhvuhkl!N;{-XuK4V! z7Qv8@oD>Hfvj`C07}Xwk{1f`H#oETzlP2YcaJ+f1+xTgVk^)e2?8WHg_~QOXexeBU z!gV|&rVH)3*J@#HHB_fpY^P|9nQ~xuxAEHJptT7e*6nMaE7`x!kQc4UwOiGm5uL$C z5S(2u2QlM=1=V{_#%hG>QLP>Fh-Vi66@?s~ZUR$# zf&jrYi!Eat2#1uOim?8jas~JgNVcE;XX# zCN3j1owh%K2d0AA3-VYmF}3q@HKU{(voKzGn&!Xom0(9%6-bnY(xR<`SXwh^S2R~! zI?FDa>oQfg(KNt%u-y@-*PFi?8K*<7ak+SDbdTR{%)+bU?y8io*K5p^&*xwA6laM# zI>|hYJoCCiE3Ob6b70)&2yanT05Paud$*Lt;(WpUjcLz@j-)K*C3d@E4&p$EI0<;t z0K$Nu?PhSoxbUJ_5bAdDL0Tj8u4KN9oP9Bh-HM%$G2&$POAW?zPwRZx@@YZRfdBdo zJQmDM6kb+tC_*vcG%I=QeCSy=X5Q@Yr0@MI{+nI@%z3J_xvEu|kT#z@f`V)_zrW6{ zeoy+{Np;6Z3o0ttd{FfcL!#9$QP%8wc2Nmw2NYkMkd%0yLg;oY38-vrm1Tzr2{|*~ z*G5fVka#MRLchWGYP?fX+EL_V;WJqC-5<}}b$x~ENdON5_qpGL=AldwCZOw36Xq6E zu)pC4U(5$TWWALQo#EFKzRrBe!uUOg^62Jg|TnpmEA7Fkg;k*^sALZzL zl`fm8t_Yo^?VfMv;_}7Dic_sj)4-nMAy}5{L!Q(04kW-Kw0Xoa9LnkK&6hpmGw>Jk1SF)x#vdDVP{rF)h0=s`cJ}WXx@{l(}1azBX9o^=z7^ z9%c3B==f)K@j9$*bO0E?^4-oiFP7MSvj1iA)16hhQ70L?nQvX&>gSHRj?2$wl%K`# zY2lMGtK|7-_f)%RiQx=Ok zhA403NL3zSq#sB*I?2;JU8X1#xEI$Y>~aOA3-d->aG*1q>-A=h5~mnRz_YUJ_`p5W zG`>=+`(_=FO2_sM+WZ9v`@YV9&T_FEYLV@#lJ6t-Sh0F+hJZ`FgX`{MAHhpI>>orp zvs!HHEeM!3JAV)%^VJ6s-D91Cw4~)C3`k%^BL_)gi zKebaSCV`Bdac+?AI=z{C6;wxC#%NZP_r;3#n9MPU;d}A7+=OtFc6S-QCahV?L9jct zBhxGmGEgdXEW)n{FKcvY#P_t<8JeIWUDQ_C7tqSQM2pW{9GV zhUwEjgPs2{;b822QLWCHb$Kr)uS?3Ids%QV0cU(t;@hY$Oa)u4>k5M$8kj>FUE*Lo zIfd)Ayq`>L*aJ1MJai16Y-+Q^gIeL->c{c?Gl(d|b_^789qa!sIja}7RDNh8T{Ke{ z_%gozMqmc(Qwl=Ii*q6=OnAT`3D_|poF@xPQ7;LQA?x&K*Km0x;9U|5K@fF<@4H?GY3W-jk;gc7 zXh;J3bTA!6_9`%;I*Z80GL}J%vEX(8uf?f41d){l{5E&Sf6N^S6Gr^& zT2PI{^ihtC?J3NQp)ef>S Z^SW$LF0c5;e7PGo$O`GHV>whMoWgs4=qkorU3>Vx zi_wffYP_pR^X_vZ!CrK&?1Hej#wk5l^J&5<67XMh#Vdm0QA7j3!+`(Iz=efQPEr0d8#7->oddkH_uxE#W2Rij#KsO1XBBV1v->l@T^4#T8iAYf)86hHZ#|l-d1fj zSs#>wU!1QJr(%B76uqcAB0mEU1Tzo?Ehik*dHVBoqTWXz-%Hx73EB#8);N5;?RZuB z!;GJ?#4i}>SJH4B{9_a67K@e5!njFZ^Q!vZfvYW}ssF5i!C{i4U2Hy`7QH-qT#IXG zc!&yM*zfo|1N}qzWPcKS;6Tygz&O~9*lk5w1T$B4cSe~c-8D;q(q_0g-dm2nwff|- zLYIVUGWC`ERmGpAS?uQych6(Sn+s;6dc(W&g3!!Yzj`chJd3wkf3aaSTKk|`?dsxG zHT-x$&-Af63H19qNRFNKnKGwum|h=0ZjBQkf(nh;g>o_hM<0wL}VaHRSZd~GQXR&$ECEgk6T`;ewi?($K@s_$RSJW<)z$6Y&5@<0OO+oa_zQxIc(ZMEf5v`^pqb(t|XE=PV zAEsyK(>6G4{xqGxP5Dy1K5O!b$bbZDPMf)6$vHF|<I1AcaKcpQCWn|<~EQ#k*g}&Xea82(qXZe-b|5e%Dq0NP< z;qT9Yp8FadfCJ^icWV7U-P(S!r6ac$7a}rnt5*tvKzoZ#72)+z(;;o#9)92QT-?*fR&&-oP`BNm$P~1 zcksm9hAyD{=A)2F=PTpE%IX1gwJ-Jr9gqmPt#e2QhnzIk=P6kX2}sR>TC}D-R!H(T zg;u@*-exJdebxkK(^VdQZ2U=`-%$k}8pdX@*ZpIkqbkH8n-F0_%wwMGti6f@Z z#wWGlg$~N`b&pxE9#x{9Wlx2RDkQ8n73whtbc3h}f@nrPF1rRn?;*J@-R!EWy_&!Z zkq4uX?&W%m1<%NSZj4qK6U;;G_{cuOOvEUpNh6)sn9HNzDhl84VSFqOJWlg! z*LC|t$NdwPKlA%#!RerY<4_({r&Sy3wR5*`%Cl;nBjYw(DXBxDA znq}ZXIg-2){1t@#lHZnABo{$HZ@2KjV!HqKle+x=r0@S3FPekeF2NlTVPIxeGb}ai zGNxybAgrv5%fZ3%nlf424INZH^P9wB|61Iv*Ff|lZ4Tg{q+x4ssOOq%YCiGNJ|}{= zGx)>(mmGmURI8?PfW8Lj-zx=a;Ab>l=;%khY>GJY>mdP|DWiY>36OQg=(hzOt5Ijv z^HwW=Rk3$*Bp^_qYzZX?bQT|^jYs0>5%ZRyL-j=>uMR4gh0tlL0LCz!T~`-@CZOSA zjb$UK>1U_0S8pM4A)*8?OU=pJb0V|>YgM;j?|sM-vUhtu@*zvemZP57O?HxJzUd{B zvCgaq{D}n#_LG7L{9-{@#Sybqs|IyhmUWJ`SH4N8L(G{A>vH;KVXpnVn+5RQDqftFoY zoJNV3kbvsRYBH^o%W8a!hPC}t8x>Jkto@~K42*}d(`BD!Jn7ovsQTEm*z|{XKYs0q z4<6UR2;%FST(CmlJKJL|!j^11BpRb7Qu&>B;jEG)bAPh5r;m;PB%cE4R*V>JrD1Arg7)z_fTWtEP zLsgi{_mP?03cmu*;i+cF;r7rHy{7L5cMy$IcaX)1W7g{tq=Id00m z&hSn--i!XYx7@u_&ns9M(%hC;?5H;M)>0b}7|h{wV4|WHPIvlM~daTbk)Em37XI%5xOq2AGl~^|Cp8AQwH7u&CN|XvnNYr zgED>^dvc{gwx$#_@=QKT#ZT}oowV1NcvB6jeAn|46S)ifUgs`}Eu5sd3^F1Ck0;6O zV?Z{ierKO+IhMeUA(;zmutwtP8V1e7yyeThDH^R+3iKiyLYMFBPz2sA5tKA2kpu11 z!kgyE_r^GWr_kWM^KY(UIlvirO-OfuEOY}D24<;1a;ZEryK&-jz0QR8*7ZzJ@FiZV zN{BJ4yg@9ZnNMy??v?29d}Yi=r_2z0br+x((e`vnp2hzALhML+l03I!sT^rf2 zr+kbIZTd~Jn=QUdm&e`6vlkBKr5>Rt0ZhG^>v%6XV^!#@wGEm0{dq`gg71iaaa-zo zgY#Hjo7GEjYBl9$C3FQ*7G@TOQj3i^_uUz$<%bs3l$QC{-?rYbmHy!7e6daBId#LY z4xcfIj!Aw~>~$g?HZ}@A;~&7U)u>}(a-AC2$6ET838VV)hOks^@Ru9kZcqj)JF3he zKCcUF_RBFVUX5jxX1Sw#Z|HhyS<#jP%4O?fQsS*$d)|o6G=@G9gSt>g1PD_V8pDcp zE$2Q)xVU+EA^xi4WLZqd!gwup^lW5D1s;XzFvZ}i(6V&U(5}`vJ66>_?L2cWm!%K% z%?wa6fhTo|;BgkzT1Hv7Rryu>n!te^n{fN5CCt~&Z=kH7KXcD2REH7p+(_Z%#)^Vm3|E!+3i^ z|HCvv;PUa-Ep4Jq{pza!he8I7CXL5Hrvqw5F+9IVt+AGK*IriUu#6$ONI*mG#EExE->UlZ zcp97J6V}BLgkVUXWm0J&5+wXo;Y^N%pQ%&7DuT!vgE(U5+auCe;q}Rp1p1}uDtsg?*M$`X1ZM-JWa#Z>|epH;9Ict;M> zvKlarvTV#F&w{PV6>EV_r64ZF&bBwf(KZot2@`|C3j@fMjJ@xfQ>HuH#>4}z$)v}x z%_SG!Qa9jy_Cp;b+9ZurlO_{2 zbtH}V$;V#E6mkec5^c2(E`mGbC0qE?pXcJut`22JR6zM}R}aa^4$EBoC&w_q7VH zU_K2+J_VP})z-X0L;URCo-I1vBn(sPWJ0lYLt-k9UaR`19~;<}xM9H{OFufL9(el2 z6dCC2aKEe+c}ZnYarI68^%5l4fe)@$Z-0eMd-kD4G6_hG*pCDVtCe{F*_;frPc(uC zy>CUQ89y4dRf9MXJCLP{2+-WWr0RiG#Az6nRA;zOAko%gnw9EGUVdF%zSk-LadKNR`d$f1q% z=^*aUmA=Sud0j2Dls*%s3gY+Iou@=x?i!A9sXzb9Rdtz$Q~%#rPc7L)(y-1E9-DM{FA~sd zTJW^8G$LDdriALb(3%VFr<^nKwLk2I__5CDDdbyxYzJyOSc;sRU^e*ZNnCPel&5>; z#a%Hq*PveCEfx54Q2E`x=STuyl`nTp-s&-py4ODaQ=O%lxta}w8?!sh7T#<7D8Fe| zhq1i2Y0lzx9phJZ*-l=kR2{#3<8mojlUywdbg6Ss!ED`Z7~$OJx2qrXULUqBTA%af zZ3iVV?;f}HE?m!WAo-3G$!K$|fs;Jt1@FjHF2AWn8EDr@Z0iRveghrO=^sU9 z!+X=fdtZu7B__NtG?vK0_qW1``yiASPKLZF>SXztK3!p#UH(JYyp!14M*Z^>Fs z3$K3^L7ti3m*hdoDt+i(;4enKLmpL_AbBqma5$~`R`|{bW#Al*^1n5mPE*N1@8M;( zPXco@)YQ#Qm*sc}H=U$9XYG&ry%Ny@kX^dCf)9jbp(N&+;EsL-Y z9p#n-C<3zt3#_POC0x|(A%l50&jmm86-daJ(C)lCqp!jBmco+uknFqpS_->Q0!nnq zn@2xy!L(!(Ic#CIub>wN#z{b|4z9@q|1CT)@(*D{3gLS(kSq9El%EwHKEY_vqgE2~ zLpEaIN$i>A^-}9teJQuBw(2nP&MRjxcF_K?%Vc474yuvKnNhPsO-FkkS+)6k6?4j^ zy!@Vw+QQuVjKegsW&xe9FPd20Ba2Xo(bc8{=DdiL!^O^o``{=?4!yzRisVEz8Y2E` zNP15*?ri%zEtCAduB%lkvtz1+K$ncuf5Atb)rQ?MS_d7!hyEB5MI+VPMECKU#jO3?;itQRo?Ee=SM9zy`lPS zJdH_!MlR@a;B%d924!z=FfJ@{nE=PHn-t(uoje6m^wF{TlN`2)MgIq*HT80 z%zb3Bi3~e=qbDBadth7P@5$+!$xsleOwq(OH%+snE!Rt9W;F#kRTx7?=~t7#KFn~T z<63X2bT*Qp1fysf{~%`VW=}oJT{eLkE?*K^mOK{X*w{^Vu2y z*YNtXP%l}Rec|gMR+DvJX5>&|5exD1FnISf_=x&m5x}Xh@kfRKD$DlI&c46;{>T>A zs54Qw0Ht>nStCY#(PFu6$$ThHmhuXi1X;?Bual+x1Z2wY-)c<#o~fMR@{MBQ<##Gg zHG6u1BLz4yZmElvL~n+w^N&mU2Kn{eT%4~gsS>hjUAqn371;7imvrD1Gl0F&QWxW3 z&|iGN<_p_nHG7+mz9L|E;eIS9#YBRY3O))KkBP}4kiFQ_#W3r_;>%S7cRFjsti>iz zsQJto^YR1|C16iqk=;#67od=q2JCAN~dA7&tpHG z84czS=x4nz2r)>#hVd_)(1Iu?`+2#42VT=4a+TzxY%s`Ka`M;-?b${yJ(}q3mqD~I z1X=bH)xF|IQ|kAc;VjvMGKTF$<(}a(7w_!b?t-NmH6QWhaC38<91f*pEZfy1l8x`(kf!eQYY8|cil=1!dSI~n9Jvrn#PJsQ(+u- z()#Q!ms)PP&83H&sgG&~vC5GXS_(`oPkkJAY%q(`u6AjuIY3;M?8l2>@gs@&J=B_z zh~9dPklshg@Pkyp*X{JeFOadBFO)|zqPnn3+N!tNo${bJb*H~cuaSUbzDnj^{2W|i zZC9?9&vF*ci^SIu^jE0QOyH2m@jb4K_R3bzq;+`fuECi<0o!x=lfnT4#yXhIc1Ls+ zefg0oxC~-yzg0Y=%G!btd#_&6*$PKV;78un77}D_1N|5ydcooPCbpMeo_{T_8B+FP z&~T+ObwfMLJc~kALB_4H{9Rhg(mLj7J!`!1>1=65NguXe}oKQ z{#j6{NWVxa|9?#&vz==KUY3Ue>~;@~c^PZdX>X0hKx@&06IGii7Rq+n-Aew)PZuO@ zoD?q!rN0`*ADSVqVqyzEL@1m%`ShF`Ba;N7=OIB1(}oNq2tJgExOae)Pg&4I`+(xL zS0M9crb7jfxM^8rj7H>ghNKJYrroEy#QSot%tEwMdK1qtjSX)Olyr^5gzVp$)mydJ zq^`un`pbEw;%xg2zZ=IxH`^6R0F9^ky^i(-2jkVy1f5UJE?cLRn#Ac1lt=OiruYi) zcNWbvEWqU&WVF1Cf%wSl^+AlyYedb5ON&Ji*crTh4xYW=!ph9G5SnG^tb9PUVOl>$ z;%NSs-+#(*iBq8 zwlVsN9wY1$rZw@PV$i{K<3J;JjK+y^RwY(BC{Wioti>w0#`c-hbX$N(jru99igNe&*gt}1nvMLDR)s2r^$wB-UOb{qME?%6py#S83!Zirs$>p((|BA7cj$4`0SNv*Vxb1rxMX6cZCgw;v)|wNr01oz^c%*k{{-B zqD&l4Zc_#WdK<+V{$2c9IWjby#=|E2sB>x(=Kxj1_i}}_AuvtMLRR()8$kmTtu(mS zY!+_h>PqW!dFAr5A%SyXW3JzsVEp0p!}^hp`A?6Z&hseg_daDBDZdcdq8jV_Jv?sx z2S61_OO>*3(_%D>FsW|z4!E7x>m#cov{oIQ)vPL#SR*D=EP0n_-ffs{L0cI?ul(JY z|GxrM$ysX1SU3dnoE@6g>>GxVdAwTdb-ienq6AI>NEz}^2Pnv-@Tj<>M zQzAHHU5id0>a^-sWsC)F`)tRG7tdqNZAuQt3;JFE zE_YZSky%zlj)BI!(@{x2{q@Hqdx0B4;VMe1-)W~_#USBd7|HyP^l9ATu%yRybb#2T z10*2R$B1Vp#~?hzzBaiPHt$E99IsbycS*6giKx7mypGLhIuXk9jSgvn&5IK;E8Hhk0kpDx{5X_e+m08r8%Oi&7`XeLk-Cm1}!5qS1acY|J_NAu2Srbu4)z z?%G5CTe+8&?zt!+)hp_m1aGDoM6^Y9i|Z4mo*!ka-mC$KtnYN0sE>HVU4^6vHgCRn zFQdwMJws(4nlS^8B>}8b(q`?J;(GMUZtpTa+UpaK?nL`IU-VE~F|Kz9GZW-z)tqOk zg@o}D;{N(YG2^CGH7YdDyZRIZDz~+Fk=6VTgt{knQWFn{98BF=&MsC_C*xXLzSH}gPu$JCu>TLL@w$**JiD=>)h1z^~C?vx5Xwk`a-SUI$ zvab2wrTpXmkTImGr&!M=AuoPr2K~7_ewL=}v4-Hp$nGT$%4o6y^2b><3I)VXveK}7 z*YwXgmWv$klKoq}s{;9di+A0jEnE82)(esRvy10XJ~hy9adBh{39!FJRAgaZW`%Ko zEdTVv!PLKvFv6oblvP#PSn;awHr@Fy_Ra*3FIT;`Aj>x~jbn;hsjVOe*>WYP{x4Pz zd~ycKXAYH8jt#EYtv(bNhL~&AaGpF`qzqIIX{woS196h|-JEF|-mJy+_8WJ1Z*RwU zD>|O;`6XrP)Dg3H)UVsa(0Z4`y?Xrftdghc8*O#<7%lA)xM2)S^$MDw8=-%87W7+Fncb> zWY)UeLTOtv~BFN#We~! zL#0kzuAzy59w9L-OQ@<@K0eE`Qcnn4#QqWZR=aQV5|Mm#r%mQ^CqDod(eEAlV1irOh1_ z4awdpmh!Li1+PR^iT9>1ZY&n76|~oR9%?UHielytv_hZmDm{7qVIryQm57-a_>xAu znecZ5J7Y2m4-mG;;H^=)8DhaO33j?^!&-#n#BA1yEY*DVycCSX!}HJyBh1cOKPAI{ z1Fy-WSB<)sh_|Z)ZK!jv?H+v?z$N4tK0=lr8jKf5-5p#Zhu<@}@ya}dFx}ZBF`YLJ zPl9JCOENYB^UhRD%i`j(-%7kZ^ca6MBd0ACUpXp_R0O4wRf_G*bAgmSJ%)f@5L2Ce zDSA`a0%Eu-y}P$LA6J`DD~xHg#8;pOt>|R3-kB8|F1h6lYO_E8{I(0=Iq{`fbzG`t ze$Y~k#vn(#Wy(QsGTv;tcxCb4-QU>t559m^t5+1JG3rG886jgL7tCzBlG^Ls^6U?MdGk7vOxUEo0@% z3lTPjrV8^l5#|BHDM)nveGfSEito!jkZo05*x`79)o=rX<4sMuf1_e`R?ock;<@`G z5_fR|4TnjxZ+d0%rpL>KN+35a*-vymoW+f}sm2sCCpqD_JlF5>SRqOrlh znG^q6{a-LO$nkt+I@lq~`M3p)9xF=_t(hb84V*p{=*>6%EOVwDT8x6`-00;`yqv9w z0cGGJOTJj=F^-lRctis$WXvh2s15bw!J+(8Lz+{t#8a6^wNc*;_ukn?+|wY6h#W7c zz=8w8b{TYd>$V!*k!a#Yh-uCrlXe11Uh>+RMsECco28KBCCyKc@5Yy1&7B5| zi9PZ$=w93_!}<0iVL7G4&O)s(lIw^tbv8Y6oRNG6W9CLN-@2+|&ZvFC#iM17GN#_bleiJb_$ICJEV3mk%@)bWuCW>5n~v77xSFF8)C z>M9Ak|KQtkDHj$$ycyd^EZ0hiZokQnGkUcs+tQOH(;!OF#CLh0iNQDsXD^+-sf)DG zbeU?jy*7BjVbKxVz=r2QdA{n6md^*>A3hzEovPoc{`h$fO7ZbYu}rRw6#tm?RES_q zw4}2$`}6CUxr~lCFG^zdBJI=xoImWPi&mXrmo z8EjeHS(qSZuH|RvZu#wZXZ7MUNd3X%6>2Dp6{h}#Z|6J z-Ne2v#P$1QYG*6vvlSB)#%^VHC|@|Y%XH3RNSM4NNCOvx+ZRUDT%BZAJ)nxshxu|X z1^DCZWiroQx_F*8ExNI-(Q@gu?|Fk*Rc~e-FDEa?}tU0_14#@J9c+$s)ixehxT)i z#h$L%3KKFhe3e$)0?k<|_b*!T$0-yGy()utZs+s2tL2ra?)Vn3?+doYyi`!=8*6$i z9C7u%AV5V~JVT}FBB3M**@<+~H;+?iFsNO4aUlM#Lgc^l8UO6M_nZGYoR-OA`~A;} zM8o&Ua`=yU|DWUp{9AyZd<5#9oEeCfAkol=lo$8Nt8VC5|0>^^ROPob=$PKgYvg8~ zK5zZJX{8u40hvRN%{EOP!-890!!%~cV;(-h?9L}wRrW1BPFz`8APzaAew^c-d#xG1 zcy#~KY}{obT}(z3h`k&nh?iHFRY=GjyARE39cw^1+g#LB&-nl;SuxhUaDzg)vb?%_ zw%{rC4%dO8RR-u7jK(hyru%)jNDI{wA!XXT&U`GV2%-&Vj!2|ut+{$>`g8)X@C$*% zktMZ15XGMniJAOg0LqnL02Ik)67Uy<()LGx|0ya7dz+K3#td9(5qHw^z5c-H)tqrU zObh!EQ}uj9tZIL=STF8ra}$g&@!GC+58br}+p=pSX_&j`)5f&&JgVR_d4mtDr<$S<2g)+uBVWEf z%B-_ObNTgBz*Gh+{7+W?a28DDe93~o^Fu|?+B`N3yNaG{RgAJly_Or5K5^xI^@}eb zZ!Cs--9AR^fGexJ?)Kv<^sA_k+n)E1MGa0XCP6>_;iZA*b{s;{X0PE;W^r8Dbw)_b z6eAn8l^4EiA&gr2A{>lQ-WYe`e@G7hAezx7?uUaM<2gQQT460iI1?5}xZUnN z@or%wqiphY3p82-42kwg!9i|6X%KsNWc39Hdaf=xP;RDst(lw~No`}FF&m^#a+K4%h8 z%DfQnL~R^*YhCPMAaHI`mG1yTUjO5DG&AFu8`%1ap0YJO=sP*#PVD=75)1JhYpBii;R3UOl_J6JeP@nLAM)H6XS zHSq{Xxd^KZ=G~Xr%_-XZMj_uTK#?QEDLp+^q2-3}mr}GgHf%yZ#0_d@1#@~*R+3Hk zT%0g%aGjhEgs-tHt~Y`0@SFcR$xj&MbB|4iC6{Gx$-1-i({-`aJ`sEJy{QY;(JgGd zFp;A)3BtV*t9j$bg2ULftg|iiczEHXtaUL%R6`2*VIuFf z($;YUgG>B_E-sKkVG7Q(xs)?Zz6_{ln;)I8-;Z=`O{K@pk8S$M59soAueU8InLM-* z;JL7f|5WP{xmn!k>P*%Dn=L81+WY3WO_)!ge^Xoum(#S9^X^q3S@k{mPbKYdVUzzI zQVVAJR!Mm?3-oK4yRz~Z@oF4!7N!{}Yo%E;uY|W?()-%xiTI@J4<=i$PMs{4wyDqB;evsg-?_y7-{HuFM!? zwdyC~x_&0~0x4n0}L*X`>nM%{D~*O>^kGUO0!k@vqkbH z*4yu=J}tRiX=nC3Mm|b@Qd)~xzd%k!)dhTL*Q&@#8$*~pVw(jAIceh|5{jh#8I}E! z*8a~qWPh{`gkF}9Jb2&j3+SF%E2ayqc{Ks;QBpKF?)J56JF*0GO_{Hrr$F5=PU+_l z1P^JSpjre^iq)%_9ld*yU|NkK0WApTY3LXPl(?Y7UclsdG`ZOfzPSzcz7T8MyKCX& zci5=rs~)DuIy0ZBd7<^Fc#KfKQj@jZpQXB+Tjof;n2AhmMA;Gi8!g71TV%`NBkTQ0 zVo1t7OdG3+i|Nm*LYQI}5V>0Km4Z(R4)nO_U|&?X`SB)#R?0=giIfRm8nID!HG z{R>^3_M=yFccV|dEuUia3B$kfO&{7Ir=T8N7Gkb(=79t{sEgL}`egGznkp!IkVuOW&5c1(WjmBG zM(m#Kx8wm5d**SUszUEkMnas}Yqw9kV@k;P`0=1-+ilhIbhTSR|Vgx)0&7mr~MdiBRg3nRpv4Lb0Pb09WzXcp=vPXJx1wyfNd z-M;VBS&^p4Dy9lJsvLhY6wC2(sC!EypDIA%$X?)Abr1Z;Ly^03kEuviMI``CBNXYxvu3Id3;(umbOPIG1o zf2{VRYG~}4Y{+f5d&K+la5I6|Oe@7gY5C1!@P$aPollGD%y>_06eQpC(#HJYF8!e7 z#BH}9f?;_m%eZKYSc%3M6<*3(%q4#*d9ck@1+6o({04RPIrl9PV`{%ynze=Q|D)Wo zpF!TQa;7N$Bc1&3Ia2>cIypIy1Ic-m=|NTt4vmb+9de|@fqnUoo=;8`q?%Lpqr+7q zRi1J_D!h&r165<89;q1V1yx8j@Kxc3i@VSx@zXjyd=KMSc<=E_jj!W~7U(k9RO8k| z-^!k>cyX%M1_6Tb2$3Il4U+=9i%s(#EQJJ|YWyy>E=BC&aHy#omqm&sZ)rcIPC6~R z#96X~te2c+2{YwRLbHIN@YbpG_+}d{mC^Uaxury}7+XNwHfS~;^{(ns|JS*TZ$)4q z%<|-MWIadT7`SE|%R$dQGX!AF*u4;fGX0SJ8;(2W733o;178DO(4Vm7e9E7h9uZG_t3lFxy@Is+Q(T4=gW#9HxuUm<1o7!q1y&4eT-o51T5uby2zvm|GWEXRfDg zyRUfK*$kZrv)mv7z6O16gwY0#d#_;rs^B(9oB0zfn_E}NO7`=V8ovJRnB9mdL2;%) zaa+{CaoIGjt+XSfRtT<)b3W$4O%CuC+mMeH&zSjN(7JHnO2kjUC1LAElRs(&vUF%> zSM*Q^8fTysI&^o2V(n~0POkky>;I|l%j2PZ_x`6ONfD8)%r}IrWeJgyvZp~9gsJQ% zsgS`imh8)9FO^9MDUXoB_N|`hcg}e_&-45JF|Yf& zubKOPz2?5J<#T=B%eCJsw7g+r}>lIWAdP zb_XgQ>!4#V+6|3%-mqYd_koSvEp2s=l^9rMTfCdxw}t~#lv27eD5!E%fSs`P6&dyi zZ1IsC@>7G=rt?@PugZ_hC5WxZ^6yO5w=4-K+Oi(GpMmUM8`o$dTqp@|=sL-LK*HSQ z{oWN~c#HkX1IQ7#1Gz7g>J+O`X*U=VS<)50Twm zL#u|0@>~y9&HI*}DjJ!Xw0^MUakMme*1n;cJMl$4q2W0cph#|%lh&Icd*=w8C?!fA zNqY0T5GOS-3^xYoy3#v1HLFNmmb5CnF}OB*YZK4Ao!GrV4j^J#k+Tih z_d^;g)Qbro>Se|Rna<8?=$;IJWR|bcc{9n|C)jz@x4to?_)|vcmnMkzb4O%8*?YU2 zdr#j5BIns_9XCO^WFS^^ZiaIy#b817{l*N#>-b?Cr=bt%$n$=$a#_8Jh!t}zNE-ATobVGNwl#uy>euf4n zLYX?eV`?{N>f}0QGm)sbRv6n#qKJKqyd&W zJJCe>w)G%^g+L*iG)=*%B7A0+oaU3H*x#X}tOo12R&;-%oAcaf?i@bt6~hO)=%-`h=)X;2w4lto6@urV#d(caTzALX*M>&ItQ-;C`BX?!TBKpW(C zAvU#}!y}%#p2p`WrVerN)Z8)@E!b6j`lxvq>i$Q2Mw`-n=?0bJg*r7DD1r1_A=?3l z3o!@pY+lb0xt#jCH1)FUu0$sBw)mb0!uxzTqF3FvS^mgK7nLq;>~MpuE-5oY5KiBo zxiGc(yKoE+E;|# zIO_iPw_QSmt%m#;y^rZ|tv_{FKcm;6bPRk)y9($pxk~S^UAjG2Y4T_|2AEK5T`9xR zd!oB%*5A2m0mNP5ReQz}Bf}{U44{hPyt8>Rr+_U^o1OXb)6%AefSu2D0-|`9r@K-L z8NCoLvbUf=TMUG?S=z`~ly+|V%}V4x=X)Il|Ncyz+Nnm8h$Oei!gc`-yPVT03s3Ct ze9Uhrb~nXuX+8Q;AP>EBM0sQBBgTa$P9crLUcsX^6iR`~dMhB0)pvQyV-+Q|b?XZs zF3*Hh!x&`J0?H$W9Sv7`s+rH%@SGZS+U&iW-=#Ox11A{dJ)cYycI zTQBkYFTc0=FYGseXTbeZlCcEn2iE)GQp)#8YS2H!gZ~TTAt6wmHVdUJX5}I6*3GF$ zk~d3JA1l>VT})Y{Eo4kAI&$#-@JWA}!%1pgl zx?`PbrZ9qdr`$VN-=8uCw_hW3Gl&=~cj%Z0%I7QSGpsw1eK=5omSi%73k1*gd=M-4 za!Rur2z2C>cptS7-U1t-TA&;$o+QIhFd=CwG}?LCTjic~f|d5#1x;v&@G|2%sZOKO zF+PqtV`Zx^O(A|cMY;cDn*7SDMjzhg`LM6JYyw6ywKqP;caiYhWSPHqXC6!+LmGd;vPHh)8RRD9LaT>k_1l4Tux zoimQzZMc^fy`;nF>`U7=Q&2GUxy2akg8=SDPPH)862Qz3vAfnO0~X=^%+SB&?A>sa zrW?8*(61m{|7B8ZME+rO0oW797_fQ=%jx0Nsu4LDo|`Wa{i$4~aKcjik-9gXL(q5c zHW8k1C@o-8E;Nv>uDb69XLea!cH)fVig==C#aP#eYY3mowa86)uyi&c$;Q0YEFH@o zoKY*@xKKfJhEKg1qSua^K`2F6uBTFMra+m8Sfe)cfsd`h?>f2|rVFjUzSMg9VZ35;Y0u`rIJOvG!PgXaaG2PZyK^ zEzY*ZQ^PD%Grf2flpyu+gfL~Mm`MBbzVKK)!PLB@!c`{T-=tPYKS4-JlGKH%0oji7TN2v)5R&_dn6qq!$&;Z=3JOaFA)}lg=hzOP&&t2SaWH4 z?4>u>I{BX5u}MlfUyND%JHRJaXi*Of!K#J4q#K#}DE6{(oXiD(y%8NFuM*m2hz8uAr zQK%`NRgW%3XF42&hS_{9-wf=yKOLDqq9Aqx*4dG^|IFS(TEqCBOEv?nB#;5a2 zGqF7UL_!v4Y|_}uP>qlXNiyZrVEgU7_B3KLo@c~vZK<#o&2f`1(Ut1>z(BoyBG$ua zLSw$yi*RX3alg=h7QN!WOeV){eXgnA-BQjjA_+rj7;7IsoYP~87WD*GXH(Op1!Ba`x?SwTQF3Xx?5vyI%cz5a1MQ`5yf?r2LiTp9~ZKeKE~Ht?8%! zqx}D!Cw8Ci`=a{n3)4siJTgNP&5AVGx*Q?or5_pUC13t7XxlQ_rlP|^!ZgLYZlBSs zl4I7{uIESzqivRx!|$-APUT8_rJ=?4{VoV_u1l z?n<&}_?_6rum$L@H+{uRA-UXRg;EpsSBMcgd%5v^89H)2l7o&GXznPNr_p5927t`5 zt3Kx&mEivkT>V$R|F;3M--nyOh=aaUxca{5XSgz>M_>kG`ZAdWG}|usrcF9LLOiIn`CYQ-#cK>$Nh*F|~3%TUyZ) zIOEx08rl7wO7x!{|1FWmBPTiD1%qUr6pK*G9zW}j9Jh@v186FnYtUHN8YvaO{K50EwLdH zW);eQ87c1GC;14{5@byNuHi8HZJIKvJJOE5lmctCz0>|?(`(&TtU%C8U*7Fd=+a10 zh`-PIwBVz#b_t0<3*jJjWh2@WRRbMb)Q!j+C45dN#P&Cw21g3K-y=(Qs;~@yCX?oH z-2^qLc|Te=_QZxjQZq!5Ax`HnreTR3+-|p!s`1wG8i?LNGqb5WgR@P|fd3Qe0c53jzQI~|shnMrU-_p};yU3SHBr`F9C zci%kBqQkB%S>j&8te)WutYXxhDXe`mziW7A>PR%h)IR~!<1Wioxm zj%H<8i5(k%-E=F68DvY5iwcyWDNGsVHb5g8qEKFITBug#;KEXRm!SUDilX#AH-2Ni zk(Y6aEyZEHGR>9wp}Jpp6E+&XkIs7a)-!;yfz^Bq%=|c4Zn8EK)L$O z{kPy8fd`Po7ib{>w^QRd@;Ue*B6ag>pLE#cwD<0lWr^ogeI)uWbV_$~YqvR?6i;ii z;_Rpv2|4ouO7yg~n>MdUJ42=2nozl!z+-Ry33L5t;?cJQitR^CMPrQ14I_I)!@Tz~ zu}5GHHVP(H1lF5y(G#bbygcW7U5k`+-;2DTTDc0>`|LyglzwJ0Q^DO3X{QyiGhb;E z@;V%=2w@>BwHK8wHNkw+lcyNzMhbF_kdSK1y;v8E=lEf1^x9Ngb2pY(CvABJkl?#- zL!SYY-sG8oOLF5s8)tv{IQf%}N`BQi*#L1NyUanD=}@nD&Y2Q_2l3*uz)xrko-w*Y zPir^>O!Lu_DtfUXGj)qG(vn$6T^NElLnBUM57sDy!*PWZL*uenDguJQC$xwSkJdzC z*%Es*b+6ibBO2$a*>(4qKM|)_^Uc$iQ}doie?0R)Hdp=S-Tdy_qA(X{%of|G3ydR)ran@X#+&+)I z)8Ic-CL}Z;VhczJA@Jkxw?+{?B!Dl6V2Oh>4tngSw*$sRUBGI;9dJR?ir(nn zVp%Kt3d$tBsiR$(2h%IFzOMir1|@I+5&IPyaMU?M7X-93f4v^^Lk(zC{(td~(Rg75 zp%fY;s-pkoS;M%r-k?KY#~1 z8v|b0ouB}IZPe^`=oVm-)3Y>5po@8PFl>6iujpavO@?)$f4Cg{L(Tut4ST#eMp_vW z=-q&|r9vKhc_v6B_9P159$A@K7GqYQ^>+Y@N z-5O78#s9y3;XiAX?7>@>84#i}#e;G1G(ZcLbzz!&Cq=>u0ah%f&=6U5IbQlPA&wfL z6(?Di_n85&Uk%L{)O{E;gQY#hFZd!yGdv%~!m_`Dt}uomU({&IfCKwuK*^J)xZ{f> z&kFYY{;2zX#qS+d^(WV(|JUw#wg-aXItl!dKaV0j$SL^ta};krQVXz%$FKnYg$=M5 zVu?7wP;YgU?e^L*uf|otXQ+DVE65mjLw904qN6F* zYgk&uB$N}$WJM+1efo+j_Sh%aMqNk{Zr+HAe3_`$>!v?8U z?Mfwg3J^+P%FkyyYnO$nI^J{N^~y|rCzQW1_szr!N;jrP&5=adJaw$nCeFz_sIF|=MNv_DjP~$7yCeT zbr2OjI~8rlKIT}4=J5!8d(ucb6a^z$W+D|afX38pprlyToinw?G`d*w1hc8Dh5n`B zN=pdUm}Ri!8`p>587tWQd5d)Iu$KHlhWX1E?xL8GJI`a2g5T1mwDJiPA6$nn!QHrC z92%Ol)AbB9GfC732+lh`GI(gUlr&OW#Ht*Zi*+>oQ{a)x=KgW(?&~szzwI=>;A7H4 zOg`}L&8@cI1^`|UUX?CKd+C2G7YVjRj25`0PBtr*Hb25J9Vt*W^gQfjsXuAR=hCIa z_YpP=qf0}W=th)WvJ?><9(b&Ob=%o?z@fINxW7;&!+%uoh5OyOy)8`@YF5ZasR_Es z6q=cC4UmwTNZ>GD&UtH(aMT%pp?sy+C*r%wTE{}(In5<`*xbic-rs&2q_O^eJorM6 z3mKzJc^2VMf*i16GlD>4-zsJw8VSX7a|H%*`CE{02dYv55s47;oCUQmW-+x}yVreU zX5NCIm1|#8JR@`>NCmS8WlJ-mn$D0K&*8U>CNQs;4Fj~^%`_m*z=@OR{JG_?vt&Q7 z4iLzqpvlaDha^K(;~*s!UtX2KRpRD88?HKZuuP{s)v`}5>{6rRg-IW!W$_@oE3g}f z0QxXWzo*6|+YXs!PMIEh_;gOdd$n|MFg1htz-~@+k>K)xV(@4WV@X`4gQ6QlKTh4b zHOUe}-^oBnqPS*^ZVZZer5PF`Jj95yReK%~!eG;zLAr`NHWA-vB`r5+Dsz#ZdR0TK zz5-Heit+|;1>e2VeX7HMf0KDvMnln4n>rV6Io6eGuEoO%fC~)mprU z1aYaV`}MFS&wT|Q31mZd9nnFSYGQ~LVcQ*kk5h)QXKGYbuzJo-6t#U{?ePfA_*Gy!ajb)of>|7=L40-gMIkt6uHYU@6E-l1WjW6r>R#j599%(?hW@}r# z4(f6U6HRCvIV*T`1NXXS2MNHwQJ1!Jg{iHLY;C1+OnQ`TKkwD;Ct{=I+vAEw)*GRr zqWY;0yHeN{qCs2X|CQkEZ{K~3Eq&`$@t5)rjDX&$w#-nTUhGGmr+u)~l3S{(k_+htn-Vm=&8JUIB@yln8;3l<6nnOyX zHz%&%)k&?=lmn;3eD3$T^Vgdq3)9TE^B59f4w|lIMISj3Z2kGvx?G+c{%G&&<;NNm zGZRpmt0yx{6UUw46Qbqp&$-DXmIpP#nO?AliQi~*>inc`Ze+NTxbBc+A4<^q`LnDm zxd9)OY$8oLZ+6TsG{2XR9@S)|Ww{)T3}B4*JlvK_AK#U*iU5t6rK zxTaziJmL8RAXE8*oiF4$>);Fn#_{ghkZGp*lg~*H1{XWa_8UL*KV>+ zQgG_$CA|b)z9tuL@Y(2q|C+=7xBj~0XLiKjkMaNLIWBQ(J4rF`x)PKf6KN+2gr4?R z)fLs1z4jR`6N4k4$_>I1{whoAWpgm{5fxS3ONl2O*qU=&>PV<@O>ZfD_k4O4R_h5x<>1V8}C|d_eC>Y(BF6_{RgT6Gv&h=)$=nYe~8R)#$Fp+G}f@8U;%= zxM;5I@J|2Z#;Ust=Tg<#cS{lGo^U)EU2~!NWPjRxZaHEL9~nb>dolN4=zZbg&c<(? z#l26=6%uzp?@SnKgMyvZZ8`!a<=|q~Z+Vbply!T*2-Y4Nqs5r^d*B{;=rp8$e&)0* zT4`>11_nXG=Ek{q5Z53(P$Kr(NH(4DwT&k?IzD5gNS1O8SdOu?@g_b>z^~Lz)=bhKCMgu?9e>YSG`OIuo95g{0t8$ST- zC~|0%x$Jp*Cq9N}u%dsmR$qO*?d6TW6d7znwq&K+&Lp^P?;j3Se~Cr@)erjXhyMd) C&2Rkx literal 0 HcmV?d00001 diff --git a/docs/img/domino/surface-training-curve.png b/docs/img/domino/surface-training-curve.png new file mode 100644 index 0000000000000000000000000000000000000000..992acbd424a113cd94a35bfe203718bb68b3adda GIT binary patch literal 71152 zcmeFZby$?!7dDIt5(XWDl+uk#cPJ7A2+}1W-7PVIbVx``3o=MINK2P=gMf5{Gz|6a zIp=r|{@(A8@89pbUcGdN;hDYnv-jG2t##k`dV&<>B=2I9U?L$Q-IaPFp@@WZQy&Ql z*#!L-_yk2X4mn1=lO>YMs$H-EoJ zHbH#t1mk`gc)_rFp>B_aL`Z}9cSA~%`WF(C2$Gb9=&QFkHj^=Gm6a#1Qu&eN)*kvi z?nv{x6LixkA#>5|&P ziPjIID8AjOtTt>vu1@ne_%`jgc~|Zk@7VYK(oXF=+N?X8G(RbDw7ohi9WDKij_rkn zLWy((T?7gFueT9O$cBuT&29e6EQ#FeN=i{owUXiFqxqWLxp{ftO%3Vq{C(Z;uZR@m z(TAXBm623h*gnilPj7NP-VQ}#I!eic{?W8Z94@`;T4<=I77ezZ_9PMhz5n_IZAGKs zplcacfk?h#)DJq~>+$_#RMpWUT7jkz;v0YVjf9d;hfSof)|?&y!T6&$Boqls)|PKK z{^$eTVilDacK2Ol>34rLPl-+Eg&mUd6Xnn5MXb;%TkM)YDqf!-FxD0f%9eS0p8shh zf)p9O_=gP)|KF+ndx~tq$2^#F|I@~zKav4hG>Cd{N- zU_s9Q+Dq;~drYIma6in|WeIfkT;ZQ(7-x8^v+2*`@G(IRTRcwM|3#Xt=qXi;*k@^# z-zL16GSlTaiE$HEhoR>>({-tF<8WKjC`|UiO?n?4cLccG6 z14(1)mK{ZPVE0L*>1d#IV!s_p#QyH}m9g1#%>}MJ_s1ijB6bYnlo@WPMnP>4RXQ?4 ztHu90FtG36sQYT@S{#!1`msL}3g5G|{!1Mk{p58ca_@h3Sz0{Ci6gCRad$fdr1g7B zg=ktSO4;RO?G8zqr((?7)|o-G)sDF%BO{qriP{H}6wqkxTLQ^(D6wq1RyCU?A^h9b z8wDzkS`8j`mJ=nJ%IZ zOGFQ+qdou8o>AyHp1gqG?8UY-|8$0Hy=tLOvPZojmb4cnnQvsYK-+vw*RzhyW%0w7 zOP;oAFVDgqH~3l>q#;hrPO2t_>ZK7fe9v~$G^F59T;rG~c)U+NjBxWF7YMtdNh_l4Zw8kej^8e)NTl}8D*^QA>vHHXF>SDL}?dqq1iWq_1vn%*mgT)XGvfV!% z=Z>p?b=GiY4zG#kwBD&Zwi@YmFL2B4@^~;XK2E%&X-kK27B6VivIDbq7V)#(PEQ`k z{kT;?rI3T-G-b&hr8Y8~b5B^AL-)#RzwO~swmh`UTN1>f9lr>F$W#!=?p*|B-D5MF6sP}TO- zPcD{Kz3nk-=#xXArjy-;=?@IhieovRITX5W!yduaG@p_l*5MVyk1+%%GP-a@X{y*h zM=S!AkD^G(G|xoXEKcErDvD&H22>HK@5D~|dr7G(n(y(Cdz@{>;a2$ui(Oq_48f#M zUw3?%yn@eOUCfI2mecTK_*O`WQKJ&Mz}m^(^6;uk;=7uL2n~XYLP@##7>Bat!;`|= z>X;q9rNlZIgUn06FMBFA35kjPT3biIR&8MF0+d3@ zrjl;K4+P%YXw?dzE=JjYh}l*T;zSL)I&#Me&9NRDVVkP&c7*zDDNq<|dMm}JWxP7H)4V_VIX<7{gHrRHKJf(ZN7e{N1yr-B|&g`~X z9B?s*?#Lmfs!D!dkTDrkOG3)`x zRr6x0YQLACj`rk48cM}l@`jd;RT;XtyOSBG^Vo%8bEd9FlY$1Odp0>XnJ3)@M3<#f1 z^jVA->2*rU-lJ_%H&7nZA8~fut}?D63Kc#aRyqQEc;JG}q%KK~2}}Ka@%`tV97Y`& zTq4XG|7o()+y_bW;y7K8iDb_W&Dmi2PZsjiF#)g#eJO%dGtgsgy7ad9_YLsqWdjawdUzsXTDZm*X}wf8ais}D)B4LIW3u-=^7ex2L+|}h-wAWyWY03Ug zD2^gDGMRIAqk&C{ZC+K!B@-`9$N)+Z*FPNH8Nyc2!-{_ay~q(0UWxmcWL$2Y7x1USWAKbLAA&lqkazc{<4oj?=2CZ7 z3t^8g8?KJ8w9X87rRiG8-8MP)2_FSjF7{8@MkUGr(lBT#m0uDN(_cxPJvY+AUsLA| z9r#o~#8=@UdSSm+K9+Dv=uOyumz3-4rIOor-LcNG{%nRMR$Nacjm-Sv?y1=(JfC2Z zV>qB09oTg2_^Hgpll&bA z^Yqrx5{os%{pCb!Cvw8W7_BC$hlNi>*-}avI~eJ@V_JfB9F$|dnGHpEvv|}MsQG9l za&vRH_-9YU1P}VfxBIJ@&AM_Q>HYlnuGcQh_&_m%u>%(tC+YVSUpVMqaSVFYJ{~W_ zZmbNip8|C8qheEuU^=_ z1}b7=0kikSrkNKsdMma8$W!U55;->cdgm;)j zJBQdy@1M=n;lLboN<5@Qa;M>%d%9^fUzM?tQ79u4yo^jw;!1}Znu3(?dpODQ@Gcv( zni(6vy)}z5{P@E21zRr)y6-cQvrAL0HHChj7`H3;2gc6~ac(>POcV8a@D?(SajWjj zs!W~42iU0zUpkiSsj4evEXrm8p9S({-9&-B;p$>XTgO#J<+i%v!NdHwb?>F`tok)P z5_KGn{&w>o59MV%>`|u}O-LN9XYCosYJ}L@q1YE9p1P;E$kI)6KEo8(XtevBs8?In z2=CWvbp=NXWXiEkV#(imgkLJ0k3Hd-0-?h6Q^_l|GMhOu?&Kli#GXRf zoT+pivrdk9iLnQkW8Re;%0mwg2L^A4v{_U$6yx%UT4$3yA@MZxX{%taPX${Ldi3E!nz(!Yb2tM@ zE*$>lViy1Dp_=DPPNHul%xea$@HT~q?kB`({(?MgVVdoP2P$;9@TVfolPC3)u%-iYuP zbSAEy4Mh>B8(Or>1XmH#SR1@ubo!di^eZyT6cG zC%fxj{rmc>%QLbrrJ!;rMsUfk)Qe5yi)_^WnD~Q@`GY}ne7(yXbplRCcp`NtE^;M` zC_WNi%=DOh8pL-z3V9%YGMtVwYs#s;!whP2dsUM{4yy`8TV!@9Lmeequ1ufzV&g+> z@$$l9STpv0f~?Y`RcYw^YX`zl>H@PcC$D7Yr46*3D3yN=u^Dbf#ouH4VqtBV{Mb<1 z8-3r-c3b;qJB7^7GhMl!9d+oph1j}t ziTp|w&NwokCX=1PO&0bi9LKbMTC4VF>fEMKiL`!R8hKL)YS2(4Bh$PPVbiH1)D!*A z$UhXK$8K4lD&xM^-(G6*kyf*H*vWQJvc5FU<5UJ@5HrD(4`a>ZJ1{>#(vA>`2pohn z?Br2uPde;oCkfE!!wyb=a{C{RZJk2m3hUdtT|dQ@zpi%`JsiqVyvSJT9vAmjcb_sVZ%wdT(K_%iogA1gjN0J;(^%4!GYp~ z#kDM&^(CQ^ZVT47(3LD*i0I+QNaX!nL!BDS;zc{u4%D$v{60LAY_pa*@gry%TcHcl zt}f~mI^&U^K8E%PgPr$c0F4^jf4WE~{88bYsNtm5*PImr8Tf+KY?r`4aF1;OZrGzV zE~rLwN~5XilYgW{;A5)|t-rWin`<7quxE(culhEBr9b)+*$i2k=Z$+c(v3t*i-!Ue zu}k~+mPyOPOt&=0MVp?B$);?U5%y>%EgH&;vgJ0$L0dbkpE$0+GUgcKBT~;gP-N&1 z%EQRcd*jObo>C^xHy$N7?`;A}79|?X)%l9>vIX{q3t+ONjKrM$K(I706(KR-D=anW z7WNP4W!O$UplW+%5;1gHcy>DJhj#F;LQ@ORtFzf;M_Iy~khuLJlG1$Ly?xiUW2N(2 zR9Vx-N$tZ}N2z&`s4ji|Kqo*UVBk4DqYE*ODvDMWgK9^=CbWiZ4Dn!TnD`*q*PUj` ztmJ>HPBnvD|KimRNc(kXrZ3BWw&18j$`MO0@w?GxHzq9mgT^CT^Rm5>0BQ(iB;byV z?BNjYe$ai#cOpXA{ZFmbRjE7d2wuf@F}EG+Rq4Or#a_MhPFA03sIE7fpzR3_T?=*X z(?iJZpG!C72+68;V>e9Ir5vdFQ_ycZs4mXw+9^93XQ>rO(pB4ZnO?M~5Xfn#i%$|v%k$!}Gu+Q^i$$lztCR2Z&$0KWWa@k& zS_SP88)+S5xSWcxZS+`EilEyhfvVPAp2yxVx^0pbQPq{1YW!P^X$q#f+l`J&deLNt zp7b9)``by5IxWvXlQmpS z6!#jI2=o)##rZ{{b3R)w6ucVY$VVaG34sig+^+j6(vZ*)X6dX+$2{1CoaP;Hp&hKy=hcpT4pBbLWc z*rzUBT8{1HlD8`aLYBX{#};ULE+9&D{9U4!@P?~1;V~3TH|&GY_bpWMS7w6z_f%40 zQ@=EiFC2u~?C|3XMO7q5Dp@v?t)nJCbS6wp6z_}~n}xz)ab0-1Lae6Nyj!|QL#dZf zb#WBg&>eVhT-gyCKC^rIJo->R&RKHIf`)&3eRY3r{pFY4m-~TyS7HkV4b5UFn#{Z* z@j811WdX|7js!3BlRFP7M{gH7FQ-8V2mLHynJ#hDUE(T)!p}HH$|NAkk%rF$m~}fv zn>bEd#ZdiFg_(wbSxl=}YJg*J^5;wLi6ZmznlS&c zX}-I=`igDGgRMNQKu~eI+_c(zJ*C&1`gogBl|lk@E;nY>ASfsa{<)OT)GUoT9~1W| zfiNecXMC($=%VLEVCQ-a<4FT4#uGMgrOZe7{()rQol_~;iG8nuj>_Z-d@vQ)uX-DI z+|tcj^L;CPt|lvkE$y<(9Uha#sjk9hIr+xUHveuaC#MpdGw=m@$u7$u)pw{e8GKz& z_r?17@n5tyY9=oVh`H6Bn!d}4f{UF|F?{L0WNPS>)FCX#vFF2C87e`0S}-o>1n|RKqaJ^J^6L%H#@`_j;&w6+LB;Vs@hyq(i@?DMD3?dO*s=w4}^OxKEN|Ei`RPV#-x1y$tmUS?e}?iFmWGrrnB+p77%ln z*nQC|*YuWo_$rol*4ZqeR>XDetdXy~*k^RQ(=MajjY#LMUK5Y1#^J}Kb47)OMasD} zhA`KOs~;;uD|vh?sh%E_kk3AS=Fp#g9!F1Dr0>o?(uI%P$D@t8xH zEkoaTxyAB2rv}9o)~~$qoRIi1xuwJ#E~jKxeorP=RQ>6YAAnl=3aZz0z8>~@S}5q? zV(!(hOy$?LMf$+zQ3Z8fnPly%9med&(@j6XHHU@;Tu2X8ctQn(Zeb*fVcJE1+C5vK zxEeovchadrj|0mt1E`}tGE2<<2aC4)ir|PdSIfct;xG01GC}MkQVSAS1?BCca#Xvp z0_;^xWLMKDWqw}z0&Lhzy1!g)UtO?$i z2qo+R+|#-$&b4mbNyS$}y}f9E)TqA1qfvxr&dg?x?ZTOyO79zU8QAi=p*A5KL}r+0jHDn8MV;O7Lljbk-%Umwp3sKE;7$&kX#0E6hmBborBK}OQTFKg zd^dz0$15LcvO!N%Qmu{aFa3Mibf+d}Txs@S9BciBybs3cuI_$ho`lCo%4^EB%=mOk zTbb$Gc&%kh5ytCdiS7;->Ix6d`REMRp*0t%WjzW}lZ2<`K8yeAIPz6$n}0UI@0wqM^1V}Z@_G1Mc|S+nuInQuU$kbYChT^ zAEnEkVRAD&mAw```Ih60LDh>PIewP)^$6j=xyiKG=tZ=7ud9QD#nK?irzgknvCT`T zF!|plv6$TL9Ba6A}6PpNNQVlaFrgR81@ld-6Hi z?Nkv>^M=W(rlLJlYZt#*Ien&+9g(6-^kT}pz4p&vr$Ud40m%EkiZKrR479zJbo)KI z;N)T`q@eR(R%g*)M5rEn8MYLS{LTc!grvA7$9Zxti*b2lR!hna^4^I)g8w(KTJ?|; zt9GX9c|q5v1#jl-krx#&V#N~q`N`iFJjIBv9cvlfbkex{H&sb5gxsmyw4d*HdTA&$ z`}QRTdNlG4X8p|m?lPQz&RVc25nibzu!o#1%(_|rOV}9CL@D1P5f-C7(E82dA~FY2 zO8Q#w6}?3WIW|2ZvRX@UCY>qT?=k&F{U~6gyumRLU-4dLWW@M`D#gdU;j6}RNNf2g z$%*NWB$h{69VCsw5C0>MTP2XFRAlRDRgQ=nCP{a zLI>!xbE=fimLNQfCf}gljh~&PnHxV0_u(v~T9p%{qpviFY@N}f#y_WS8Z(8D(@}-p zE?w%5ohVuAw(R)X$n~}ZJ3>O_^-Qe`>sV(Lon!>R`eXkLikt7}OzuDN&sfU+ald)s zvC$pL?y>N>FSCz|0-w!>KT{WL?H7VQBF>?`hpscpi^Wz)V7yc6 zHlrBGhS2KP)EshQc8`K6sBU59ifUyZjm+0Z)a%wR4cPy@7BrFZ9(f(QqwOD=dPJfg z*EbLSTZGx~G!BAFOhhN&_=~V}!dF_vR%zDJ|NCl$3}|0#kae)63vQK~61dXxaO+J1a09>36>LUu zE_~sUc~Y8o+f9Hjc9}SBR}P`oYci>GaswPnCW#B&xO-=*c1_x81gA z{8w7~Iijfkn?sQsc%TcLl_i;xsmWnp-E}F!>)(!G16`aB;yrs6o-))IB6b8X%G}9MVrswsAW27l~_RR^`MIe|yU<%m*B4w`5;o+sc#Xk&;HY+bz zfNs9%5e>=1YqKpK0WL-17YwF7bJSmgYWk{$FSOI(`!t9Bd0I#FfQb#l@+wNy)zl$$ zie0?6ph)l1!Mrk6v-X7FQf`GY-fBizZ^(=q$@}?#T<+hh1sWM}4wqoxO=# zmr^$mz|K`{6x6`J!?LqGb>7aQY8LDJ0iGTa3BMNizf&xD{9~xA`^m<#zyB@GuQs1@ zcL443;qld%N8NCTCz={N!PyBs-$;n^f{7S%#|zu;l;6)lYhU<7%^|`7%p!Bu5#v)k zh3Ah7%PFJs^77H~aTDvh!`Plg?!GV~xXF}d<+P%}2%oh6HW=y?jL+nPu4>2I(_0tE zvz4BJ1grpf>Qpp?5-xd8VPU24`5cy#hIKEQL&j4F#UkEeCn28l7h4Y&W-=)vShKL*RAxYO-@*zqV_ zt zAZXrN%Sx>%HfZNFkLz3Mq)RQMk%=AuEESdXGYVB!rEK=<@>niViqZo_X#+#Il*rA+7Nj>|OUPt}qsMPZ} zTE5vvc(k%Mu|+_KFd@TMH*91$>%I>$o*?0}P}A~V`*h>xt?_qgxewwWw5WxW7IsX6XjA(P3^1%}#ZKqf%mr#eJh!}yl_h_PYi zU^hRf(L+B!0{i;@@do=xD!HDAHm$;9LVuyXk`O0&sIGF>gMTf1G%kVlOW8sj&30C1 zbg)dX@!DW!h3&#~Qckm1Oy-HBL2D7eBC!udhp>!$lS~81oZ|7`;u>~wr!wnr(k;7% z=w4qJ3n1K@>TOESzg&G3^(b9J6b2`;QgLvsS)J-KIB<6Y7ZI)d*%dCpoDkT%yx!sSj2I!?-l60Lcbp4R8f0j z%cQe>prLy7A?g9O|Kit2^l6bpC*~Nmnfck-yDu>s670UL1#XNLI`87@OQ+^&(I~i1 zt5!RR9;ze&Nd+Jbj-hok>sK3BgPiZ)UcZZIly~3B>7Q`<)O;soT$pCm=EU0XNmVdSveLJ(px*(P~`+D8!%hOpqmb!Ah- zjmko$+?w6}qZ&zF4)td0mgTL;$Fkel_4DGr8-VvL^T>+ic8f_1f9}n`sl`DGYc%NaCx$pQHa` z)fdH4oBZsRvBw?Bn1;P)fvBs}fu_xOx0Dm@$mWq#Vu;H&#&O83{JYY~JohADA>{>XgtVDe{i>^RZ#D25~m{=XqXT zvO@cr^x4IE(O-Ii^SE)<*WS8V#&|&HMbbhDg_?6BKdmjv{G|Ev@>fFqKb+M$2>WH$ z1CpI4J26sTC33N;6@8l?f@fDW{cQBno;=-Osg#DV=N4AGF?I(~adZdz#tzcSLx-P= zT4Lv$zQ?s#^G;15ZX+pnFm&LmcUmA!8uov*41tvT}?K+gqIF;g;9+6Dh|N{f{|qjlyTRoGs?- z%|iEMjS>a>ertRX-Z$+hdeL*{?C!R33Tq$>$*TL+bo&U1Cx)m!7U>OolXxramZTH8 zEEDb$vr}G1kA@DWq>wFVb1Dsj1p~cCsxeAkkhjhzj{+p{A;c_oh-KnTx)lP@QEhR$J z`r8|twT@56EV)9CcuU@_4>5i+tPpg?LC$K9KS8cV?B zguBU+a>@MAemmy}GxpIN{4C`JWEGR)p63Q`CvySGX^Qo@0YuskAIdpt6p>{&`l?;c z^j&tWRO&TDGNRh5rdV{)UIe0&KH#4{KYzSotD;C9cP+Zarndv(DLxtVyMs_`_hzyL z#xLdSH_`6Nei)REEkqDO(=voA!2+$pl9c*-upb1B<6v(%~(lTK3S^~^`Y(@S}R?HEK+%abB%K$xXLxz!ej~yIRz8QO^PpOb>rwbKa*U)u;VTWRIGU7+!-$PFz1yteX)*qdD=3WlQyepY9B4|Wa;j# ztOK8CIL8kcCo*Ux$B&o#TQYsbRuoz6iX8sUY2$oYh{M&7*aw@bMWPWL_cXiLS@e$l`%kuM9})nG)_zCfGWy=mUW$ZEYm@zpkC?MrsHt>lbKnHN(9h~8AsZrsGpY9zFY+RIr-^yl+` z_IIn@CYpw-CH=GJHkgT1e-PZQHTGmQ5kO)29VV!}3f|b4BRp=;Os4C7w+M zM_nzJko$kg=vyHmY4~xG=$y~AF~p*il@k=S;Y{05)w`UpPSMbZaouQxV?Z{e=2n3; zsm#mzGO$yxeLr`MK%^)8;q|LZK(^8yMj`JU@pqQGC|O>-52fJX%35RG41l=(x$>0trYCOKeFQBP^9e*@%4e_WpL)YRS533;2q7l?fjL4fK6Wm z-7?oLos8E!(n1|s2xclM8p&0i0tlWt8kvJaL9mrGuOYNX-F|vGdaYdUd)pm|OZbcH z(HH)00I*RVEP@(`Eksn0kX#>uFn>IO(|mKnIJF$G`V;qf7qK8Z_wqN!P2v_Q5fO@Hsy12N{ z76|5F62&NAiV_`lZnQ)fw-9f%l!$$AzSgFT91<0w7+Stsanq9Bw;O2Gi(Ie2Atkcw zd54?nH2?5|%j~1ikfm}M-D2f(h z*7wLIs!d-0dm`pboa^&0AqzS}{wWV#S7If?`1D$>$}SU6MbPc23?U|M)TG^YTKph6 z`TgS)%k3Xkz4}h=WX|IUYeUIH;l3@FS|*T5!#HiDk|u4NMpVns+wiGQ*3H~Dx@|hz z5O7!3^{HQO{AZOS4j?6=DjLx!>gtlNxj35I8jz$Y2SQSAvX_Lcyn8&CCky!q#pYmJ zD5)jTV$w2A+~xl9(>H!JdKw|2Nk!yPE`slc``dyf;#?DcHP$GmcM^ANQmVD!oEbH)2V(#7B&ma z#j-p0x!Xp5CEvM)8#(bSZOiG3=+-E!aknC6HZ_8V(N5032DA3#ofK@^{h8@gl5_ZcD`{lD3r zINj1r?86@-9z~mVzjq4Xv5|}iJ>@^@w2vHZFfPc?EoBJ}yH(qOI zBV2ZSHOgl+>TqqSoy%hUHIc4+@l@7kqztQ+T1&bN2oIq+FNaxr5IYAj9*~a&R}G6&$vVvZ)> zwYS?8;VlwV*F~J}y6rSh1U$KXVpXJv@5?KEF*?8YPVmqY%fzJyRX$wR=&amIl z_k@ftdjtGfESd1eOkL~8 z7N)}5`BCpv^VF?cm;E26Hlgt?F~0EvG-h?G#@?fGWbQWWrfzmKpK8lzWbcVS4|_22 z@uA)2_iuAhmem4-*|Ci+dEP=))awKbr7Lpl90`l7oT&^kr&+YA#you7p%jh6otWTI6s=VzG7&x`FFgIUPT`(V8P>b`N(Kn{LY;;ccq;O3-xVQ9^lxXM-p9=Qk^&@9&w zRC|Mit7qxwu+cK<8g_Qg!C`l(f?ztr^?4$nxn=B;om$D8pe<~B2y0=$M{h8~mqb(V z<%fvNV`Wk>OYfE%^*ko?!ynxm?si43rG!^^>$Sh=)cd8;Q8@61)6&N}`735I*CKelX@A#Tq*$V|TGwb@Jq5NANrVqB$V)uf8#=U~9ji zxwd+f00c0-3> zMLlCCUTo8eEoN%JQ*NwU`BvdaiA|2WLoL<=5U#2zSu$*-!KAR#5S*I2OZ(ZuV@t(n_5A`(t{O;E!dn{F$b?ls^D>To$@ zmn!NpVzz#^fJtD^?(hp9l{mN?Ne6|+I$*IC?*m4voAgH<9h(>EZ9_DjvB8-148Wav~ znw_#0fmn`l(|^%)3*3P7o!@yDqch06-h+?!?uO0L%ZLR&ldAOEk9Euj)xtSBId+r5 zmgRyL<|2W2b?@UDqTQ=hZb%|340z!D`Hr>c8|8pdITxnXhfIu&T8;;a=Jls7R78&N zte-|BW(g9*WWYlZv$;UrJn_WB`riAwqsVnNzBZfow@c<`wY1CTeF}vr$R6^=zAye# z%l?kG71BVohdODSsj}5&dvnra58zZ4AN^7OwPs-Z9caMn+*eW8vFQ|MY78lg-?p^V zDHlUh{^j@)tpVm7y;y&(^P#om;YqyG*=Ie9I8FmmH9O)&`*#%mER6@pFEFC=hD?siqy1o@4xN|!W@N4?cdUu?75Ken#L zD2adI@~~$GchK${IuD8@>AnrOxbIM{ub{t3%DfE{7P+r}|I0`%Qi0Qh+zC@znmVG^ z$ifK1mN;QhbCOVLKI@BSGycKL=k~R|BtQ+CMCSuV8Z;HJ8)TDNGW%ZMb_7{6d49S-b!_t@8#q1InyredziNG3Lx=*R&9_T;N%=R% zC`#Vv=bL^^r_YVYvYW)YlbEVYSi5-bAOTgFl1L$HFSJV>))ddyRU6C@#hM6KjLV?y z4ymw5Z74Cv!_T;ry+)^dOHSbKDwB&ul!n7+8fSDzUYGA!*?Mn=aPm*q?GzTdWZ?O! zH7C)iXr@_yp1fvrnWBh3F5Ceq#)soHs%|_1J)SlO`lr%?3`tGpD>u8q-_HV2R(?TK zmfVx4UMj0n1*CUbTP>*Z>AKIVLi1RiA8uZ}<8*53AX~`)rYHFT^*U69DgydIPd)&! z-4w;uiR2W*KZ0^l!1M(>Emb>hX}Ilm(rqD%D`S{c`EE}J(WRIGrpRP$zfl6gVs&r4 zf6`mDw`b=z=SSpS8M5pi;32r#YNcG^G{rSW%exi9r=y^cqgAfe%M-qso_YvSMj+6r_2C!FS^d1FFCq6Ze#CuN4Z6!;uGNvI;+vwDMmz#&`uY?m%V++j2|4PlqK@GiZY-yU;evmxh8ubRKCkFewk+uFrKQ}+WT!yu( zW%k~ZGq|6yAm1K2cK=8{l#dQOY{q1Dn_ltQ_A152b$FveEOl@kO}F7*oahH~u@r5i zsOyvxk|JlPIO&hR5r>84_kHBpl#ICmErhbiSRmAY)omob0E*!XbrfHbq{@WabogUKfITbr}v71F1?Hi<)=HDMr zD1X{D=vK8mg$HMf4=FfeYv6t9ij)D>uB<~d+T&OGn#uwkiIo<`EX5hs)sxk&_W*AM zy#15A%N@k`1_PIqI3C9>a#*YJ0d1lGOOgsTG6h4MmxP# z-Mz$7(X9`CYVuQr^uY!{7%`|PUwPbfyi4lQZ6B3E(R@>%!i%_hfE^^2%_A#a#7ETW zXMl=pSK<9zTXTWHAHiZc96nRqXnNBa>6m9dThCy>3BnQq55Pzq0lDP_$e&kJLT6vt zJ~sLuk6&!GH~fWD3s*BM$xgcKsfIC3oOQPlXh@??F$ER3H%W!9Bzg{Z5&SmSX^-R4 zMry6u zN6v?2^n+cnQ9xB3Qw;)h<(uGbx#tr&`WAX6Y1!^<~|tkJHjtY%?i96GpS{52HUy zsvORt#j@0eeg6H321GI-;)}q@8W9!ZCK=B|OAzv4r*4+YuUh3gn>Di+1yi-WRm4?^ zab7jx88Qp3>q{pFeS*ghqYgOWj?VV=kzC6+7A&g$7f_h-lj%fG^Vl`|7==+fE>7FEqjJ4T zCFT*0;_|T~xx80xC|mzM8|GBbi&G!xAmT@7B~6oeYnL6ZF}Yq~(d+t5JDr)DM4CLj z5X)bA5w&u%wsw=E{|Oesy{ZB`a&*K}@K{B;ccK=^u<%Ns`2hKP%!Y(ahKT4W*&|oz zZUB#W3nV)6{4ja;$)|$0TVv!!ZR4&SppNBl?G3`sR|f-lJhwy1>I=#bzG9IuyU9;K zwI}+>rXGHGk5s0c4JY1bKLNE(zrWJK3qq{k4@$4`fnb)I_xzK*CsB-m;LCzd0R50OkF$R6p+4>#o}Iuv{tn`60q0H7^evtr zjPA%D`SBWi^JUr^K^*z_#orTn2oE9u8@VZ%po~lA^}D`@!)VeR_2DH7s!E1C78A>Z zJ?@n>RYwGF1*Rc_L{p0{Kq@tLe7JQM#^9Ia{0c%o85w`|M;50KG^P)4$sScb0u2#} zpT(o7H%x950h!byE6-fMERDJB!z_n z^j!~^eV7dpnd#k9-6uO=#g*{glFAF^o7n!HaWIb$X}qv+W9Agi6)D2Ruc}TH`p~acq^jh4p-!7Pq+?>dZ9!k3&+& z2mI>@G@l%K$WWZ#a>+wL2yxw!C(F$ewh zoz#)yFyao~)H8-W_&HBXlxNp~h^X=O^CNMu#YRZ)6fk~ra-bwZAdm{99+D}*2xwU$ z5Mcz43NqfIW!Aat7(dL^&-sUK03=mt8Zh+9M^&%5Umbx+a2Y0{13>=l$Y=*GOT&-m zS47S;#RLKMvu75$JVylPe!l4c8Z^?o2!LgXA;AF_n@~$I3St0;tR#H|C69R63$0v| z2@t*)bnX${0Wo3O{U=7_07-kz{lmrwPhVd6q6~rdtEj7+#_70b@4FJa(+*YrgZIx+ zg=oP9LUv~`9^v`L!{`_fc&yxOtM=0Av4SU6-~V?GfX8usL1zbdUg80b_l=PlMU*g4 z0i-|>2*Qq5gs&!apohwCfZfOgfkTWF@aI$BsJAx}Sq&hiX?1SqZ$)|`1Yn!J{5xep zJI3b0Hr<)TYcD&5n*~WUQi91!#pN~cLg5F+u=v|=gTAMubCws}$Acf0scvw!BtEUP zDxW6OWQyJVnz6k$W;xZXyz8KsVlm#YSnm$Eh630(kU&SB1m^5A>G|6-5}lY&E+oM?_Ae8=g(&E{o`3r%oz6=;~rB)%EJfB z<&TYzPgBKeMkX=jZ37EhbhEqYYw8HUVHvd*5zp7f{+A&zgkY;K1LDXMvmbnnoYVkwB}C#1N_KsnT~0 zHa}ipMoVH502Z*Jh%1qX#+d5GDKrCUH_5i&8io4@nhU`Fnr~%=N6VXaCu=K=D3&s% zV{3ClK}Lnq?@C_5>)$QOXi<621&noT38qj}Ou?`bro4)Z@gsew(=dcmeTaHdRY`95 zbN+*|ifv53X97<#4dlXhEOZc`m# z_%&g^rZ5+j&IBA|FNo@$Pi!Pu%V`&;0S>N|008VzY40CS=P6-}e2foTaHIvpzMOmM zs*$!bXm6%fz~eNF3ww1w=d_+Yhxjf3Ef81UH}h*r{^|n6tPxP3ttPYeTLQFs$LzcTq18yzXT)Rl;@bbISt1%1_^=OmX8$m#Re z-#=7}YO%aKRK1f9IQgu`0sno{wh-^5qy5usM4^(9g~GLX&4%`3ZJ5hezBV?$s7)W+ zK0FkEgIZ0zmd#Ed+yOk4Fr(Zgyq277pQq~`R@F7sYkp|bD?>T-H7T*{Vs38C^ri=c z>+F$_!d6&Z&#jVv!7i;OLF8YWYAC0EClr9l8=64c$mnt`V#>S;N!!je>fp9J);@GrU=n0`W$_C z7~446_hP{;ffEl2Y66{G&6dwt8p2S)&_9gMKG9A_P}gw)+rxKP#eUxCs`11wvd+#3 z3WY5COv&|Tf#Srbgh?#XnzA#~wpW3V&F?L)8_qBn6~!6bPIkF)7`#se?=A+CtHe?R z$Ej`(!aVF^{RwRX!YbJZ_(s(F3+wiH5MQe)dI7d#27x9?cRpR>I{jPJ{{(ba5$Dj|`o71Y~r&s6yLj#~eB0xt7nr@2gVb>{;ZENi{ zFeHJ*vz~ku0&Q-A+KNg)K@*n(WB-PeCeDlU5=2HoWlhpxKJBJ2ngny%NQw13MwbHN zGOFz`EzAv_K1_b%R4}qE!T^m%8`N5O>gIN3_ZBF$%lU4YDT^@wL68S9Q-Gl|l4d#m zF*~*ZoJ|fRvM;#d2^w#9Ewu{g>Kq*vP*?S}m+0_TEZoERPm6(AJ*5;q>?w(qh~25hy}nyc8X zQ5u%!Yx$yuORJf0Ol#OOEiILlP>FoRU`m5_$1;Y;IXPysEwE2dJ5sQy+JB&d`aYeo zIEcz-<;j1~Kz ziOVv`o@4BQlQNh!U*G!q{W0tVJX$y~Vx`YCVL&?-p`&YFmP#U&uutqG@tyhb;huMA za17vit+_|hskpF)*cvKP8&hmkbd?tc_Q33vdme#{{T<^TczG-Z>=@y5FEd-He@z7U zg{ZX+`}$24W!8y9lhyv^<9I-`&p4STCDS_?in@wxZbO^tlEnsJl%Fxa^uhR#&F`7? z0^$J9&~Gx-;R39uUD+AjaXCBu4qB1j;jx%nn{u*EV?o{ZK9lV7Z030RGfpk*O`egO z@i6pzzeYNU13s{ZPqhvkY`2SJnLY1GX{^L)#jgha%?D=g*XE9SHB4(w1}1Jj7d$?y zJld`IbdA5pNuWiO?@ zAJ)P>&a7JbjrI-pf@aZ5vcv>(9ry6@1K3CeGmmYVh87OkSJ9tNu@thm)vyQ3z&wZoMFj4QnH05*LG<&t(MX*Vnop7&pw@?IQ#@h1rAnXgH?_^r)lD=-c2CWn`h z586!XpiMfsxQ5Kc6~QN5?nzXLrGWd+L8X{R{dd5yL05s+k&DDU<$_X6w`~oF+l%#? z;mn${(C05+%m5?BUbov;&Uc=LPZ%CHMF}yhQQIGK2?MV#PaR`Q*1>z-t}(a-Q=)c* z+wF%^sC0RVxx0J3bjSZzN1*m%!2W`^)BtjEW}xl~RDts}(5v7ZZg5pv4Z0Zqm;hIq zw39Zp>)p7+yD@*8wp~}GW@T?XWn6*F>kz100H+W)>)Oa>m0mwaRaY4hek6Vi+(-DC zz%7L%e0K*z6iT2=JNilzch721?97;oPBJ&T+H z{2o#RZSdG@ymN;JZ|r`HdTlfC16YjkQ>7e_(*21`X;NK>U-Rxh2c+rSTEkXHFCox0 zwMQGsCMz1P#K0{XLWNjlA8WawBeR(}p@%NL*3R+o+VD&X)Td}Jx6pVus2LLlaBhx< z-RkV#j9#xKW28aa?ZtxO~0%=D$rlry%8X@{et2 z66nx>EUBOeWE$kaeaKU(l5y;b4r`=Ar)H(L3E)Rp$5dyl)siCb4s4{ zyTYmcc>Czz!3&@~sb}}E$Lq;7R?DAXOhaLmil()%21yJpjanZLXfA;#{+I?$6FvC$ z7qh!=e(?W}+5s2!Jg^6#WykX=eRtho<0+h->aB(O`V$=G5SEII1izEhleni>?T_=8 zAO!Ck-^msggtQe@woB8@NThD)_03e)TwyHH3tY3L#6jhQwA3)MyQ8EAr=4%^w@;o5GP9u~5aw8)d>T-+dvY~c7?h@Yax;)-A0FK#W5YsmK z!PGR_JAB1%N)yCjX_!f~&D7ePa!z5*mG$*8lG7=*7-C-;_7>dGL|VM^p?%Jzt*6nk zPuN&j@Yw1);qTid=#5|{{D$7g4Q(?JP-!owL7!tb;NqzSSe}q^A=_3Wz(`8D}jl(YwFyehyO)9yUE&C+QM49i)U3yQV#bHERUh3ymH& zrTK*mPU}RVyi4LO?auBbwg|0B(@{|{KdLe-%X#`?ckT-*a z@~&n&0hgul)(9C|r_M&i1?90o&cldY+CO48)1)2r;Gv|NvVc~Ae>iFC^-jFk)EIjl z23(pcIW|WITx0dcOKAFPkb=S1kGCf4El-T zknnVSVi;`?K#$Ln6s9}yc^RL16+#_SP!Qiq+kwdZ<5#|sO+=9?_mczRB^hA_D(rv! zHP^ciHx|LNsZm+cO_Z>Z(eX)?+S$^!@zZ=Z4g6|cUN=D7^byU>K;c=mclUcB>YM6L}_gTE{{0jqw9paN=)YjTS)d2OnRT(hM?l+S|&? z#l+czRI1;RC)?LY z167~}YZ_!4YOw{@+`sWn{=+=q4HMaX!&H@Fq0f-*(P?fM`H%9OeHPu-NOYiEXEk^ z6WWi;o7kQMl4{pylFXk_AE;*m-=WO#E^wWeft{vb;3P+m3X%y$^^T2KTI)U(BbPA% z)HA2(90qdw#-NR~_fN-$pdmjDWFdh&&Mir;pJSs^v>teFQE9xzIg>1)SWbF%GYfF74~b^)GGm<}IxLQ65A~_!n^6StoLz zZBGnv&xON_?IKiydLix2WT&yMe_U?+Y(xBi{TcV&7retU59E0cGj)-IW& zU+aTxuyT!#7Un(R3bilgb6tH=wnz6You6Bx+E=O33NeXVKlNLSn{n}7JIUc^u6#i< z4037z8DlHV014clB6HB|!oasO=mWZyB=u{yY>IdKT2()z%UP64w+5koPErN9FVbwG`q&8!h<)};@e&Q>nZNNbbghjxr=3o9;8q?d}?G!ONjO@HO`U zCTMF`ZGF;!Xd={rjyG1&Z1>EyyoOb~{*RD;4R>GOrjpRTqPdDRIdu@F2LEB%P;UOOGzt5c}3F@n$Zxkjk-7%h_?$Bp#1`CQ4lviTx=LPmI z?&tj)A69u!9Gdg=PE8T2>h@oZ()ndM9Fn0!TT*5SK7ReMknTzU`rdU8zkrwQx{JT1 zWCsg6kLx)51f64jDmmJGRH04u$J4n!AGPvpep^Sqp}0qXVBtdV{Dh{UO995lxUn$H z$azVSpz2$`(X!XLhztVNXiV5ekplu-7G0K@^nW=FoxA|p{n*B!Q`~_S;5CsH!JN=$ zpGd$dlnEFSA8UQc^!n^Cy<+<=>PKO3#}VRx@G1Nb0Je(-iP2(7$no|>UxnfPMni#m z`DRV)roXn&W6bUn39E#!S?n11zQhyg++n%;td7g_&`IvP`vnbUIq8&?#A@}!@iUjy z3gV8i;O}_}lMBkpt{&IwG#Zf%CaPT+zwgy?hcOSdF^?)uvsFvbpKXumqbt<8H`*m~ z7{1pXJC895*Yu_?;0xXH+g0% z%jdf<9m>{f`;-OB9o54=$J;L5SGP34a#tZ9aD@g56lx6G^{~KDjTc!hZcsib!9!GU zyJolP9#h6>TI^MlFQ6QpfzQ5=UVNBk>!!zez0ZVBsq;q;jl+H7= zLJvqLWfuM+rHAu#NlM)s9oM!t^ChbXRr<=squMxbJoopB!`KH3n4(*WpdTCRayJJd z37`e{54Hxb6YbM4MXqfhK70_N%iM+9fd3z!oEg*za${J~%TQ(ssh=3rS3nb6(YimGt? zJ(HAs>S8CL4PpXc^FsTqT~0HZ-MSEpfm3w`aF@Hlf@0O7q93Yi|_^V>C+KJ5h z0n+*Vj7kAerD!c}!M_+GNjXMH{NYgFr==PDW?bB|{G)s{-V8w<#X1p&ereq1gq+0j z#I2r_a{dV}tvg4id<^G3c}{2oL-&Jz%ykuLFkd)te~|>as`9vc;Q}g8Ghn~?;|BrV z1-nqQ3N)3t=k4^_`Qd8ME@)M>(9wUp`q*=k{>3CNL61H$25LF}`fHi&sMg6M{TNnR z%Iv-`&*yPIXqfi2oXx(FDMR zjFDMzzcP2ZSdA?J#s+9jGX=~5nxU6pgcQza7H;2t{``55ulo)f-2PnL2Bgyjmf{`u z|EbJqQw7p;r{A4!%x$H#PL_QRyz)KaviN~%SfN6)H6GrjKCTQ2qH~f^43~@&I+9?K z)wr-G`FFb!BISjVk?jplh1N)p4M#*YO`vu9OZZFxIt8?~Fe@jxG{mG^pCum@jetm$ zasfS(=BwRbINh&5@&e7>yxFIfKg(<28?Oh*k=N8VPO|p<0wQt&a1CdoJplR<11Bq4V7?!w_`A{+z`DIIE&7D|85ug zTS`jo&*m^9LI9!ECD7nDZb;29FGQZhXC`%@gbeIW(bw`=?XOv`mil>X4MY}FnAFJ` zlzv}DnoR4LD)k1l-X(Fm&s!~scx>kD9U$Hh_ppI>$vm`iVgiYncP3Cp$$YxJ>$Tv` zvCmt|LEw2<%s2c?Q&LxO03)^F=R;kjv=rfC$`y9RGGCr6pxJ7>&2* z-Ge;EVB1k`jKgg!bHF#F37H+DK9(vko-Zz$FEmk2C0lRR_*tRuzI9GG8BN*zq-^r1ls;0T2>02!ImqoKw5{}iq+e-*C( zx8M{==>1-OsoZ4547Ag3M%r^}k1&AfgAXKb$MXKEtDe2ETUF*kPMkX_;gSll^v)a_ zHA+(nxiPya4Lq)C(zg3o^h+jpB}QfZ3To!+3a3Z4$PT~9U(RG^HFF=mArE9EF~r^; zDd4F*IBJe-%cm*=l-jhv!d~&-ciH0HhA%N9n3o-MmN&Kbl@%rAWQE~Oc#T8HG2>t9 zHgMaASWau2C!!!{v8;ql12Q$Ne(Xb=*Zl2P>r<;EQBWd6v{`(dN$n@rN}A|)3zm+2 z$&Hc2?R5-)+H3xkQRPhV6hp5A7}d)=SE)d;y9mgTQUSNiNzn3;B+))StzNehDIFPQ zWX}(6X(-JcS_$z%6YykQRT|$mz%O5})UN#MT3>LU);2v>e-~QVwD!5yB0goqG|eQ4 zCSSwcV`F52$;GMWCf$)VDgA8tb{g#U>T`Xa!RIHQ3CUz3s%0s?Iz>Wun!@`hJ6tlK_ z{KkN%sf0XnfOK%R;IW6Ne!p!ZqtU4PPM-sg?ljEt7&oSd}-0o21k zMbVmyRFh9fbhP%qnm6bnxP(<$^%dXQvV{i~t7#c=J|AGsOZdRJ>CA0F9M0NVnL+d} zep3?uF#Fbfggsv$-}^^!mW}jb>j8Tbg?H2Eh_`NIChuO#QZduWbm{0C_e$#V=s5_m zat7_Q2n65+hrL=vAKbTEWc|Xv<*52qKB?B-KiHezcx;~~;+b~WzSV(pv6Fr<_l08e z)A1q@r%}Ul$o%8CR}by@^mGEXG| z(cc?_P{VxkMgq?j-xu9@2rqePg7K8&ZaHURVTm+(z_FT(H}1M@%dxt8K((=@tqr$q z8~i&S^dINl`&O(fZ^=1D?DQ&loR721-u%>h^Q5^m)w&)39VoZfVjHhBc{_aFeabz+ zcGU+pFsiGouY{AG81=*;n}|Yfj^=1su!h@!WkTBem4q(<3A_V)J#BWz=jErKS)~}Z z-xjKW5fz7cMhjO~II5be8n>@WwF}rT`;O;gt5u~n4W2Tv!vzMX6nMw5a5~-%M)Ev` z(-B911B>-gT<_+}2{^H34SL#yFjv38)y^?qqYa?SuVgp3!9g$FayJ_M-_y6ZHn%=-h2Ozgm7U8HS zLdlGpG{Sx7i8`bJYyqIu#eqT4g*Jp1flLcg2%FWnVs>4)nqP08S^hBz;_)rr4}x{# z0DGtKR$r${DT@zgq~}#%o4tH-NU9nlX6Ix=sSSI5*67Ywsi#(c-O?$0OnhF|%+V+9 zrS+cB%9rL>pKBJAe6zj<;N-q(U|kU5TqVhZ`TO<$^+!l3Fz$gt^%$@VjqJ_hMzRZn z{>?q_nivMPrF;!=Y)@Bsj&MJ;UFisLaZ@Z}Zsz6%p4do3Sg}l6Jcc2hGUl-3lh1<9 z>mQk*;V6BBSQzZc8th4tl(-mu6G_hbh|d_Xh&Y8W^VZO%$I_o=7Dg2>(K%v`rE8 zXb^t8LMhBpf`be(4GBJ$deaZUI*K+GPJ#RW~gwI+V_;<6urFTbAglC#j(&8o$4wLE? zXP+z@V86QwV2bPheb0r^&ms1_}`24mW0xy>e7pdt}PQkRyYQ zLX=qwy*^NTH^-bZN)2QYf zy}8@hsjK;#5L?uoa>^%OgJ=GQu1#YH{xzLK&7^#Ses4}lb+E*Y`Sh4C&+OxdV|Z+r zJaK#T;RRdwbZHB;SM*$tNgU?zG2Bo7!lIlPxRdl4EHbDVaGr_!`wofzz0|H2#=ly= z&$x@>3a7Jh$&iNmz`MEKPP~9bW{9}FF-GszP_EKkV8By7=6 zmu|loeU%mgUzkSu^L5YPH0X=YTE4fmM@XAmi!F{Jd!;`o!N#ayCFir99zDp-e$31E z?nL-Ut$VFBiuFlGl>nR1pnT4H7k~bToO>&2laGW@;G~a-kMpA-A-spGZQ7e#j>)7( z>%BR$^yK?^{eJTAu|@k!cwgHUb7zP_oFoHfS#~w*KE4Zi1kPvaL9bLhE0kMj5+y;H zUd-c+Z<)9mtl{`d6oIiA8AnhKoJR8+|G3qd^hyR1(k2OGt9IHf3_V z5Pc`(CPu=bK<~BG-Ty2JfoNcW1QKV>2a%(RkiLQ?y8YgeThj09vZrb5X79!m4!`!v z7pX8PGK4IG-6zu$+cr5z5Zg2Z;%{{H6sZ$$7GJp2wd!Rxorv?J@#0NK&Bz2sI>~7^ zf)kPZ9|<8y=zQ1AYd*w61N`qu^fVW;iiA+d#D!g2*7cb(gYtk9)QQG&pI+P~%qS5@ z(dcuIH zb|x$R%G~*onu4O+-7h=Vyka8R^ws*yt~8@7t8FVcO8i#B;^CbCd3iCAY;iM&p@|A? zPwP()@)b#yvgKZU}XKLL%@-FrQUr(*TICR2Q(;i;On#J0y4*!1W(DGKr*hj>d50{riwg+Iy?T8yT?2Vq|Z%y6Qfp{|&iuc!puM}y8ftWW4TesMrFcd-}l?T0T`0SQoRc~VRJIva3(Nq){ z?5#(#PGtqDaCSody7BZn#8jVgw(yxkB#Ds&c`e(mD;{CPrBZfrN(`&+D%`*NHz*LJ z_2SV+kMn`ZFf(${CvNqwe($w*tSYr;!GBKB&|sQs8mqU5zLk=`tF;cf(`8ca-Kg!{ z40YR-A319#8!0oAyKE6qW7DDAvcjnv&?cgJY479?Nrf<7{;RyW-oxd#J ze%jX=c;gbvEyAT8pod0i{@*w8lotofT6S|mFf9z`BhO8v%y+W(HEN3TG4m`4Qw8*E ziFFexF%BBvuE>wV^n{MW+=)!_4HU(b5DlzQD>HGA=&!SK z!eErQ!(IACwnCJgK?FCZq+(>dHzD3z9&HFk*lXPk#%4VBZEm^LS3T^bD8VSjllu3f zv#FeT8;{0IF;Rc$=1vr`@SpOj+4Y-|$~N;fp7?MdG}g)XSPI6QKc(1fgN23!d+-_(Ojm+>198&Q zs!6xVnX)c#x$fV2$%&fh6HFec`m9k^Dw)bcA|uIwhIn#MQUw57Z&Obn;c(Ifr!K3| z;AWP~V$Gig;bdp%`l(J6*?Ip16k3FZv@rd>6GG)-v(R879-D0-Q}pt8PPTChKNkon zmwX}qfnxNgHkIGJ(D|^cq%df-FVn30K!kmJ{-hoC*U(Ape0d&OzFy55v8dPkClFl0 zJ%(5+Jw6G}7>khE)R=n@TmyaJnw0PJpUGKrAuQ{7IKC%H>z$k_HLYr$u5aN~#Kja{ zYdLO_R4V&g#L>wyg_jd>n8I0-i0EU&<38KniuAD$yKQ{@XQT?D~c%$XQ%KtLQbf;Sc8YrIXCLcxhs5%{&H zc=mWv6QAh4RSUsu_hLx?9Nj0ki&ns`HTsGzM$Zu{z8ZhEv(@! z>nxWhHhQNwLb|B#?7KN^;lVwR-A~-Q1^UBp+kcj$L8S#z4TERnS`=zLu( z;!WBPf4cOM)UKrlmnh>ueBdi8ltGPyBAqOS9(M>KEry@P)Wd&7>vAcEbxVwpZtZ|% zb7afQZZ-Xmg6=Ka=yIjekvG|X&La$Eo}&3=Oi`4Q@Z)@QmJVhO-L=!k2$UU9wA1ph zQq6vyfkvFiZcJY6?~P8A5)cb4GZ?%K^_as2o>J?)Vj{cDY#7J>w-nX=0DWyA_$})- z#M3q)r0?M$%VRs9h)p&9%7r{6tJ|-*>3>yOQD7^H!UlWt32Yc`Uv5(|o~B1|w~N%Y zrFra}$*~5M@57%6k@QDOk%M2i7Teh<2;-UyQeros+OmmPGQgk*c;4(}N>1|EUZ>a? zX-M3oW~T(3&P<$y7lwL>naw-8HMxP`d(GXZ-?=yoiHR$X56Gnu4ix9Kh1SsOs3 z;liDg?YB1--fNc$&bGqhMwffi1i8Z}Z!B1=Nn(;YN9;rX+C+pq6N+D-$^u$ie4mFn- zt6$dWJHq0T>*v-y$4%DEPn6({+l1^So6`bn5m$x|w!*n5-{{%D{@ok{HuU5}ws*gK z@n>-)k_@9Sev8Z)&&4wP!xMpd{(lMkEoM8h56<>QNv&Pkk zasmcDehm^?os4Fm;o(=x4G!+=@OuGlB>0cqtqRJQxbi+FQK5?;OG^!ZaaRAgoTFxk zmdr;ZV=fA?)Mzl2dXsGA^gL1#xxSoBUrUTTepP-vD&zI9ruPabi|bI)=6$zTQIYVN zvtnXmexn$6SIL;wCI7pNWUF{Mxm3W{#~F>qeo}{zwxh9B$AzecSkOqt;E*Dm^N(Ed zXP+*@(N6Fm|AdzT`}U?{vIZG^pbg(DN{=HmX{>ZogMT>x_?V0E@k!HM#e{MksL$ex)FUfBPE6o%IH`5v zrXqbLzslF?iTn9VtTBnY7Y{o{aP(FnMy{puws)u|+<LTUtPo9fbaQlA^oXZ3mQJH?&XxY6o3_`gvBS<><1Rxd|7-`I zfnH$22|WB}mkQ?QFEkQ0R}=+vBt)U{h%-9kNkl`cD=Bxw#|;GOhsZnQrvU7tc<&LF+k#`Piwra`9HXrIZ9hK{4K&nJvDz{S3o$|#+ zKY1ET?9+yd<3%|Hc@VZTQ0%9apRmcGPZ)fgHTpOJw?AI~*BZnPlK*~Wzq+-CwX|{2 zNb~%92$HC?;WBjb?-wc&1MH~h;QrPY+uilRomqVf#X=AA1as*thaiF(% zVitrrtbvvy+NM`i-q>Qc))6&ENhi&5s34f~w(jOupen#J~FGiGv>rGDrV;!AJTtj{+gJ&;7 z1>+puUw~T66=- z&KqP$;2hmqwQoE;JiVzmQ>pkH*czw1Gsd^4mUoqtJsa0SO+Lb(*1HO|W|#US^p@#& z)z|<9cRh6d7I*Lw*&e#yL2M<%Hx{xoL7D<1_m!6^j0RoSNE<)5t4mFf_Nwm_QJ&U5 zU?!jognUnN?gqJcjjT{<_@bxrT3SI~e(@b2@SFBQSqxHFB!dD45&zym%up*WBE^-v)ZZ z+bB4#doUkFBwjC)+@|wtaEftDw?GeAuC6I;xcs2-;StU?h)VBg6RE?7;t)F(yKz?O zc1~uU4|`=79mLxuQeF9^R`3wKmIX^5v%twBhq&#SesNg=6nvKK|p_KZMGY%w%uLg5481|a_ zC24XA`lgYIVh9RGNcAs`KMc&P&)0*{`UvE!OTJ>%qCS#s57`K%`)6Q-n`eU^<*+t9 zrwV;p94hLM5)YC=niQ2$YK4ge-DkxWW*a>kFFe)=$+0OwY3dnk9?I4pvvWz50yvefQZbHJ$ z-mRtOwzj!kk8fV1X@xraMfrK;SDU9p@=ru2*C~pNhq|dd$Uef2+txIO-}Sg3qMIN` zEic(5e-_tfuVF5r|A*>X#s>Tn_GZ_oPd|Acn@$4100^XZ2N}z81sYWeRiGVe?PlM@ zqZZY;ZXR?eCW3alBYV^+&tcnDRH;4A_g&s-Xf=MO?1TXRaC34Pja5OwBNgL+osE-jxRbrDqG}!;PxEqnJY3G}W z-Sl()Ef#B192GN}#uv{&Za+|HeC_e_)G>aLSjJ8Kl2?A(68G>MlmINRJ~HCGTndEO zE@twIuW8FxJ@V^(5)kvT+_?6{{7#Otikk$u*bZF z4~$?qY0>GMMMZsD2VB?K_hu)6m)HVCMkbVWgQmIV!n^aZJFW~OTri<*bOP-Gex0(zHXwL+9k4d^^8oTg zZ{YD###=sEk%fh6&nIQAVlga*vue}k(;w)2{ieC!eL;3?99ttkYMHw`DSWhsTZbAK z)6R3$fE|o{7`2#>@8$8MtT_&KG$yWW2NmyjcA)W)JhtnZN@5NEMD^rU&4Qi4U9G7O z8;b#C@t{lY&6h-QTifS{aDPG7FgE`IN}*LeihcQ2}wy<@ez39f9qF` z?NS^znfP_%vgTfzth{`^E-c#ZSF-WMG3?ks2H;jGoA-w`n(g-VtE=1uyR z;npG{)Y#I}VZj%oNUr?r`vZE6uQUQsT?`4DRw`ilSWL~Q=`p&6?^~em{4L5uz1>lB zg|{hZKRVD^eR4bS%Rn4Y#xJQ17iZ@r-b}k3!cuf|JRF#h?dMd87TE#0Id4D0o+C2A zY@WXz(y<8ro*&6kKjZD4=BNuh9b^f>nh4qwmlu~Wy3;odq7%aY>-InZA&EPfSE zOqskx`OaJZ77YwSO*A}Anng!0c@v-HSFJB0X@Z0}#j=T<@l`)o8P`C%$w}SZ2{_(Z zt48;ym#$#GJoMNNVZPgSk6Ysa6pWV?P}Sekc_dLYJQOPYpZqQS3t#O_mGv#P1&`d+ zD5BYTjOBje^jgl<995!0g*}~GZ=4XwnCk1xY7>zD(oFFiWa!pPMJSUel74K9yu85w zAaQxw_caKvY>e8=&&+hc(@HIxGUSnU!_IO+@><<${brf*M(~X2Ldt2#ks4&uy8ZCy zO|82$)}jL*4XJJe%Z5Lt!I!(kAnxU&g>^-j${p)W-tCiSGF(bS<$>oTrS0Ft<#kb? zovHC}=!U`jB7RGhypHELu5^m>cY4-!vNP2OXWYTo-_EF&CYACy=GHN8;4@uQaT_(f z3O+Fmc#n&VF$2%;c#{TH$NEA)x%-zHJb#Pq?k~Y3H~f)GUut7z>TGV`eFB=FvIdeW zpF<;Y*>PI5+X}uec zZNM5GdiiJDf%rC%W=m*TrcUWH8dwSYB$5Y=-?3->bahiX7WEhOyXZo5JBWHjwDy!9 z_mU2O0x~Hd;T#cjQNtF!^SF72!Z>z6W@Ed{G@Mncb}NuMd4FK$Cu{HAZh zP<%_g>H$VWZMJVQpC={gcyI4VvUyDICmG1K&q4U}q7>>ck&Lvn*%#5< zc^X=?96(5CZwgZc%7#<$uYUUGB`;N0zP7EYi2Ny1X>D&PEHdL(v@564t31ta2=VaP z^K4m5$UG1%mmH?pPA8IL^II7pRiBHIq(V5R#5b)o2+rX(d-SDzG&A2!UKL$+b{f<# z&)qDNspPlWvW~mnz69qKcsr2lN@TU9Q>k)_WCZ;urXoS>yf-q37C)Vep;!_)>pHFh zgbR)raC2TF{Kvgu*l_rzC=QxY;QefOwq^u+94ix~0|aLi@eV+i+M~ zX`}n#4}OA@{1fFMslkYOufjv7zvzei1kjGyc z1%{?A_INCvmI!JbIB#}&Wn=qLl}#8`tyMhif&t}BVflG+YMKh`Pu0DD;cy*2c5LAiPGN>^6pFh<^*C<< z=&s?cejs0`asIQoVLCL!11^18`qfw5`s$i;El^(tg+0I!($U0zTqV@c0_lv!(ryuX z$=2>3i+3r}xMoz#cRv3j@w<8FIS2dpi1`%V#{=pMIvs%?pMLENTg92dQ@{jty$`ju z_Tk7o?c&eUH@DM%T`RrBhu@#!IVmMP`qH4%IJ};i)qnKMNw|hqwrps8xBUe}78{E5 z{vS9b!wG$m-+|;hc+H)nXhM%txoS4E5{K8@(JXHB%lg@%xjiFfTZ{b=dc3c+mt_0P zYokzB_8Bn@7yV1J#ia^{A^4stKZSLk7vjeUy>X)kL4yoR-njKyE$XxZL69Wi**-* ztBHFx{kpcy$TsCNPJv(V$WK^=5=ogYlKwl^xhOeJorsu>)-I(;&6pl-k3$1XRlK6~ z-z72w8s3G*o$8KrWWm(r7coWNwpNs9(|mH+N)iJQcn|WoN1u~eIgE?DG<%4QSj$OZG=P2(ztZS=tq4|wFjkr zI^c1zmRh!l7>H<(VNwU#20t_x~;cF3GuXMg~yvg~Y zNlMzlOZv#e>O<47ny$u9x-uUSy?AIgk`rTH*`a!^>$K*sW1%X(&@`r zwjF`bumoO>hm9+8DJ3D(l_8`b=;V{*&kQ$KaJRj&SC~~+|;XBNyH+q8#dzi*IKWD^w=8#wwqmr;+f1Xg&U(39Pd6}}f z$4a7ks44bR{0TFhrnJb!^FxOe&Kd zCs_#B&U>fKkl~f9Zi$?gAYteUlBrelw;CEWv(VzxeIh$DUqmK%8N7jEpsco^`Ypdp zVD;37vSHD}LX7$fYx3KlfoG-ZqtK_qAiyb(pfYLhdbyZ!aoA`?UtMn<7=-$y@6uPI znBq$ki2PcoAT=;L<%=y#Q!^!C_&{lx1dqcUo*S;1#CwdS;7Iq0Vj$R>5EMl8q@J15 z2}eXRW$v9Y$_&yNYbT81lg}oO5Ul!NkYRO@{|Z!X$X4RZJN}+gmuF`LupX$ltkrCTHHZ{+slHgWmj2;a^tBK;)QnJ$Eo%lM3sodtqhEthH5x>C-g}Gh{I(Yi*=BlmWrct~iz1aP^3)rfLF_tm zTzUu75Ed56BB^utaE>L784X=0ctq~oCc?A!j&kJI1xpmxJ{8knsFE`ikG zy;tDBsJUT~?3tzTE$|--FbPtXhPds1%!!{MP;2O~vIvX}kou@g*Ff!5q`Ih|e@)}Xy{+4`a?MB_}X z!H(BaNKCgJryVUGc@B3PkJQH~N&7ADVQs{0_a{27g{!{mHpWeNKx#M8T(~k*Q@}aP zftMH#<+5#Bq$>V!Y|oy-2s~U~f!Gdo2ALNTa-5y?KF3uQGcLK!({Iu&^AJ=Tp~4hv zbWGsqoq;?Y@Axgs~f7!JSu| zPq~&kI2#spJiELECQj?SpYAET)5?i6;-ni;<(X6FjEM2gx+f`etMZWp9O31wS zjH=74kR-0z(KzaeYD_{uUOx>fsZN?4jr2_uoFR4utrSn>DfU)W49oF^+p*cmL9#KM zV%cM&{*diNQ*Jv-*nKb}Y&i`8$eqx}4&T)3wX*E4NuX^WRvoTDA!f3sMZXjt;if{E%hXg!$Q zD85H^OdhK4kwVEuG`i`mKYIX|(1XAPX$9ltZ!S1pz=GYxQ&~_j5>fo+vwgo7Yx&8Q z4zKPNkEVx5hqZ#b%JNp@$L-Cv&2a-@qS5|(@KfCi_Yp$RdK-aZ{$en0!lQLp`trvL z^|yA?wSFn0!P0B4zugubkD3m>Yr=rDgJ@xAYw8sH%nNnt(Xa%JLYF#dnp<578Kf1fSh7dtY znxT;p81jyu@0@$@zxn&T^E^A&UTf`DB>)4Z+%^SR zJLc)Vc1IzS`=Dq zkAjZ$b@uG$RTeKZDvuYeIJ4CqQDt=V{-CZ9S6U2+JEHE#x!wGJ&+@m^tqG~oOgjZ? z@jCFZ_zX!ii9gBL=guaJrFrwo4fjL8h0_r6UIaDRr78aYmdR{uaBRUp#7yOh^a2oP zlZ1A&B!I{0@R)O$Lcs;-`H(@FRwE6l`fQPO_EcmKvMgEr^x3&V-*ubk5mUMbvm;zi zB0HW@Di3(XK<#`Pu~RIERz&8teR}Ye8+A?!I#fi9h|G@H`15ZY>w}!C>vf^d4slN{ z;2It)Y}op9thW52`=*L0z9u|;YMV*sV}h+6#Vx|}U1qq)()r}au!b6}-z6t_uIuK{KV&3~CA8>Uo;% z)6W{b^Yx2oti}shhUC|F+D6iY-AF_cVErEY)2n|3` zn3OgcUmPgURo1hA2CS_w?+qX5Tu-vfHQXVRUOPTI^wi`%@73^F)}ST}w7jyQ;3}L3 z_uFbWNpkLJQs~B*oNRd|3MtCwfCc6Cldq~|^$-%`CRFzhnhUVU%1>{smRspqtAQUx z6fI=w*}ulH`+Fe=#Da;#07R}O&g@D+fq8)?ktmXhBxk}LzL;eAgU|e1{kk8y<=GeT zejB!@I5M18C4RQ1OS-4c{k!J2mk2aC3!N8(y>s7LpZ_MSxK58yVzjAFpY;A#i`I7( zJuG-iuIr%yr<4eRT`GN~8?DVSHPiezIZ?2nNX9&0up2Nh<#Jd;o1VoG08UND8ZLI0 zGFlbykj;ST<0T4v%z;)*d50JcuD70@A3uxKBr&vwF?NMM@W!t>r_nVt!}T&H9+Gy& zyr%h+!`4C&!><)~yz*tg-|se;R2~*F#<(4azQd=_>Hep4*R+xAfPg-+^w}?Y=z#tY z8B#JRj@w=RIy}r7U%AnP^@seSWO%0S9)bR)q*8GgP?2G6i+}d$>WJ^C0}>ocSCm0k zKIyvI;ldHjm-oppZSdH8c4p;QgZ}5CNyhn0qpp$G$N1b4lXo@R<{V}+DBfef+c!*S zy3sr1Yz9HK1J^PmeKXLEr5uP2{YsGe;br*41J5VxH6r`h(b|Bg_J8Sg#z0WlN4ooR zJAK;bZ{=S(1=aF*_=%3ici4>qk%15U>aU3cU)4J4o!`j25a1E$=lMPIHkBl};L&e& z^L8_j#Vt(XI&1$r6n^k{s%I23gd?6*zpw7`(iEX5;^&g8+3+;86VDZ?;eH>&VAbqL zYdFehSbvu}E5BD2+Z%sXRU|&L=gC1ih1Zb~lk@T|_HdG!K3djHOoCdx^oGL1KzhX^ zWxF8qa(~do)8gMHMgCpqjS4Fg#QBiI?q>Ox&rK)JXURodX5VA=WENS-(Ul)tW-KgY zlw4}3IEm{S_71yWCT}>TM5)9hzIYb=iQ1B2^s0Cc-lSCg<{=R1y^xu}azsPE#iwV} zrYL?0gs~~IJz^0IPk=@LrS0OHZ4K~U-DbzC)1k>tz^6R?$HOe(>e&H-< ztD*0V=m|mOvtaDPd>Gbf`ZFUXU?FYIj~1p zv?iZXx~tQ2O^Hp3>9VNRAwWl8T9X*gFscuIqL|SK3>$jDx<*lPetgBwd1-3>pr~&k z-fNm6V;#_(w#2np;Je?@i7`|3^pJD+Oius2pXE> zEJrRPDD;Rw_$`vU_yh>n*wZ~Y7|1TYO=QxJ9etDJy)kgQf_1~@@(L;C&yh^xWQaS= zx={M(Goi*FQzU4+A+2ELJjPys9#o`&1_sQFV#LgI{cP#Ey_+A6SQ212(>shY=0q5@HGDD=c_K=FB!znkXq0;na;W<7J#)#ZE|`>i4JiVT^R}R!1MxdeJbKiRM(hEnKd1pa`j-aHvlkyKt}X}fll&r?Piv`${cp6%_H;Q+!=*J4 zf3v-D2zWK0g%RpI8Y?2>DVNsNU!J>MvMk@zIWTLJinHu9v<{rcfR6rt07(se#G8Jk zjLZHELj5JVjke`9cv6GXeu=)!CqqWpOcB?ofv|<$g?qx!8Gi_MZkE8d7}nisi3zOH zr|?db{HpWdkHMlYVokato}-?D7T>`C>n>a;lqHpOY5`RmOk z4RS75XaAYxiKq7V^uCipvfsdpVrhO-ZkxiXZ1FhtrErP1tU?4mU%YW9s%bY zy^6DmKb~A?#+n<2)38k>zn2|YV~C+AJxvU7hN5O*nOjs{(;M73!W3 z`vT?`PmDwD3t)XS$GuBet7B0KSlppuPG^WDhtLC=b({MdxIky0SZL884#t}+GTE;Q zR?_-WTsk3c5zlWaukF{eQRLn7J44tkBikJ@dP>0- zO^u^SlNrURInS=-yqJWjY7!^}UPBb(C)_#4SjtT^FfcJ6Dc3Ls#@8~31W8Fo_^~TG z6k^V)zYh{t^JiDgv1Wo4bc@SKkJ7pX{GNw&Re9ipz?m$5Nr(>N&d=MN&JXh1cvBQg z=dd|%6`;kYq*In8_weCL87Jp7bn9-aJ@HO7RNS=Qi*(eJkSM#O=ERG^> z7lm4Lq0O`~jKsj(>~5mhx6alt9E4;WYRmV*M6>R~{I`OlyV&V=u5s=X556zbZcu|* z6!N_ApC;H9(<$IUv6H}i<*luN00$2*_B_#{PIV*gF1J<55>})y+tI_=o6wWMSeVnW zFm&lx;4MIN6`odPX!tRGZ4ZK7vgb7)v8b&mtxokd(wU)WP+;YE!iKhKXF(98?t0Av z;F<(7$r+8_CTUMw34p!XerxF~1#a#!M|G8Pco7UnV~Q1#D#2TA%OC}YcM|1$CtnUY*X2PEHh{LS4$br}YA)g_ z2HBpAeHcjSG@gm|3R3%U(`)qwjND&XE5I}3Ywmbq*4Nyd?@8h}^lS_a%AGr$+Noh5 zNG)#~6~q+YA9M!dK2Qt5tooXzEjlJ9I=RN8g@q=pvEp%-N6l^CfCY5&ihlk47`md ztwe~Q*8t8P?4krv^8i@)`M^U{^nR)pCkyDD2yLvwL@~Iw7J<8wrdOwVV$# zIV(Hd>8H$4|sXYmJ&p+G=rr+!~jAcr5<$=mhS6*#Ljj@Sx z(R0a0cG)q zQrIEn0neKr2X~=X{f-p*o5e*?GsaMti@S(-9<31J z>Dd3QKBLg)w~-8KzhDxBK;mnNT)X{Anb_I9kSSqAEg<2J-=k~xJcF5)`hEG|=zH6% z&u_N)zmZgNEH2HMML)|5Pi*KY;9C5l%(+sj?*_Y2j4AFby*;K@x_v6o$=wys(yJPO+bF%n`rU_+e zm&z0irFD8|UvJB7%5^=JJ4|#(o!BYl?8%-*@R>7#4tUpPgS;?3fcSCKIWmo7=S&nv zGb+N!(6hjVg^9A1#Se;7EClHPIMNH`#$H$lW331HjTYo&`x-xy6U=8yZyFGrX2oJt zza;ftLNdoYw|LkSGwc-*kTFU;P(~XFl#)yYA{`Rk>BaOo_xHpj-Wqoiu4GBBOSUJa zfHW*VZ^@%6)TlO8$K{HtyQz{uFeeI;5)v8q)^>rMh(2SLjanFK^Rbj|q%tmHX>E!h z@GgE^ndfx#WQAsAveU+!KK#o|+aiR6L6mpxcv6_{kn{-hRzGAOChD4##;Ab8HyHNt%(j!u=7aKnvan~v&@(0*!uVtfzT)0 zq3$D%lTNupjmkh!%6))T!>?{U!eo6{@szr{@G^<5aYnodcylw|F{x}adMP#-?;72+ zJFytvm|?O~2bkX5o(Q)k=GS)f>(RXVd;)E_`%O9WGk=AJP+aHycXGuY=Cru;E24G`rbQdM#4b2 zgyE===)ilFUb~ax220>Hd7o;7;G6!XO8ZGwAd{&dmF~YP#LY0tv*8U@BF+zhpKEwa zPde|W#XE)l4;2I!XJ?a+*xvTD&}XE}KF{Z0S4@|{4zNgI)ah{8$(ogh#(xn!oZc!U ztHUQz$p^j5n>t_zOqcW2;T%yz5`HJrE%*V+$+zjv)56lo1mWHX%Apm>LfP2+d7Ykn z*Jksa%K3G{c8cVpfk$?(*XCGUejrJF){t~3BslL}dWZDY0kyqT{+NR5y^ZZd{Y7b-K_l6z1ZkIyi$UJi^V;60+$JJU>#Lw7~FS8S&*`l?RNpR;z+i2 z?pUknK1nX!>`_MY$sW{P+81rKYm~ z5UZR-&w98nPkURAk4XuNC01#*T#x^mFB+h#Xr?B}NElx?+dlz^+LS z#J%Qdg0e;4ZJ4-3gPg39n8z64e~|=Qqmoie^m-XxWI_<4iK1(T`sm38ai{PLlT0{r@NVRSpCB^$+Fa)FI_b>S?(2r75w9h$ZYBK(tCUo7d6Hp$n~51 zX|%?UTzVSMj&lM?1{C&;Aui3>Q>W%>O?S|1cYdwKB}Y~M6QZ+xb*S#XQ-WU!s653E zaTq1>`;5bT!bQK@@LtAt*j+(4y~wl4HTYm7hdHD`a^2-ufn$>p0XB9c8QHt%ROs(X z*r@76s{ekUWb)t|(%b=Uu)NOv+jNgNDFug9MADilU{Pr=a-{P3!xE*IF5A2n2gA$d z2zcjyTTk;AGK{G%Qh(9hI7C_pod&16v*#LSzcL??n~*!#u@fr$OK!<%ZX#6P0LBia z4y36pz;0lj`uN%GKZ$4^0P-5l+C!=N>t^&@oBDMIC5!x;AL_f?%mP*h_{jAH`@4DS z(t^AAA9m(YpliWqUw&^Vp^m1sJq`I>dQ9m%P7!^Uuk16Te-a;1l$hC18kLrm?IZ~| zM+E_j-;rmzdei_?iODr=DN$|-nHw|J87a4Q{r!&{c+t6bh#0m7$)5FXMx10b3))#B zz#Q7wv(WU9(QV_)&cld za#UNGy7VDIe$O#XT(}OM`fIs%L8?_nkZ{Lz#E;w8b6(F+Cn%EIOaip6C1aJ?gGc2g zDP*`}T*};>Qx1`{)oO?et8x|Rkh@jJWxRs@GN>tW??Ek2pRz9fvIvEh!88O}AdYT( zb=a36;ug4BZ!~fOH?{usrl&X=d>M+EuKle(*YCdCBfQ^D)Ix5!VnAZ%JYnZ~RKRmB}jQK7=}ok$W+~V^K0qX zZRw#^9o7$vrG#`&Nd6sK_S8Met0T4O3|X1~@|b(&QDfcaM>rubiF+0u?R3}KEmSO+ zEK`~El-ZQWkADQwwAyPQ5piYhyF_v{N{Lv%^X<{?=8kb~DUS)!GCW#+lw8ys2)TwBg+FXD(go~2Xq$$lew9gIDSP%8xvJ!lGnJD! zNdP745F&If%_Bkmfsmf|Yu6`xTe*;#e2HCM-O{4)r)H9|MT>P3L>)0+mayAMedD`_ z$If&QZ6;i0rO(L?j6)Q2ZXGF<9u%nH$;#1zW>=0+41{)FVyo}(b5wg0&~g|TTf%)R z=4X@cl=>i4|5c+*G^`gVc=~WYCD~3S`lUF>ivt$C?w6HM05m5J>Mn=%}cvt~Da%GGwSEKia?nnAVs0aMxI*Ooy;HJwsR zPSJ$(vq%|kv*=KF<#RcpN~fCPjX5tT*fb|RaYJT;fZ^At^tppR<)h)sInn9~ntIu> z#+Gn+^qK{4UAhbPBhnsn_Z@WzGVBJd&n@GSp^;#wx6$}K?w%6FY0|m+!Q$! z>@(P%8Za)#1Xi3)(Y`1O{Fft!>t4JVk<(e%CjyOXg&;w$b=oX(1i#l59+^tjyaD)C zr_~C@@YP;j=A3W5whlzr1z(wrPbO$0$C4{6fYp8^Z~Xsd5#$hqhDV^n#eF=07$P<* zc3YI-$x|+oWrm5zZ0Oear_dgNvIKp~R8}66IWTx@$K6PYXdVah!94@Ki@lXCQoT{4 zWPQisMK0ggajpDCg!KMOv)_Huhjlz`p|C@#D+6O3brqTm(YS6&LH45$Dlojy~mQQOSP7Zu>v5RtvZD-FsRN5OlXd zSDvo$mGpi0ipwoCaH#xhKX4N?*IaW*AcW+Ma5jTbej<#<#mPL50El^asBS;opb5Zf z?30-*R}nPxyWvPEs=RYG9r9(GIF%$p^w!{T6fyvhBtj?kos!F%tS9Y(BFfiR5pPKs zB{f&qnTy?)KRz{RNO)(^QD?=dd{5`?c+Us(xw@}guI?Y}aC`QAyK}=(Im>@ozRu5I zzqR(mk>3~5QtR^U=jk%>(Tk1`s;vo6f=iCPIFdYH=atH|1VwNWwkRj`cksVo*Zx>x z`)@0Nnhf(sD8>rqhu2zFC3agR^?*Qa5qe+%1*T7#QNRez+<>iT(si#u6OXij9$vil+UVeM=`gk zMGa&_8#YP@1q=}a=#{k8s({1w#LV;?*20OL#wgVYZgZgJn2AwOH8UpHCV`Px>w#+i zh8ZIuk;_f$Y%$Vz{8$D!>y(TM-9hO~>+ZG^48VsN#k4Z;Z^2E(^v(GlxtW{F7h4*G z^KXqFFYrC6+}zDqY$%cqzn=C|cvjx3S0T;z{9NYh>MSLf#;4PH!;9YeZYnEuR{1hC z#Jw~;F{t1F^VMDQ=93MVZh#N+LmjJ2S*O6FjD?$kGxs~RYMkA8`E1=jc?^sHs;5AX zn37%3Vpt1hbgSZNcaH@Z7evMp>Eo_eQ^Y^%lAdr0TQt1Ej@~yEnP{6?cC5rc(BXrd zE@krS);rN1Mq)3H7sPLb;x;^1&QR+qLN63wXQx6W`Gd=&-WHpKfid&t z(WKwdGzl1re-tFPiZ#l@$2hOoVRlE}HW|RfG9 zpWLGk=atr@obkPYR2!!`xt0bUtCkkwX?$Pt<|M~3P@WF;3m@D(^;C*&l;k|jKLx7d zUtOhQsNb7O{8!|cN5d7c`DebChMG!+TQ)}GAjGikMV@C7PV-7qNzNW~9V$!Mw|D{!+Eb zlLoPEO6s?iNVQiEZoxTJ2B$g!dp^59)Q z)|xaDY5eI32a*4%Tsm{=NI=FvihktGYMZs)^RPi>^+nN(_!qjv7bo{#F~&y%2Ku|# z`>h|5=9E^X45#xZDt?V1P8>T z@@xae$9T1?!GaSeAqIbmvQ7FybG?c&i%SvcfR9*a#XuAa(MK~^s*A9_8S|`J^GbNr zuPo$*WrDso%zycmqX;AhNks_-4chBQBOcV4t}N*d)R~~`XPf#_g>svGM;^KR8!EWSY39%gQuZcg1NWU4So0ago$lZCj$$mL z)Y|rhBI+W0uQ(#UJF?n3k(AZ=3MGQHYOSdYwl0(<3j4Vt&=9T_Mle8*ofpx<&8QbZ-fpn9m z$Xx9UBGuHssEzd{;JgG3c1eaM1!&cE3CN)iO)RTg=JM}ZzeM=PwOb_8*{YPm&4Al` zN@n&PhQx`=MVLFj{yW0liJwWo@=J=EJP+mPcn;pG^OTp3*x=5Au=A&~IdN5SZ!>ni zh>wDdV<_iv^+}oHvS(q)vKx(l>eOVYfc{Lr_gDW+yubA_^8c%go=2i_r~s(Y=VyPC zqza_mqq@oL?0jEg+WIMXuRw_K%~hp`wk_q4H_(BM(tKI)zV**QYu0LEcpLP9smvJU zM4ZU|{J#Ex`cPyj7vl9QXv~$DK?7E&uyg5)$do*KT=xA<#m0n`txB)5x`4%4U0JBbSj)lhd`x=VD=*s>5H7wdo=~B{iMYm6}jK;jp+=e@) zMhH_`PP%+oE9*C*w`;7v-)&o zF5)acQHLmCDF2_2b4q=e7VYYp_H5xVIaFpeWCrDuq|!{6hHC7eAQ~lixXGgp z2HJvgYJOu!$QImS2?wikS}k}*d~hAv*JpR8L8RM*wFlK_u+7Yb-T`R?X)2H*^m{VY zhJM&(Sd&TYzPaZxyyd1<7TBwG3mU{{*7BG6ems{r;~)|WxAOix<8WcOEe}KwKp9x^ zIq*&+H1H9_bn5s5mCqxELew(Z-Yn;kW|@s*@j}^c)=8}S!s3{T&wESW!YWc@e^dcw z0TtbRlDNJN;#oXu)1J>|rVOuIygEQ!UY03#VRJ;*Juk4FS7h>=oM}^T9UEr4MXwxK za)~eu30%N>ksWH%VyPhptR!p{DI!V#Vq~)NlJ`_>`=@#4kin8uAhluV6iI%zYQCfR zx4HGVM%5&=^tE9&HWpuz1C*beR>4{l$in?(t1-w4%(m*D)VFO5<-DYU$>pQdPZ6;g z`I2#S8v$mc+Iwi&4{ElwsUy-?6JV$|)rq}P2GbzR(&IqJoY2|ROufC3gJ1oI4I4QN zbwp)}D&0Ph(|RGp1<@~+TUG1>&1abyWTve*C6m*hYDYrm*YK+9ApLSgecJFidZ^#m zYAg8TKsw5UasJEH2XrDZCi=I4QBd%6$2`lzWb4qg!SBQtKzbmnLY83a?wd{D@9->XYwh{DE1^xWUYU7XMJ%~?Nyt|GZ0#sM!Yya~ry%%Wc4tVii9 zS8XCmATq86DlO6jyVky8lKR|u=K~}+c0U;O+f!f^g?;4j+h}e$&0KC3_nY(bd+$6b zl>0Qg9h)3d<7lAiO0d_!uf&;~LQTTc_6ZE9C~!zjWtECOnA;+$IK!+aHR=V_WRk#; zB^AQG#5MG`0u22i!JNJSx$@X38BicL7ew1fXU3|vng2s5`h}^gLXK%iZF3^)Sdt1w z6q8N49b$txrO5Loe3_t#(a2J-@inTm#tkom|If^(h#2&|mr0*WOQ0C0lHc5CXTR@+ z^AA@Y!|}sV3%>>{4;$Pw03u3YVKJyW!cRd`tp2ufMTcYpQI8F>?bkYIF1=Q9-6e9OWf z2+D=_dCgwV5efH3Hv0>zmOY9g4{7NuXS@4a--o4}>oygMkrC zuY?sE#JvXyy^JTuum^M3`^wGT;XhXi<;Vj<}B$KwIe8)dcoB#Z7xn9K9tXxY-$PEk1qI3PaOlp9!J1iq$7 z<#Dm0Q6v#b(;R(FwARcYu{wz4Z;|?GHTQD)G2oF&HY;qcLNG$G{q(n190|^F5@|wO zQIq|!sFAZsul>Z}`)?*KuMBL8zdB}SIybPT1ZB$d#+(0=qtfIy`gQ7e)k2?DlaOyV z=I#jJ*%lH|f~_TU>u-cAM&UcHC*)sL$d!ySZ$>C;Nx~<96G3Sb#Ab;#iB7bgT%4UW zP9eS^14h(nU_QcDdyp9U&9hwANWj_QgXJ?gg#uXVG)GpkwmdX|nDu~GQ~tm`ORUmb z6p!jSm`pwPtdpfl6>K`K8+9Vyaql-t$)OGw&{f;ZLYp{rHwrdLs?04+rywswnvQo6j293xJz z_SXd4o#uq{{F`a<^tvpZx~WRuC`lS96!)CO#i~edcS|ww<{eLm-c{m!B z)WOF`@Dc9DsVC)%5Ve`h(e2he-rUIeZ046l z{VWWAjoXQfKfKzq2;6EMmGnA|Ur^IsA>hJKqH?fjfPu4Eew zS{2Zq#(I=g(MVRgaVcsGr5J0=oZ>BD4E-{x8{q+{Sc}m0bQdx}8+JP!gvUMZyWR4K zkPKxM$k}LuHJ@m^Yy9_R8+!q+(s+fHzf54QRgrA$oC1IiHQgZdDbF?3Q%F*?UB*jX|!N+VYVh%7x|A z>yeuoABsoN*n(hkS7Je6p(&92XecT13CV7)Y*}K+kb8Z)+|<5x`G;(Ju4&LC*vA4@ zSdZxId#~LV91Wd->sR0AJb2CMau`alDqndTt%N*PsJi{_MB9=+;>x(6IKwg}oxQ|T z;p{*HX`Fzue4TqEY#gnmuZG^^c0pXwJ(CD+8!2$%^g( zV#(Q4$W0=TXd0~xoq4z@5S}pG1wqdoy~i#7BwO}caZmK$$BP=+fzVuLH3f`GMHON~ z-K0v=(<~wW@|y#AWpbk|68ON=Uq>1WGl_NAvro@*OE)7?yNnkDTLl@j7}?F#>f|@= zeOb+g06jAMX@6x5&G0+}yk@SDFfq&MWfI@V@lrBJhOKt`nCJz?Q)`wdW8vB&iLO_q zR$|fAd*>WB%kRH|uB0{aODPOTzPaj|zA%K>7hi5ViaX`XK1Sb*M_Hqb^bBo;XLIe$ zBmlIb*IW6Abz#4m%vGTHu%}UXxeJ=Sa zb(z1L7FIA$j*0Uqv-L-dxVV4Y98+1z7Qw6lF`onvKRmTtMP1kWXS+pYw$DxcGH27E zT-sH;g?uh@ci%CC<#1jQ0RPHh4H)Ql@oZ&si2A8`8nF|{07N_{KOyS99%Vu`*R4og zBXeQGxo|Sm6r#1~=X65(t};)&vv9pT{kmCBda~@23ugUolOGD>)BTBK5{YGqsmBP= z*CryJzjfZX%0zJDSn^_tyv(^W#n zGgy(oxYDH*q{I}OK1AYV5!MG&Z&mXaSBa+`Zvj?M$?DDhW##|zbp8;AhV!%pI~YAP z%P`HK2gYfAJ{8}Ra%hj|wW6+9@x0%1`m?3aVy%o@>FbL6#aLa)B6?%OWu$hBZ7;q#DL9UCX@IqnM#-XB-K zYOY^LhM3RL9AAc4jIuLaaM(Jjop+MX1oKx}jnY|(Z3>G*H=C?((YUD!W$j+Pp5_0` zZ~sGL1T%tgQp*zD=22$%hzkI3gsUZf@r1W4&b5?5p2P;f67Xfjl2vp^z8){R&cxnW z?kJ*F->R)qx|zr{kMZt#rz(3EYz@v;E)BALBWG>E+INTE`x=BZ#&M$^X@Ps%jV#`7 zHCnWk?D-XZ>#dUeQbA!f#kMYR)AbaJgXkUg>U~2_*b?NzJ+Np+-KbEIrj;7T{lZ%4 zDx%Ey8z(J(!`V`H)+;@GIiKcm7GD)NX~#)>;yz3b5~t$WMXsnEH4W;HjA*m(OfmZB z(7*&md5-ef-SPgntX$P3pksO7ZK+2|j3=i1vA(lJ-tkpGY1{dgIM7C^^k1$SMqf+; zQEx1~uHt{S?JG&lor|dQv)g(3>B}A8I^rU>C;!?uh`73iJWh?XxM|TDugcRj<&1ZC=mew zLFKXie4Dd!lZ-FNa@mSBh+ffdf0*AIokDV$37b-18+Kv(f1@}`#}|#ZOy?YdKl_PI zkIG_<)+}ints-l)-Bqn|;a`RR0XX+j)LsVkl*iAMK!+oSmy`uYKTDU;T_+x9AYuAgxL#Yhy(X-6aM0@i?c#4$6 zZ*~kjmlxiPcV4Ji`g|lsG~#mn;B&QS&?4(QoSL;s+VI~}dL>_@pzT%UPUQLyvyeaD zJWhXLJx>N5Uy;nF+m!W@(Mn?u^3Hj#wxd%#_hYt*-{UrBHUDwI*V#;F7+ptd`vkp(L0cv6x7i zuNI=lVxue+{du`?&kz=7HO5UI8sT@V^>XZ5&UL(#-p(@&*CA!Hb!BhqaskxbLs1vF zQP=#~CL$p{^`$Zeq6mx7r%JRu*HG*HOnbT~)crH8tG>Jk_8bs}*Yk@d+BN*_NfFcN z{z%igQ3s#MAr_PtieWgSAUzA2TK7kii3Jih5gzi{(6_$>43XxCWzM!w>LTmEh+snT4nmBm#4+`JCU(oQyMcxiO;!?WM>W~Y|Gk~qH<-7^boT-#49S|j z`DM^Me6j4!7L-SVeavj$cAm5bIfC}+7|^-HX{O^porg z4oSI=1oJC6c#bP-(rR-n3!gu8itM8ll6S{veOf@G*$T;IKZ=HxHMKa!DYrcMT^m-D zE$pIv3({C)v38SfT{}L!T}^_}?hWK7n0B&DoqnX(eX|$mPAg@rI@}F>ewjI{(<}sJ zK}45^zDhcnYQc8F@$6yEMx#N8kpY4Hom}Z+H)bE)%l|*18Fl3oQIE_LHWnPqp(`-a zHJ*f1D$IbB2{ArkM6SC(`#3Zwf;~1zw<4eFhSM{+Icx0(af}5}3O;rDZcgF5_v$+= zt!(@v_7C5g*_A=Ic_(Z(#&*=bgBUvGj+*Y^#IA@h%rPjJEMY{YIv&FA%9Qu%mT{&c zZ}6edY&;Yl*%u?2pHPl2@BD{<{a0x55l}?@Q5gcIfW75gJ5P(5D=Ie!se#ytb>XOu zCYbmxz4OZ7?kX6zC-N60L8Ys7lmI$(>Bo3Fe;k_mM3!Pd0>X)Zk|U%~4~E&1+UoHf z*(Qd$;dKL_SbCWhkVo9{=8*AF9;Z-zlZ?10Oyhd7;y>xj46eA980Y>G@ihD;{$paA zKJuceXnq3+14Zf!`7f#Ok9NCB24z-7%Fi(ub23yNV8R7Bjic8udb^br{_+&m3#hSeQts_ljHT|m~$c`a#n3MV|NeGav zG}|sQ(f6{x**bp40*5xXH*(|0py>NHd7RRZ5HnB&C>*d9TraQOG8$IGj zt%bt1twCZUHpi~?OkeoB%ub#_7b(lJwoVBYucoMextZSdCaer-rA zfyrJhi5aY5xUiJr+@T5&?)&kBsPOgmX;?dXB7ruV0%OwY0#$%rY^qI5DZ0dgqV9OU3SP{#ZoD4&;WtEp;b~0Q!ob^wb2ODt z?V|x3N}r)tb4NKU?Aq)Y{h@$CK7=2!tTYgx0LDoaekw*un@v83 z5o-LLlrX-wWzAhU{{X|k>~5+q@Hj{po}WBcOm{bsWkrJu$e7Ki)gg#?Ljs`j!;Q@Y zil|(~DEE5se@1S93=+}I56mx}Ltm5g`?qtl=r)Y=6*B6R3sxAy2)`yc3n?>~n_GwC zfy<1X$lDNc0(11!p%qVH+&?@9+_gkHh2ycKh>hs17wEHq&hx-}>itt*2DJ!-tId z3x3w3Hj@yVo41>ABh)f}clHfmh*vZ03ARfD$T}t7Z15E+*N3-+^aUsj(a-Px=ZuUJ z)rx{8rRB+`WVM&x4--dGC?>zsjSFH(R94Nj53nXx`BsnOZ1`(;g8)4Y9QvR2YNEQX zYxn+Hp67vaeP+M!a(6NN2K?rK8`jLB{6sE?=~FTO@5EGmO@h2`(oyrbjsYH@UBAjQ z!r=xE6F>aTzCWiF32*v{1(|F11Uj<#OJDeXZIpXYJebi8jD>OV#-)AOXxXj zF?+rBi}napXCOSP<3ZF@9QlvVzDB22rX42gQpuy`5&qwE1b+=jgJ+gMZ>&`Nsu)ME z9YY*NIguGYiDuH`I>39D;VvfIMUYrA!EVcgnE1lS<$n1YJ#?nI<8s1*+m)TfwtAC5 zN>-mcwqxgj8vaW$_#qGHuCdf_a-%_7B6*(_7O9{$k=18?hwccem+>+yBg~SRS@no~hVYFa9bKeyqt&t=SE%`i0~dRAbEv zxwj;7ML-hR+4nnzLne-0HGR>d&GPJ+RUr+GOH;scg>hmNcOrE+II?NIB z4i8%y6qkjqZ~x!BKh1zvvmKS%&X?i_ebu5KA6$H(WXsSi1Sv}!{zt0F9JVv*2eCQm zr2vei5@gC^uU$zE8GB!!COz8su*vadgy}NUc1awFAAt9rIXf|-pMC~*I?o)#ky*Zd zXx^Po13%^Y9tq6SBmiA#V{5(QFYfUy;@DvVIg9gb(IpDU0K#A)a3L0325Qu@WFBwMk!uC7bx1~Ogx&SJ zh2$MlxK1UihyMwjwq!iJTc`Zoh+6T%X|ynYf$PekJtVP!#`sLdyd;_y=dhx===#&%*hQMWmcqG5^*KTnOMBYssDx3-QqdUew!&!-PcsP-j>hhoTg^P zW?Z8j{-Z14<^PeZ{?TTA=n|3++Kly{O5{4E zPFq?nTNuI9T1>p6-EJ1cj;jvCC#%ESDz-d4A3Aln{NUG303;K@1nRtcj$^MW7k1C=_5 zN%KqdPcL~b(4Zif4l(Z{#w0(Xbf~iQF7SgJI;gO3%*gQ{0}T4qwLi-Nyqb2O{r64$ z-GucN9i~0>EOy=j7A(K|7!bnSt6P4wP(MdfmB5u{Wmq>w78uYZ=3m0j`GIjRxE>EY zoTaw2^EjH%sAIJ(i|fbYc;%tzxC>{ZFfMywT~)VqRN_IDQ#6p|F^SY^buTRn<|8=M zQr6R7K#R|{W;$I7FG@^eO1cQ*tS+dp8%MGIuQ~qjH~I5vh9tWZdNQUcU6Ys{1tb=h ze6$A(+bjW}AJ!_Gk9;DTEUHvF>aD(Igk8h8@N(WMZS#bl{-5UFGOViaYa0bb1(B3c zK{^c(q`Nz$QA$ARO?QKIOQ%SeG;Cs1k^<7rCPW%GUD9Xrd;TFl=i52gd%b?rC9E~) zTw{)Lk9&+YZ_K#+LS!!3Q@kIpJY83iX(gN{_tb)6eY2Y*T%PQJ3XMazEb;u)T^ZX+0+|;STZ78Ztqsx}k6Sh$H|5mlIlge3k0EQD9L+o` zsL;$@f7;c$Ot<2&u+cJYT}AU^rKE$%wA%{@O>{;p`KgrChp{-a)y(|9k_l@Z{r2yX z*ndcAb9$3cxW#w$+4pC~=t}&ORU~s)7vUJJtXD}d)I)0*D0kmPo#eJg=My^^{#}eS zsc`21j;4TW`}If<;U?arHg!(_b%|(RlXRI()VQd6r{JBupmKs4MMctW+h2;>#6~Z) zmcy$DEglIJ%rYAam5N7<<8LfED5wyLYO8z14=?11N)WKOCvr3ykhdkx@hyb@N0VHz zkcON&$Lmp*HYS^)S)!4(Z?fdu8S>2hamIAIPn9m}vOzezibA|GhA$36NSNc8>J2?j zJ?98^nHY@&TFxb+I}#aI1L983@(T^ZOW`!9qy=xx9`IE4?4n9LNpU`nX&SP%;7zLc z!n2{U^W@wLK1zj{rA_Ute?C&ZM_J8}OGNP5qqANBj5c&*-Kj9yLvTgmDbVOuOOg91 zL&&IQ>^Nf|FZY>ul2$fh;HSgmas%4vvclrJZna`9leDYzl}&J*x4Gmi#I64<_|rJA zj!YBASN489qXp;AN84lh#??*cr?v(B{-pi^d( z>--9TqdVS#BY#y~ld7#l&q2q2d@?Mvr6KQxwYIYI#p^9rx zy#!i~CIT003Xq<-7h_O}=Ehi2a*?hs&+a>v-uUUZC>j|AI3S*(mY?9imS;>b{S!rsjIF?{PW$MXNar4B*pY39b%(Da|GI(wc;9Vp zViDrO4?^+cbmcMBP(yc|qD10lfpz=nc!<*#KUXZ92Sj!AH8kaXD(r24##rm_Xc0@O zw=$7bm~)T9-T;Bj%IGU=XkBPQa(>4Sg9(W)*XMfu&e(vjT-3fAD2{7DZoNV=Q+!g2 zF7-WKij~hrr{{S9-}z1}dZm`mgX?z$*k|X_(_0i+#o=7eR zMLYdI-9es2A}^q_CM3Y!qZVu6+ryG_@+}UVl0ats#cPUZb4s6eMW@5OsralTLniIf zzQhcM!em{mqn?WOj)UJ9+7lRkU>BwZ7N=a&lPBXx8`)+*rX?tu0%e0<(b$h`RaNAb z?{A)4k3X!#nA3T`Qd^u509(Tp#_Pm^FfT4KwZEQu-Re8FelJmg!ddkgixBfHV{BL9$FcS=zBq=e<_iBi$91QmraZ3*xZ3gB}m1zXa5w!_>gXv2_0=1bi}Zm zL031QAc2P|v_KLc7~q);3Hlx4kc-T6R_$kd@L_Z$O7Z5yGrL8)BPo|_5AFgOuw&w*X zFZ_yGe1U(lRb^^=ws^HU*UMf#Y3zP_0CiTqwb{6|fmqaYol1GZbFXE$aGrcD zLz~_b^o6m-C)djj#&~{pn_0tZp5s$dA2*v( zZUGB9irp#&PBQxk_Zj3vyP|1u2}D&j{M}oWJ`pp`5i}^SWKbj1m#C{)jmF_$V&W3h zJYT|u*^Ikcn~LbLWk+y;4>dTdbfjP6jz;GxH{&_e(D26>&wWMsFkrhGKDZDHWW!xu z)!PE647*+&jzfmBq~74>*!b05A&dmfj%Qs{+s@KuA6oEHS21^> z7cywW6E}lZ$auBW!{hMWbFtRQdZ+9eUF)ph5no)+U_CS?>epV~5Eaw!leZAjM>r6X zP@^%$N{rK>e8j>3xb0rq9lkv~h_yC5sq?nFt8cQfdZyesR@Zb+RTY9zAcmT;byDTp zYSij~a7ZgF4$F~6E6d4h*w84c-wW2|+5hnhBfR(>8J@mn(*?B4HplAVWbI&Bp?Yfu z2JZ+X-sG7(2?s|<4>Q6L<>#hhQ(NODX+X_*1_;m(1E?oB)bA8D^L%sTz|Xe#qlXvW3~M7)}&C5BY1 zy4}-soM^kdQ2R72L4bQ@yEbCz;|3x8wsrwxqD3Zf@>Sw;Qh zjyI4@?9Oe!z9?WN^;yvA798fdy-1uZX4wQ5$KMMxw@YdOdX};}&E5u?X*zc}=`nn&IMaUOGN`avd17ufbz*Db z>z8}#E@RC)W7Z2Unlss&-}ibfj>-;I&MZ7r(?k=Lb}CI^!%)bP%Q52{in#3;FvBJx zhmELFu7$q6jo;$LCo6Qsuf}SXjd=3wl*jyuC*P;SsM^-sqUGm*wO6gX4SW$QyZeyW zsq}DrZtm``)4`ww#yUNHKu2_!CKT+rGx8_>e8*bQ&uxiAXWLDd-SLiQzjx**_mtG) zm~<`N&(z?hdacs=@~JFn&#jMjb-y&E(Aj#y;PW<(#c^4Org5y@0$1?G%>KE>jB4Ijb& z&*+-FG92bTEs<#ZdTTH{o0hNc?7GkP)cjccf-3v(BQ|m|HR>8+7>T-YKN~;dXF1H4 z!RL9qW^b2Whl0`B{eF$}S`IF(I=xaeFL&g3`1CSakcRCW1&T2E?lqD^1krFjTXB7U z_SHw<^P22sx)7P_X`7G_ERUF=eVhxSHBUWK5<8L@s4qhwz0%)^ME?%}B_Ak9i+Rvh zMxkDOl=+Qsq+YISD>jHOG7=bqhXW%-6M$nP+Q3?k(`*&VMNYMvNck1ux^_M<7}vD=c0N!|Kdj1rvz<`?f#RX$ zw1?Ev5}s^<>QsJovwG91>KV{tS#0ekb6HL25;nO!KRMd8^n~A?=luQiV+hoCLN9>e zg)2B^x*msTr}@@})(5d?O8>%GUe{sS`wmXvz05lZol+To@aYA6k3@Z$>7vNZP$hwZx`EDmXi0-rTVlcpuP@ zQR2r)`foak<0ZI+6Bt~kB&K4}uhaR-QNcD&+vCfA=nE~wcG@xvMA_c4MBvf`lMIMo zvcT$>j|d=z)a#$O{i`Lk8zaX!m`WntJmb~;OPqg3`#+Xp+<;oYYKu$-F@vyn);JFh zeffwZQsLjML`|J>{F zLFB`Y(b_P+`3&yvR}M>a%i~STRA%IDkI`QS%_NpQLl3S*{;xeEd3WHxR*?|%J--aR z3^U8~ZNSy15zM&vAKZF9%7Hy}Uu#p0>B#iTVLE2gVSc<#ksA-i$+JSiGXPFo;o>>2 zem>PQ(x0>Q611fp&|4}WQ6Qp-+F1!S4 z*>@NIn2lmhzLqU^w#ls?w$9X5ewcJ(<(Cen^QVv4vhJ`?~-yADK=`TG=Q*ye+^RC%z*^`;T>Gp)x zDaUPM`FLg!+jOhKffs)W{$)AhKg|S5ckl0~PxnXSpX+En zYfC{RMw(NxP;x?Q_E7$2b`3Bxi}YI+A9s})w06DfwZtXvql-eYv!^`ouNm{f?2M&} zl_(9*k{rUi@n(>`@zBZJq*2A#{+t=utWvmwfiz-K4*yO+{$*@2k|2Mkq%XK>GdPmL zNW#w|?|7A9UN~mbMX*gZ7vl8hIPu&&%Yw^Nx_xPgD|9zz-vE=sXBvQm#gQDJB3FN4W1N&s7GH zd2n9eu*`gtCC&NrWCcDbL8kpWT_&FKw)20}nSVwLiwBE@e7PQ;XLmGdkLNs_j>AnHES$TeOoG zq1esMMT1Ljs*mEhFd%|`SR|<;7Tq>VY+0|jPMWY?6y?PY5*kEC2xIw5n&<2yGi(zI zM`T6I?+FQHY>rfs`XKtyweS}%0eIi%me@wO*=QLi&YuLlaegHuq5*>#}f zJ6+9$8gydKJ=qO@R{Zkz9w+z;vPc`|Fi8cOs_g0m0AQO2wpF)LkK=l}mb(dL7(QCA z?Da5J135Uujq$2-w-Xy(x3%o}DPQylh`h}3@>|%v133z57QuOvKL1UwfYm;Y5R8@Y za<*Vu7nd;QL>80f1)?J6t^Sx}wYEml6H2EY9F2NJx^88vs?H<)+snz8&~4AFGj@|9 zYNlDY@AN}lB2fjrp z60@1AAR4U`D-3)0CPnUGLmf8Zny$wbG%|w1+9>W3d`CMiJdCdJO{asFGsJ*>}h>Ps-QeprVFycwv2 zT(s)u%BPxtkEEbg7%fmOG#|_mgMR2CRQ8bk2Zr+j7%rmb;M{^v=R`qk8P_(w?Qsk< zf1|;5aWGuudc2iW+vBktqC4Sp8v{XhIZsxPq>>7xX_7^!4I=mq)pgygX6%OL^v0v& zA*pv2HUpT3q8zjIyHKe+4k)rEkj~^Hir#kk4=)vtfz;-aLF?LHyZ`V?2xe|SXD`)H zTp6z|1&_T4u*=hp&EE?V$1QZeq2na_b0>X{Za7%IhcwuMLyKL}7g_y}$47JIEVf@? zh%l4}bi^<`znl*s)3Neb;5&>Va=m@~Qz*eRwFI94DtD@^RmP)vV41JAZD)T+`q@Ex&c6Q zDEQaO9pVnEIQ|v!Cqg<@&P6VV>q{#*%GGU=i`g!~xDds#t-YDs*gS}rx>;NuF?TQ@ zFUFsE-ho+o2m9o0WjwZxOT&NM)R^TSfc3-_5I6f7mTiH!hnrE#jqYa-bE#3q+FTlV zlW0C#GQVkkDvhajUIgbT0=J*J6I=C)U5l_d>X|_+O{S@xn0d`0ngMaMzd;!*Oty}e zeJ`#7tQ`MAy{MXsub&}XZG~@w@W@??oZ^sR3woxv;7j~bwnnp=I(vTM4AFNW=(SQ* z^c-%C8;!s5zPSMc1TC5XhhAc6ODlxbb-SeO-HAzG3K#z~Zt>@V&%~t7Wuuyvk6VGt zV$PRGsS$OY>tR8@Wau|`aUKcqJmRw$x4+peTKx|C>DYJDP#RmmOM)f>Ec^Cv1o zLLH(K9B&gZkL{Eq%`fr1*l_5giT^E?2&~+6y_`NGzT+(rTsOUdR z-yrxYvL9(upHQz+Vbp)r!ti*DA7(Hc%xiWp(G!5YQzY_xC@c;L4FOWQXqg@i$H@=z zOPlSqH-ca(48%aZnp;VtZ;*NLpX%tZ_j1e~6qzE%1TGf@rnpv8l&$d5vp!;Re+Mct zQ|5s#Vw$`FP0Bss1`2rP@6u}S_bNiibzCPgIi`O$Snf4TkvyIHrK+yIbjzV_Q-bv7 z)6TAgi$2r5E%44~wN3n|t5h2^u<8gG4fXxuTm=sIv0`nr^^QFHfRlN|#+2@d0?&>| zM!zGH6X0WC-z>n>dGy^E>Hl22<}E5O9GQd$O1iCze*O+M+;#{c!>Xl31v0x3^!%wYfptk~P1a!XNvhD~tTIjt8? z;r|aee;J{HQJ*iRrKGAFYe^g3MLkFY5k*7IdFy{D$y|tz=Vj^hl1lD&@q`tsv~;1j zy>xH&l;JIPb58dMXkUi~ls349e+FZ6Y@cnVLCs7}Sw=@kd(Wgf&qUFFB(e2WzjFJ% zBRZ9d`)6=vl|WhOj8NFDOdZa=7olP5lD`pa9r|)WDFU86khc^FRtOoDgqe!oQx#3anr#O9uYeP4t0*>wk&7{nr!a z!1(^Z4_gqPz+3)U>K%xAP^Vq}t2MB7CfhM%Tvd{UdD# ze>K4apj^=LaD9~Ba+Wvx;Zq4;$Ygo@wY%MJIQ|ToDE)SNxM5__8GhKKq+kXj(mxdw zpi7qzdyRd=q5m6}fjB=Jk}iVHH{#ktU9gz1R4p2mRg=Z&=Q!(}TLd((xE;3;8@;JK zwG)5>yMQ58hZH%)=TFFDx2}s zr85RS&ZVKS18CmWI=iBzprG9SXtI-5E@`o+m4G3y6O7hmIgL!*?{{QC zFE>Zv=S~za-ZWKM&^5^3Nq7ATg!LLY zOs8wCXQ{Hcg4bR0_7)-p_+7KAXI*++i~TW);|TPgjyx;@^_UZj@V(G5G68;=LRlj60TFf38UIDuvFmIL)-TVyE4ndSs9rV`vh27%(gkD`4K%AQvQIjO+j&tRW{C=DzdoF8llMIG(J%uX0WeIzkyZ>wG7! zvEQvaUZZPmJo8i>OvO&+V{zwMVUF*h1d$+`9)9vGgz41wsd3il`#~*0Pt;jaLqRo| zSn^$V#VvQqHQa$793>Y3I}axO6cP3x3H4PZzm6*Zx|E**V__XPA=_J)g^Vc>-wU#@OE36V8{_zE9Jp9erNRk%Xfg5QxE^(BO&ESkO zkNI54Jj$q$@R}tT*KGICWR248^p7@5nxkY-QJ|iLm~+1QY+ZDlIQ*H+Q*4Sc$tXO& zJqxM8+t&CHCE+ztS~~v5BkX}=$>(x__J*tHGoNcx-5m$S#30yEamqQEw~zCfOmyv~ zVsIIpBx|v=@WR1*9%<8T@DMI7;cEQq*n8pNc2@I_@zx!z=axpM@uGrETrrD!v@{Ax z#PXj_JVGX{_&>d`E}MVv+)?F^*{LYSh)paX{-G?+2b1WT0woz3R50E_O`vg`AJ_l@Qp%IZH1DyTz^3D1qFf65(ElV$MC811h>OM)WzYQt(t%krsF#&=&l z-t~J4X@K$E>RDX6C#AQksylOsIGX=_N5mBnO>*A^_M$1dOur_eercJE{woLrXD#zn zC+{rUH@3qkzu!V~-KPdHKf3HMhi*dvK%G3@%My!TCUV`XU2#)sH};sWqxBlICSYcE z(Hd2ycZ~(^tO>EJXWdD8(`iVCBWXW)@Q%SUUU<+GFt_~D>8Mdr^?Jbtc|RWy;_STz zG)YZY=HdB%ziU|*vtF=Ajeh$*rW9%P;=$3|Ll3!ZvdBIj0pOOzYrEc^_m13DVxjF9 zq9(q z>3ioC%rp^i+zmR) zi3}n=u@3mOBAb@VVb$A!O^Z(|$Xd`_hxh?s;< z%C*#g0u{k85n(wC=%-W zHRMgk&i?B%-Zjo3VZi)rgn7w=@VXENND(SaVyHXV$;*ZpPB+NMIcz@XejGMq1h9d_elG{y ztfaw^%>CdKCK>;9M+BMWwJ7^$Stn`PS!LS~5DF)+&ik(>Kv~~3Udu*mP{uqE67Q-I zOj`el&>V0e77d4Lt|v?Jn;)_HOV`&;L2bV>75J60T!plWXHVnR&8qb#0llzu*Y$YD z@w$hD-EQ?Y&(Vaz=4xg{_)-XFr#?dx!&%)@oTfQQO0OXuF{jm+YYOa&-yTT7I|!I_ z63Ptzh}UuHx~{D!3wSnqEE9)0M{{D5!3- zpB>~1Z1%JtgAfV@<)^twPEG`G$#Kpmd-gXW$mak0rPu+j#9n@}3=}Pb%(3!uvKiKSB;hm}i^xmT_(#Tvj(v!-2@4w`#Q ze_mIQO5B#7>3FSqt74bB3=BoMf*J88gXDpFrMr7+bPDoYLg4h%by~hB+pd1K|4hC3 zK$fNh)C1C%c1G{8Yk6fE`|FUp0Rrd%MHV~jR8Hx$($(d~Ch)gVP#s%DL}0G`%ySfB zcLQ0p3-Q)yVf@v_+iHu{x$U&8leA4hjT#COPgo&Gm9xaX&@@bDhwSxhmFw~?hz|pB zec>J{7)?bfXXJa&!F!7iiv_IH z7hTWa^8p6ToR(@G=wm+sjhtdV8({uq)NxkAQ{a8i;KBCQ<#uCl2xA@nLZg%WAOOg( zbbq%1c{kVQ5H(B14S9qS|Jh{!=|;>&mrj+6j&?o?+~PZ5{>DUa{*Q)tU2d2AX~(i9 zji>9O`<$o4j!ecZTi?1@6p~mOGF>RxeAUgM^PQxwKWuq_1yZp9-RHxHho0vE(ZAoE zbnZr(Mc@(lW#rk4HHlnz{1{!cmWJ1HTvu1d-Dv%c^6D6!xcGsIkY4G0(;2L(hZo3m zUph*%pMw?>!;bJL$&Z-z!AV(fjFE`3sJnI7Ic5)eyK9I(JaQ)s*j^rpLg@4xZ)M9z z^Bx=C{=%zIjn;-6`Kgd>cT`&arn?DaOKvYpMV3t3paB+QSdw+M&X_RcHGCa{UeTR zJA%mNE{T40Zfcf_q^jq-9~W_L#w~7RZLYnk#4ye zpE6g!#aEnYq~TYV2Q+Wq+-0aJ_kH-$v)m>omJ{V0%|<9oaY8*WijDKLCCBmah07dE zmtzyS_7Vd%S;kS-r+`h~n<2PHvc`&q8jd=s0D+t#uGuEHb=WG=XU|!6YtmGo0t$tT z9TBB?UV8&@R9O+oxhUXF0rwyM>fABmZQEPwvF(1_-Eo~D%0Hg~<%1s$VTTt#H(*d3 zb4^IBtHL^uhwamGr);bz+!ML{hxsVb2G`@h%O+U-{mGvHahzeq1O}6q9z%|~Q61i* z57=%HNlkC?ph}w_#NRCN~ zXU;_V}JS}!DTvL8-a-Pt^ zDBc+u+f!Ht-?>&N)_0O~i>3C>L?;d(%5&A$->Bd33n|LlWz)~Mt~!Napabid*8J?Y z*CqChyCjT>2T|H;v;MA?i&J`bpvJW!K@o{H>|$(NT}#Ovl`o>5FS>nHffAb{4z->Q*cuJr_y zud-~M^;*5Pwv^_oultAkQ$RUfo`OB)V)wcwQZQ z!aXm>Jx@NfrAtT{V_r{Gcb!M*v)BcxA5lg7QCt{RTks%-XEjLwWBmmJpK&-%OGqVy zvaY9X-oU{P#L61)vi0?bL?l)HnrR8>z7(VCBMW6t|w8tpE9t+CDk5?~CuDZg@& zg9zjHFQy}1@j!rjtIlC)PZW*!W*g4vVy7QpuRs>&SMuXu|yWX zg0Xtl08$rY=5Nq8<;3NTvkyHQ-h#H|UJ#Bg#By z@xwATj%TmQ0#Bz=*88B{?t{<=^BN>6sYm77LA1`_VptpB=pkZt5g*i~FqIi(1epv+ z-Q{PAY3x&@cSaM2{>R_w#?MICOAjnpeD&PU$rmaO&zYY_MBvrYv!7u|ouIHTuL2$I zZd^U?tW#$*!QxpcFDP4HK1UheY1c%I?5)($tCdtO2UwsUT8Fy)5XMP5J@USwQ&TO5 zY&4E^a=0k)b1C&`4qr8(kbQr(MVXyc&o=0bDPh8DuDy?3M<2xR6;RjLwULlkI`2lA z4e8;@Jh`oHrz@~q+UDirlm-Dv~*2 zMXq?IGzYu4P6dzQ_^R|k_^Z;ktY3<{)o^#DR2bDuy%y9tuf~Tl__Y#Uewl|o=K^|4 z611BAbkVREirF06!T$Jlb)sf913ezEy36>Ftj|Q@@A={d8zCHvpBJGM<n_A;O6mW zXV(&FQZik=dh6{0y1rPpo!x$V-<6*19slNUt5|!&BKKL`xwU2A`!j9~kq%xrtk5Dv z&()E4gR;$Ln^@Bsln|W-*C$2<$}e}>u&5dR0?uCNaA-@HAKfob z;a#WU&wn`oP1SW=Rd-2`Y$C+}ge)Jp+_RB(&YiH?2byT5uutz zP|qAJ5$EzhU4Kew&O725&6SjFJRWfvf)PNrUIqQY1%1KpZJmFdMYHDyNT$uZ0>8a$ zgC=|BsA-z_yBspE{|F^)zVaH zUN@OP8?@&5Mk{lXhJC16hzXPG;xpj8wV1mDBUmcr2E0lBFSyItV{W@Vlc#s&s=jOm zxhyL!w!MTZ&a%kGKR~hbGtv(8G%xVQF?%}Hd%*ea;iG5DSC}ILrL=l6$!8o*CvtsQ zE)}`XmWxN>{wi1xxC(gWRB+A@OFXN%EKu|@SNqctT)BGeyxz^bpytnpo*%iWTE9^Y zJCC=p>m!sC=eGVOy8W#l(0C&DP4#Oz$>(3}#8Az?yJ>QTRYit%OgMnG@cYuO<4?-S zRJTfbmFWqGR1>5=r9gQ76IBo4cGt!IxyvJt>kfo_!uc!JKA>cq@2Y7#9h4}XAHY@A zLW+mpJ>1jAY|!CrE`arxIQz1{`e=K8Aoexs{s5c&`Fr@hBLlY{7yEPWY`UNce)rSg z=@>}`;=@Cdb|%uX06`}dMQq(^6y1wiF@aI6%^sj=F3+nr<$oaG3 zCcJeCS(0bAD_?8n4^tp^fzhCpEcXw^m&a|MO4RsW5Ycje| z12bsL3K`{22ZxjGW|;U!`1Zb2U1DgW$bSwUi(KwaUV*6IXQdZlj5ULsW`dJZ9jBKQ z=+u;RO#|&YnaEOt}PDe6l!}7W^4pBEeD^1d^v6x zE&n2RPQTZY9^eZa(^<~qWGoF1j*e1n`?`*VumT3-cbYCYnoa{v_i`B4$~y#ZBtv_J_UWBwXu#18*|G1v?5vJ4JPSNQS9b?VEoE{krz2iJ`@b zacxKKJ`GxXhV!H9Cdf4|&>i?ERqMOo;H3JJtic$cho<4T8a9^m$RQ%~pGdJ}IZo3y z^0tNuPyaM47DwOYQ?@o5Ckmcwl9^cXj?x{wjm^+H*SxE8t(7eQ2LoPO4ZTX9J4#U< zNvk1mem!%dlb$T88eCd@V*)(QIq&LKObZ6G^}su%Zj$l6iC(@ME|<_{S%2F<>y@CM z5=jYYxL_-j$2Y1HUV)OwMd87a1!}eYl z0g;l_xI9`cknNEv)?Z}iQ*Y3AKLyAaLSE-k_I5+08)cXa9GJLb2ynn7IlJ^L@iVxbb7-gGM(_QGG#olQ0KO{ zKVGZ|-yFw`)E6bOgan+g299sL zUGIgHImN=P4=i zfX*)CA=Dez2DOl( Date: Fri, 17 Oct 2025 14:18:27 -0500 Subject: [PATCH 91/98] Remove unneeded plots. --- examples/cfd/external_aerodynamics/domino/README.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/README.md b/examples/cfd/external_aerodynamics/domino/README.md index 92db82b656..c20e00939d 100644 --- a/examples/cfd/external_aerodynamics/domino/README.md +++ b/examples/cfd/external_aerodynamics/domino/README.md @@ -305,8 +305,6 @@ please open an issue on GitHub. To provide an example of what a successful training should look like, we include here some example results. Training curves may look similar to this: -![Surface Training Curve](../../../../docs/img/domino/surface-training-curve.png) - ![Combined Training Curve](../../../../docs/img/domino/combined-training-curve.png) And, when evaluating the results on the validation dataset, this particular @@ -327,12 +325,10 @@ run had the following L2 and R2 Metrics: | Lift R2 | 0.971 | | With the PhysicsNeMo CFD tool, you can create plots of the lift and drag -forces computed by domino vs. the CFD Solver: +forces computed by domino vs. the CFD Solver. For example, here is the drag force: ![Draf Force R^2](../../../../docs/img/domino/drag-r2.jpg) -![Lift Force R^2](../../../../docs/img/domino/lift-r2.png) - ### Training with Physics Losses DoMINO supports enforcing of PDE residuals as soft constraints. This can be used From 01a0c15d9f717411af967dd18c3607a8bc104d08 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Fri, 17 Oct 2025 14:21:07 -0500 Subject: [PATCH 92/98] uupdate r2 --- examples/cfd/external_aerodynamics/domino/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/README.md b/examples/cfd/external_aerodynamics/domino/README.md index c20e00939d..ee456cc573 100644 --- a/examples/cfd/external_aerodynamics/domino/README.md +++ b/examples/cfd/external_aerodynamics/domino/README.md @@ -321,8 +321,8 @@ run had the following L2 and R2 Metrics: | X-Tau (Shear) | 0.138 | 0.145 | | Y-Tau (Shear) | 0.174 | 0.185 | | Z-Tau (Shear) | 0.198 | 0.207 | -| Drag R2 | 0.983 | | -| Lift R2 | 0.971 | | +| Drag R2 | 0.983 | 0.975 | +| Lift R2 | 0.971 | 0.968 | With the PhysicsNeMo CFD tool, you can create plots of the lift and drag forces computed by domino vs. the CFD Solver. For example, here is the drag force: From edae4366298caf7aff04a07cf19ed2060a39d9e4 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Fri, 17 Oct 2025 15:18:29 -0500 Subject: [PATCH 93/98] Fix ruff issues --- .../external_aerodynamics/domino/src/loss.py | 14 +- .../external_aerodynamics/domino/src/test.py | 177 ++++++++++++------ .../external_aerodynamics/domino/src/train.py | 22 ++- physicsnemo/datapipes/cae/cae_dataset.py | 14 +- 4 files changed, 154 insertions(+), 73 deletions(-) diff --git a/examples/cfd/external_aerodynamics/domino/src/loss.py b/examples/cfd/external_aerodynamics/domino/src/loss.py index cb161cb06e..3ab52c7903 100644 --- a/examples/cfd/external_aerodynamics/domino/src/loss.py +++ b/examples/cfd/external_aerodynamics/domino/src/loss.py @@ -227,7 +227,7 @@ def loss_fn( num = torch.sum(mask * (output - target) ** 2.0, dims) if loss_type == "rmse": - denom = torch.sum(mask * (target - torch.mean(target, (0, 1)))**2.0, dims) + denom = torch.sum(mask * (target - torch.mean(target, (0, 1))) ** 2.0, dims) loss = torch.mean(num / denom) elif loss_type == "mse": denom = torch.sum(mask) @@ -311,7 +311,9 @@ def loss_fn_surface( # Compute the mean diff**2 of the vector component, leave the last dimension: masked_loss_ws_num = vector_diff_sq - masked_loss_ws_denom = torch.mean((target_vector - torch.mean(target_vector, (0, 1))) ** 2.0, (0, 1)) + masked_loss_ws_denom = torch.mean( + (target_vector - torch.mean(target_vector, (0, 1))) ** 2.0, (0, 1) + ) masked_loss_ws = torch.sum(masked_loss_ws_num / masked_loss_ws_denom) loss = masked_loss_pres + masked_loss_ws @@ -359,12 +361,16 @@ def loss_fn_area( # Compute the mean diff**2 of the scalar component: masked_loss_pres = torch.mean(((output_scalar - target_scalar) ** 2.0), dim=(0, 1)) if loss_type == "rmse": - masked_loss_pres /= torch.mean((target_scalar-torch.mean(target_scalar, (0, 1)))**2.0, dim=(0, 1)) + masked_loss_pres /= torch.mean( + (target_scalar - torch.mean(target_scalar, (0, 1))) ** 2.0, dim=(0, 1) + ) # Compute the mean diff**2 of the vector component, leave the last dimension: masked_loss_ws = torch.mean((target_vector - output_vector) ** 2.0, (0, 1)) if loss_type == "rmse": - masked_loss_ws /= torch.mean((target_vector - torch.mean(target_vector, (0, 1))) ** 2.0, (0, 1)) + masked_loss_ws /= torch.mean( + (target_vector - torch.mean(target_vector, (0, 1))) ** 2.0, (0, 1) + ) # Combine the scalar and vector components: loss = 0.25 * (masked_loss_pres + torch.sum(masked_loss_ws)) diff --git a/examples/cfd/external_aerodynamics/domino/src/test.py b/examples/cfd/external_aerodynamics/domino/src/test.py index d00d6dcd8d..4bb32dc6cd 100644 --- a/examples/cfd/external_aerodynamics/domino/src/test.py +++ b/examples/cfd/external_aerodynamics/domino/src/test.py @@ -120,7 +120,9 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors): if "volume_min_max" in data_dict.keys(): vol_max = data_dict["volume_min_max"][:, 1] vol_min = data_dict["volume_min_max"][:, 0] - geo_centers_vol = 2.0 * (geo_centers - vol_min) / (vol_max - vol_min) - 1 + geo_centers_vol = ( + 2.0 * (geo_centers - vol_min) / (vol_max - vol_min) - 1 + ) else: geo_centers_vol = geo_centers @@ -160,7 +162,9 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors): prediction_vol = torch.zeros_like(target_vol) num_points = volume_mesh_centers.shape[1] subdomain_points = int(np.floor(num_points / point_batch_size)) - sdf_scaling_factor = cfg.model.geometry_rep.geo_processor.volume_sdf_scaling_factor + sdf_scaling_factor = ( + cfg.model.geometry_rep.geo_processor.volume_sdf_scaling_factor + ) start_time = time.time() for p in range(subdomain_points + 1): @@ -174,7 +178,9 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors): sdf_nodes_batch = sdf_nodes[:, start_idx:end_idx] scaled_sdf_nodes_batch = [] for p in range(len(sdf_scaling_factor)): - scaled_sdf_nodes_batch.append(scale_sdf(sdf_nodes_batch, sdf_scaling_factor[p])) + scaled_sdf_nodes_batch.append( + scale_sdf(sdf_nodes_batch, sdf_scaling_factor[p]) + ) scaled_sdf_nodes_batch = torch.cat(scaled_sdf_nodes_batch, dim=-1) pos_volume_closest_batch = pos_volume_closest[:, start_idx:end_idx] @@ -199,9 +205,7 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors): else: pos_encoding_all = pos_normals_com_batch - pos_encoding = model.fc_p_vol( - pos_encoding_all - ) + pos_encoding = model.fc_p_vol(pos_encoding_all) tpredictions_batch = model.solution_calculator_vol( volume_mesh_centers_batch, geo_encoding_local, @@ -213,23 +217,23 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors): prediction_vol[:, start_idx:end_idx] = tpredictions_batch if cfg.model.normalization == "min_max_scaling": - prediction_vol = unnormalize(prediction_vol, vol_factors[0], vol_factors[1]) + prediction_vol = unnormalize( + prediction_vol, vol_factors[0], vol_factors[1] + ) elif cfg.model.normalization == "mean_std_scaling": - prediction_vol = unstandardize(prediction_vol, vol_factors[0], vol_factors[1]) + prediction_vol = unstandardize( + prediction_vol, vol_factors[0], vol_factors[1] + ) # print(np.amax(prediction_vol, axis=(0, 1)), np.amin(prediction_vol, axis=(0, 1))) - prediction_vol[:, :, :3] = ( - prediction_vol[:, :, :3] * stream_velocity[0, 0] - ) + prediction_vol[:, :, :3] = prediction_vol[:, :, :3] * stream_velocity[0, 0] prediction_vol[:, :, 3] = ( prediction_vol[:, :, 3] * stream_velocity[0, 0] ** 2.0 * air_density[0, 0] ) prediction_vol[:, :, 4] = ( - prediction_vol[:, :, 4] - * stream_velocity[0, 0] - * length_scale[0] + prediction_vol[:, :, 4] * stream_velocity[0, 0] * length_scale[0] ) else: prediction_vol = None @@ -283,9 +287,7 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors): surface_mesh_centers_batch, s_grid, ) - pos_encoding = model.fc_p_surf( - pos_surface_center_of_mass_batch - ) + pos_encoding = model.fc_p_surf(pos_surface_center_of_mass_batch) tpredictions_batch = model.solution_calculator_surf( surface_mesh_centers_batch, @@ -304,13 +306,15 @@ def test_step(data_dict, model, device, cfg, vol_factors, surf_factors): prediction_surf[:, start_idx:end_idx] = tpredictions_batch if cfg.model.normalization == "min_max_scaling": - prediction_surf = unnormalize(prediction_surf, surf_factors[0], surf_factors[1]) + prediction_surf = unnormalize( + prediction_surf, surf_factors[0], surf_factors[1] + ) elif cfg.model.normalization == "mean_std_scaling": - prediction_surf = unstandardize(prediction_surf, surf_factors[0], surf_factors[1]) + prediction_surf = unstandardize( + prediction_surf, surf_factors[0], surf_factors[1] + ) prediction_surf = ( - prediction_surf - * stream_velocity[0, 0] ** 2.0 - * air_density[0, 0] + prediction_surf * stream_velocity[0, 0] ** 2.0 * air_density[0, 0] ) else: prediction_surf = None @@ -433,7 +437,10 @@ def main(cfg: DictConfig): :, 1: ] # Assuming triangular elements mesh_indices_flattened = stl_faces.flatten() - length_scale = np.array(np.amax(np.amax(stl_vertices, 0) - np.amin(stl_vertices, 0)), dtype=np.float32) + length_scale = np.array( + np.amax(np.amax(stl_vertices, 0) - np.amin(stl_vertices, 0)), + dtype=np.float32, + ) length_scale = torch.from_numpy(length_scale).to(torch.float32).to(dist.device) stl_sizes = mesh_stl.compute_cell_sizes(length=False, area=True, volume=False) stl_sizes = np.array(stl_sizes.cell_data["Area"], dtype=np.float32) @@ -443,17 +450,29 @@ def main(cfg: DictConfig): stl_vertices = torch.from_numpy(stl_vertices).to(torch.float32).to(dist.device) stl_sizes = torch.from_numpy(stl_sizes).to(torch.float32).to(dist.device) stl_centers = torch.from_numpy(stl_centers).to(torch.float32).to(dist.device) - mesh_indices_flattened = torch.from_numpy(mesh_indices_flattened).to(torch.int32).to(dist.device) + mesh_indices_flattened = ( + torch.from_numpy(mesh_indices_flattened).to(torch.int32).to(dist.device) + ) # Center of mass calculation center_of_mass = calculate_center_of_mass(stl_centers, stl_sizes) - s_max = torch.from_numpy(np.asarray(cfg.data.bounding_box_surface.max)).to(torch.float32).to(dist.device) - s_min = torch.from_numpy(np.asarray(cfg.data.bounding_box_surface.min)).to(torch.float32).to(dist.device) + s_max = ( + torch.from_numpy(np.asarray(cfg.data.bounding_box_surface.max)) + .to(torch.float32) + .to(dist.device) + ) + s_min = ( + torch.from_numpy(np.asarray(cfg.data.bounding_box_surface.min)) + .to(torch.float32) + .to(dist.device) + ) nx, ny, nz = cfg.model.interp_res - surf_grid = create_grid(s_max, s_min, torch.from_numpy(np.asarray([nx, ny, nz])).to(dist.device)) + surf_grid = create_grid( + s_max, s_min, torch.from_numpy(np.asarray([nx, ny, nz])).to(dist.device) + ) normed_stl_vertices_cp = normalize(stl_vertices, s_max, s_min) surf_grid_normed = normalize(surf_grid, s_max, s_min) @@ -468,7 +487,7 @@ def main(cfg: DictConfig): ) surf_grid_max_min = torch.stack([s_min, s_max]) - + # Get global parameters and global parameters scaling from config.yaml global_params_names = list(cfg.variables.global_parameters.keys()) global_params_reference = { @@ -496,7 +515,9 @@ def main(cfg: DictConfig): global_params_reference = np.array( global_params_reference_list, dtype=np.float32 ) - global_params_reference = torch.from_numpy(global_params_reference).to(dist.device) + global_params_reference = torch.from_numpy(global_params_reference).to( + dist.device + ) # Define the list of global parameter values for each simulation. # Note: The user must ensure that the values provided here correspond to the @@ -515,7 +536,9 @@ def main(cfg: DictConfig): global_params_values_list = np.array( global_params_values_list, dtype=np.float32 ) - global_params_values = torch.from_numpy(global_params_values_list).to(dist.device) + global_params_values = torch.from_numpy(global_params_values_list).to( + dist.device + ) # Read VTP if model_type == "surface" or model_type == "combined": @@ -543,13 +566,20 @@ def main(cfg: DictConfig): surface_normals = ( surface_normals / np.linalg.norm(surface_normals, axis=1)[:, np.newaxis] ) - surface_coordinates = torch.from_numpy(surface_coordinates).to(torch.float32).to(dist.device) - surface_normals = torch.from_numpy(surface_normals).to(torch.float32).to(dist.device) - surface_sizes = torch.from_numpy(surface_sizes).to(torch.float32).to(dist.device) - surface_fields = torch.from_numpy(surface_fields).to(torch.float32).to(dist.device) + surface_coordinates = ( + torch.from_numpy(surface_coordinates).to(torch.float32).to(dist.device) + ) + surface_normals = ( + torch.from_numpy(surface_normals).to(torch.float32).to(dist.device) + ) + surface_sizes = ( + torch.from_numpy(surface_sizes).to(torch.float32).to(dist.device) + ) + surface_fields = ( + torch.from_numpy(surface_fields).to(torch.float32).to(dist.device) + ) if cfg.model.num_neighbors_surface > 1: - time_start = time.time() # print(f"file: {dirname}, surface coordinates shape: {surface_coordinates.shape}") # try: @@ -607,15 +637,29 @@ def main(cfg: DictConfig): polydata_vol, volume_variable_names ) volume_fields = np.concatenate(volume_fields, axis=-1) - volume_coordinates = torch.from_numpy(volume_coordinates).to(torch.float32).to(dist.device) - volume_fields = torch.from_numpy(volume_fields).to(torch.float32).to(dist.device) + volume_coordinates = ( + torch.from_numpy(volume_coordinates).to(torch.float32).to(dist.device) + ) + volume_fields = ( + torch.from_numpy(volume_fields).to(torch.float32).to(dist.device) + ) - c_max = torch.from_numpy(np.asarray(cfg.data.bounding_box.max)).to(torch.float32).to(dist.device) - c_min = torch.from_numpy(np.asarray(cfg.data.bounding_box.min)).to(torch.float32).to(dist.device) + c_max = ( + torch.from_numpy(np.asarray(cfg.data.bounding_box.max)) + .to(torch.float32) + .to(dist.device) + ) + c_min = ( + torch.from_numpy(np.asarray(cfg.data.bounding_box.min)) + .to(torch.float32) + .to(dist.device) + ) # Generate a grid of specified resolution to map the bounding box # The grid is used for capturing structured geometry features and SDF representation of geometry - grid = create_grid(c_max, c_min, torch.from_numpy(np.asarray([nx, ny, nz])).to(dist.device)) + grid = create_grid( + c_max, c_min, torch.from_numpy(np.asarray([nx, ny, nz])).to(dist.device) + ) if cfg.data.normalize_coordinates: volume_coordinates = normalize(volume_coordinates, c_max, c_min) @@ -633,7 +677,7 @@ def main(cfg: DictConfig): grid, use_sign_winding_number=True, ) - + # SDF calculation time_start = time.time() sdf_nodes, sdf_node_closest_point = signed_distance_field( @@ -647,7 +691,7 @@ def main(cfg: DictConfig): pos_volume_closest = volume_coordinates - sdf_node_closest_point pos_volume_center_of_mass = volume_coordinates - center_of_mass_normalized - + else: volume_coordinates = None volume_fields = None @@ -723,10 +767,7 @@ def main(cfg: DictConfig): "global_params_reference": torch.unsqueeze(global_params_reference, -1), } - data_dict = { - key: torch.unsqueeze(value, 0) - for key, value in data_dict.items() - } + data_dict = {key: torch.unsqueeze(value, 0) for key, value in data_dict.items()} prediction_vol, prediction_surf = test_step( data_dict, model, dist.device, cfg, vol_factors, surf_factors @@ -771,9 +812,15 @@ def main(cfg: DictConfig): surface_fields[:, 0] * surface_normals[:, 2] * surface_sizes[:, 0] - surface_fields[:, 3] * surface_sizes[:, 0] ) - print("Drag=", dirname, force_x_pred.cpu().numpy(), force_x_true.cpu().numpy()) - print("Lift=", dirname, force_z_pred.cpu().numpy(), force_z_true.cpu().numpy()) - print("Side=", dirname, force_y_pred.cpu().numpy(), force_y_true.cpu().numpy()) + print( + "Drag=", dirname, force_x_pred.cpu().numpy(), force_x_true.cpu().numpy() + ) + print( + "Lift=", dirname, force_z_pred.cpu().numpy(), force_z_true.cpu().numpy() + ) + print( + "Side=", dirname, force_y_pred.cpu().numpy(), force_y_true.cpu().numpy() + ) aero_forces_all.append( [ dirname, @@ -787,8 +834,12 @@ def main(cfg: DictConfig): ) l2_gt = torch.mean(torch.square(surface_fields), (0)) - l2_error = torch.mean(torch.square(prediction_surf[0] - surface_fields), (0)) - l2_surface_all.append(np.sqrt(l2_error.cpu().numpy()) / np.sqrt(l2_gt.cpu().numpy())) + l2_error = torch.mean( + torch.square(prediction_surf[0] - surface_fields), (0) + ) + l2_surface_all.append( + np.sqrt(l2_error.cpu().numpy()) / np.sqrt(l2_gt.cpu().numpy()) + ) print( "Surface L-2 norm:", @@ -819,30 +870,42 @@ def main(cfg: DictConfig): dirname, np.sqrt(l2_error.cpu().numpy()) / np.sqrt(l2_gt.cpu().numpy()), ) - l2_volume_all.append(np.sqrt(l2_error.cpu().numpy()) / np.sqrt(l2_gt.cpu().numpy())) + l2_volume_all.append( + np.sqrt(l2_error.cpu().numpy()) / np.sqrt(l2_gt.cpu().numpy()) + ) # import pdb; pdb.set_trace() if prediction_surf is not None: - surfParam_vtk = numpy_support.numpy_to_vtk(prediction_surf[0, :, 0:1].cpu().numpy()) + surfParam_vtk = numpy_support.numpy_to_vtk( + prediction_surf[0, :, 0:1].cpu().numpy() + ) surfParam_vtk.SetName(f"{surface_variable_names[0]}Pred") celldata_all.GetCellData().AddArray(surfParam_vtk) - surfParam_vtk = numpy_support.numpy_to_vtk(prediction_surf[0, :, 1:].cpu().numpy()) + surfParam_vtk = numpy_support.numpy_to_vtk( + prediction_surf[0, :, 1:].cpu().numpy() + ) surfParam_vtk.SetName(f"{surface_variable_names[1]}Pred") celldata_all.GetCellData().AddArray(surfParam_vtk) write_to_vtp(celldata_all, vtp_pred_save_path) if prediction_vol is not None: - volParam_vtk = numpy_support.numpy_to_vtk(prediction_vol[:, 0:3].cpu().numpy()) + volParam_vtk = numpy_support.numpy_to_vtk( + prediction_vol[:, 0:3].cpu().numpy() + ) volParam_vtk.SetName(f"{volume_variable_names[0]}Pred") polydata_vol.GetPointData().AddArray(volParam_vtk) - volParam_vtk = numpy_support.numpy_to_vtk(prediction_vol[:, 3:4].cpu().numpy()) + volParam_vtk = numpy_support.numpy_to_vtk( + prediction_vol[:, 3:4].cpu().numpy() + ) volParam_vtk.SetName(f"{volume_variable_names[1]}Pred") polydata_vol.GetPointData().AddArray(volParam_vtk) - volParam_vtk = numpy_support.numpy_to_vtk(prediction_vol[:, 4:5].cpu().numpy()) + volParam_vtk = numpy_support.numpy_to_vtk( + prediction_vol[:, 4:5].cpu().numpy() + ) volParam_vtk.SetName(f"{volume_variable_names[2]}Pred") polydata_vol.GetPointData().AddArray(volParam_vtk) diff --git a/examples/cfd/external_aerodynamics/domino/src/train.py b/examples/cfd/external_aerodynamics/domino/src/train.py index 9758ed7e2f..55731696d2 100644 --- a/examples/cfd/external_aerodynamics/domino/src/train.py +++ b/examples/cfd/external_aerodynamics/domino/src/train.py @@ -209,7 +209,7 @@ def train_epoch( io_end_time = time.perf_counter() if add_physics_loss: autocast_enabled = False - + with autocast("cuda", enabled=autocast_enabled, cache_enabled=False): with nvtx.range("Model Forward Pass"): if add_physics_loss: @@ -259,9 +259,7 @@ def train_epoch( scaler.unscale_(optimizer) # Since the gradients of optimizer's assigned params are unscaled, clips as usual. - torch.nn.utils.clip_grad_norm_( - model.parameters(), grad_max_norm - ) + torch.nn.utils.clip_grad_norm_(model.parameters(), grad_max_norm) scaler.step(optimizer) scaler.update() optimizer.zero_grad() @@ -501,14 +499,22 @@ def main(cfg: DictConfig) -> None: optimizer_class = torch.optim.AdamW else: raise ValueError(f"Unsupported optimizer: {cfg.train.optimizer.name}") - optimizer = optimizer_class(model.parameters(), lr=cfg.train.optimizer.lr, weight_decay=cfg.train.optimizer.weight_decay) + optimizer = optimizer_class( + model.parameters(), + lr=cfg.train.optimizer.lr, + weight_decay=cfg.train.optimizer.weight_decay, + ) if cfg.train.lr_scheduler.name == "MultiStepLR": scheduler = torch.optim.lr_scheduler.MultiStepLR( - optimizer, milestones=cfg.train.lr_scheduler.milestones, gamma=cfg.train.lr_scheduler.gamma - ) + optimizer, + milestones=cfg.train.lr_scheduler.milestones, + gamma=cfg.train.lr_scheduler.gamma, + ) elif cfg.train.lr_scheduler.name == "CosineAnnealingLR": scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( - optimizer, T_max=cfg.train.lr_scheduler.T_max, eta_min=cfg.train.lr_scheduler.eta_min + optimizer, + T_max=cfg.train.lr_scheduler.T_max, + eta_min=cfg.train.lr_scheduler.eta_min, ) else: raise ValueError(f"Unsupported scheduler: {cfg.train.lr_scheduler.name}") diff --git a/physicsnemo/datapipes/cae/cae_dataset.py b/physicsnemo/datapipes/cae/cae_dataset.py index b41e217635..8a2dfdfc5c 100644 --- a/physicsnemo/datapipes/cae/cae_dataset.py +++ b/physicsnemo/datapipes/cae/cae_dataset.py @@ -1213,7 +1213,9 @@ def compute_mean_std_min_max( end = time.perf_counter() iteration_time = end - start - print(f"on iteration {i} of {max_samples}, time: {iteration_time:.2f} seconds for file: {j}") + print( + f"on iteration {i} of {max_samples}, time: {iteration_time:.2f} seconds for file: {j}" + ) start = time.perf_counter() var = {} @@ -1241,9 +1243,11 @@ def compute_mean_std_min_max( std_sample = std[field_key] mask = torch.ones_like(field_data, dtype=torch.bool) for v in range(field_data.shape[-1]): - outliers = (field_data[:, v] < mean_sample[v] - 9.0 * std_sample[v]) | (field_data[:, v] > mean_sample[v] + 9.0 * std_sample[v]) + outliers = (field_data[:, v] < mean_sample[v] - 9.0 * std_sample[v]) | ( + field_data[:, v] > mean_sample[v] + 9.0 * std_sample[v] + ) mask[:, v] = ~outliers - + batch_min = [] batch_max = [] for v in range(field_data.shape[-1]): @@ -1258,7 +1262,9 @@ def compute_mean_std_min_max( end = time.perf_counter() iteration_time = end - start - print(f"on iteration {i} of {max_samples}, time: {iteration_time:.2f} seconds for file: {j}") + print( + f"on iteration {i} of {max_samples}, time: {iteration_time:.2f} seconds for file: {j}" + ) start = time.perf_counter() global_end = time.perf_counter() From 6c04d882666fcaf29fd137e6f59887364b930fec Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Fri, 17 Oct 2025 21:09:54 +0000 Subject: [PATCH 94/98] test fix. --- test/utils/test_domino_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/utils/test_domino_utils.py b/test/utils/test_domino_utils.py index a9e1166640..fc10b93688 100644 --- a/test/utils/test_domino_utils.py +++ b/test/utils/test_domino_utils.py @@ -122,7 +122,7 @@ def test_nd_interpolator(): coords = torch.tensor([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]]) field_vals = torch.tensor([[1.0], [2.0], [3.0], [4.0]]) grid_points = torch.tensor([[0.5, 0.5]]) - result = nd_interpolator([coords], field_vals, grid_points) + result = nd_interpolator(coords, field_vals, grid_points) assert result.shape[0] == 1 # One grid point From bd74ce3a72310d3b958e785a67e4893d7ec9c5eb Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Fri, 17 Oct 2025 21:21:25 +0000 Subject: [PATCH 95/98] Fix dict item with normalization off. --- physicsnemo/datapipes/cae/domino_datapipe.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/physicsnemo/datapipes/cae/domino_datapipe.py b/physicsnemo/datapipes/cae/domino_datapipe.py index eb7b004d18..491ab5a199 100644 --- a/physicsnemo/datapipes/cae/domino_datapipe.py +++ b/physicsnemo/datapipes/cae/domino_datapipe.py @@ -796,7 +796,8 @@ def process_data(self, data_dict): ######################################################################## # For volume data, we store this only if normalizing coordinates: if self.model_type == "volume" or self.model_type == "combined": - return_dict["volume_min_max"] = torch.stack([c_min, c_max]) + if self.config.normalize_coordinates: + return_dict["volume_min_max"] = torch.stack([c_min, c_max]) if self.model_type == "volume" or self.model_type == "combined": volume_fields_raw = ( From 90235753d3f7c2df5471cef27ed506226d5f8fc4 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Fri, 17 Oct 2025 16:26:00 -0500 Subject: [PATCH 96/98] Change codeowners order, exclusion goes last. --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index d5881d69ae..1e6ad0ce59 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -19,8 +19,8 @@ ./*.md @ram-cherukuri @megnvidia # All changes to documentation, except images: -docs/img/ docs/ @megnvidia @ktangsali +docs/img/ # Core release files From b24b0d4772bf9eeec47e1c2c8bc1be83bdeb153b Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Fri, 17 Oct 2025 16:31:51 -0500 Subject: [PATCH 97/98] Undo file order so it can get fixed elsewhere. --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 1e6ad0ce59..d5881d69ae 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -19,8 +19,8 @@ ./*.md @ram-cherukuri @megnvidia # All changes to documentation, except images: -docs/ @megnvidia @ktangsali docs/img/ +docs/ @megnvidia @ktangsali # Core release files From b8db738b93fe42b4c90b7c081f22288db417a0f5 Mon Sep 17 00:00:00 2001 From: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Date: Mon, 20 Oct 2025 13:38:39 +0000 Subject: [PATCH 98/98] Update doctstring tests. --- physicsnemo/utils/domino/__init__.py | 15 ++++++ physicsnemo/utils/domino/utils.py | 78 ++++++++++++++++------------ 2 files changed, 59 insertions(+), 34 deletions(-) create mode 100644 physicsnemo/utils/domino/__init__.py diff --git a/physicsnemo/utils/domino/__init__.py b/physicsnemo/utils/domino/__init__.py new file mode 100644 index 0000000000..b2f171d4ac --- /dev/null +++ b/physicsnemo/utils/domino/__init__.py @@ -0,0 +1,15 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/physicsnemo/utils/domino/utils.py b/physicsnemo/utils/domino/utils.py index e3faae8123..7f67f36e6c 100644 --- a/physicsnemo/utils/domino/utils.py +++ b/physicsnemo/utils/domino/utils.py @@ -67,8 +67,8 @@ def calculate_center_of_mass( def normalize( field: torch.Tensor, - max_val: torch.Tensor | None = None, - min_val: torch.Tensor | None = None, + max_val: float | torch.Tensor | None = None, + min_val: float | torch.Tensor | None = None, ) -> torch.Tensor: """Normalize field values to the range [-1, 1]. @@ -93,11 +93,11 @@ def normalize( >>> import torch >>> field = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0]) >>> normalized = normalize(field, 5.0, 1.0) - >>> torch.allclose(normalized, [-1.0, -0.5, 0.0, 0.5, 1.0]) + >>> torch.allclose(normalized, torch.tensor([-1.0, -0.5, 0.0, 0.5, 1.0])) True >>> # Auto-compute min/max >>> normalized_auto = normalize(field) - >>> torch.allclose(normalized_auto, [-1.0, -0.5, 0.0, 0.5, 1.0]) + >>> torch.allclose(normalized_auto, torch.tensor([-1.0, -0.5, 0.0, 0.5, 1.0])) True """ @@ -111,7 +111,9 @@ def normalize( def unnormalize( - normalized_field: torch.Tensor, max_val: torch.Tensor, min_val: torch.Tensor + normalized_field: torch.Tensor, + max_val: float | torch.Tensor, + min_val: float | torch.Tensor, ) -> torch.Tensor: """Reverse the normalization process to recover original field values. @@ -129,8 +131,10 @@ def unnormalize( Examples: >>> import torch >>> normalized = torch.tensor([-1.0, -0.5, 0.0, 0.5, 1.0]) - >>> original = unnormalize(normalized, 5.0, 1.0) - >>> torch.allclose(original, [1.0, 2.0, 3.0, 4.0, 5.0]) + >>> max_val = torch.tensor(5.0) + >>> min_val = torch.tensor(1.0) + >>> original = unnormalize(normalized, max_val, min_val) + >>> torch.allclose(original, torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0])) True """ field_range = max_val - min_val @@ -139,8 +143,8 @@ def unnormalize( def standardize( field: torch.Tensor, - mean: torch.Tensor | None = None, - std: torch.Tensor | None = None, + mean: float | torch.Tensor | None = None, + std: float | torch.Tensor | None = None, ) -> torch.Tensor: """Standardize field values to have zero mean and unit variance. @@ -162,14 +166,16 @@ def standardize( Examples: >>> import torch >>> field = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0]) - >>> standardized = standardize(field, 3.0, torch.sqrt(2.5)) - >>> torch.allclose(standardized, [-1.265, -0.632, 0.0, 0.632, 1.265], atol=1e-3) + >>> mean = torch.tensor(3.0) + >>> std = torch.sqrt(torch.tensor(2.5)) + >>> standardized = standardize(field, mean, std) + >>> torch.allclose(standardized, torch.tensor([-1.265, -0.632, 0.0, 0.632, 1.265]), atol=1e-3) True >>> # Auto-compute mean/std >>> standardized_auto = standardize(field) - >>> torch.allclose(torch.mean(standardized_auto), 0.0) + >>> torch.allclose(torch.mean(standardized_auto), torch.tensor(0.0)) True - >>> torch.allclose(torch.std(standardized_auto, ddof=0), 1.0) + >>> torch.allclose(torch.std(standardized_auto), torch.tensor(1.0)) True """ @@ -182,7 +188,9 @@ def standardize( def unstandardize( - standardized_field: torch.Tensor, mean: torch.Tensor, std: torch.Tensor + standardized_field: torch.Tensor, + mean: float | torch.Tensor, + std: float | torch.Tensor, ) -> torch.Tensor: """Reverse the standardization process to recover original field values. @@ -200,8 +208,10 @@ def unstandardize( Examples: >>> import torch >>> standardized = torch.tensor([-1.265, -0.632, 0.0, 0.632, 1.265]) - >>> original = unstandardize(standardized, 3.0, torch.sqrt(2.5)) - >>> torch.allclose(original, [1.0, 2.0, 3.0, 4.0, 5.0], atol=1e-3) + >>> mean = torch.tensor(3.0) + >>> std = torch.sqrt(torch.tensor(2.5)) + >>> original = unstandardize(standardized, mean, std) + >>> torch.allclose(original, torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0]), atol=1e-3) True """ return standardized_field * std + mean @@ -236,12 +246,12 @@ def calculate_normal_positional_encoding( >>> cell_size = [0.1, 0.1, 0.1] >>> encoding = calculate_normal_positional_encoding(coords, cell_dimensions=cell_size) >>> encoding.shape - (2, 12) + torch.Size([2, 12]) >>> # Relative positioning example >>> coords_b = torch.tensor([[0.5, 0.5, 0.5], [0.5, 0.5, 0.5]]) >>> encoding_rel = calculate_normal_positional_encoding(coords, coords_b, cell_size) >>> encoding_rel.shape - (2, 12) + torch.Size([2, 12]) """ dx, dy, dz = cell_dimensions[0], cell_dimensions[1], cell_dimensions[2] @@ -318,7 +328,7 @@ def pad(arr: torch.Tensor, n_points: int, pad_value: float = 0.0) -> torch.Tenso >>> arr = torch.tensor([[1.0, 2.0], [3.0, 4.0]]) >>> padded = pad(arr, 4, -1.0) >>> padded.shape - (4, 2) + torch.Size([4, 2]) >>> torch.allclose(padded[:2], arr) True >>> bool(torch.all(padded[2:] == -1.0)) @@ -368,7 +378,7 @@ def pad_inp(arr: torch.Tensor, n_points: int, pad_value: float = 0.0) -> torch.T >>> arr = torch.tensor([[[1.0, 2.0]], [[3.0, 4.0]]]) >>> padded = pad_inp(arr, 4, 0.0) >>> padded.shape - (4, 1, 2) + torch.Size([4, 1, 2]) >>> torch.allclose(padded[:2], arr) True >>> bool(torch.all(padded[2:] == 0.0)) @@ -424,13 +434,13 @@ def shuffle_array( Examples: >>> import torch - >>> torch.manual_seed(42) # For reproducible results + >>> _ = torch.manual_seed(42) # For reproducible results >>> data = torch.tensor([[1, 2], [3, 4], [5, 6], [7, 8]]) >>> subset, indices = shuffle_array(data, 2) >>> subset.shape - (2, 2) + torch.Size([2, 2]) >>> indices.shape - (2,) + torch.Size([2]) >>> len(torch.unique(indices)) == 2 # No duplicates True """ @@ -510,14 +520,14 @@ def shuffle_array_without_sampling( Examples: >>> import torch - >>> torch.manual_seed(42) # For reproducible results + >>> _ = torch.manual_seed(42) # For reproducible results >>> data = torch.tensor([[1], [2], [3], [4]]) >>> shuffled, indices = shuffle_array_without_sampling(data) >>> shuffled.shape - (4, 1) + torch.Size([4, 1]) >>> indices.shape - (4,) - >>> set(indices) == set(range(4)) # All original indices present + torch.Size([4]) + >>> set(indices.tolist()) == set(range(4)) # All original indices present True """ idx = torch.randperm(arr.shape[0]) @@ -660,7 +670,7 @@ def create_grid( >>> grid_res = torch.tensor([2, 2, 2]) >>> grid = create_grid(max_bounds, min_bounds, grid_res) >>> grid.shape - (2, 2, 2, 3) + torch.Size([2, 2, 2, 3]) >>> torch.allclose(grid[0, 0, 0], torch.tensor([0.0, 0.0, 0.0])) True >>> torch.allclose(grid[1, 1, 1], torch.tensor([1.0, 1.0, 1.0])) @@ -794,14 +804,14 @@ def area_weighted_shuffle_array( Examples: >>> import torch - >>> torch.manual_seed(42) # For reproducible results + >>> _ = torch.manual_seed(42) # For reproducible results >>> mesh_data = torch.tensor([[1.0], [2.0], [3.0], [4.0]]) >>> cell_areas = torch.tensor([0.1, 0.1, 0.1, 10.0]) # Last point has much larger area >>> subset, indices = area_weighted_shuffle_array(mesh_data, 2, cell_areas) >>> subset.shape - (2, 1) + torch.Size([2, 1]) >>> indices.shape - (2,) + torch.Size([2]) >>> # The point with large area (index 3) should likely be selected >>> len(set(indices)) <= 2 # At most 2 unique indices True @@ -849,14 +859,14 @@ def solution_weighted_shuffle_array( Examples: >>> import torch - >>> torch.manual_seed(42) # For reproducible results + >>> _ = torch.manual_seed(42) # For reproducible results >>> mesh_data = torch.tensor([[1.0], [2.0], [3.0], [4.0]]) >>> solution = torch.tensor([0.1, 0.1, 0.1, 10.0]) # Last point has much larger solution field >>> subset, indices = solution_weighted_shuffle_array(mesh_data, 2, solution) >>> subset.shape - (2, 1) + torch.Size([2, 1]) >>> indices.shape - (2,) + torch.Size([2]) >>> # The point with large area (index 3) should likely be selected >>> len(set(indices)) <= 2 # At most 2 unique indices True