# ---------------------------------------------------------------------------
# CMakeLists.txt additions (reconstructed from a flattened git patch).
# Two hunks: (1) CUDA compute-capability selection, placed BEFORE project()
# so CMAKE_CUDA_ARCHITECTURES is set when the CUDA language is enabled;
# (2) best-effort GPU auto-detection, placed inside the
# `if (CUDAToolkit_FOUND)` branch.
# ---------------------------------------------------------------------------

# --- Hunk 1: CUDA compute capability target --------------------------------
# Override with just the number: cmake -DCUDA_ARCH=86 ..
# Default: 7.5 (Turing).
set(CUDA_ARCH "75 - Turing (RTX 20xx, GTX 16xx)"
    CACHE STRING "CUDA compute capability target architecture")
set_property(CACHE CUDA_ARCH PROPERTY STRINGS
    "52 - Maxwell (GTX 9xx, Titan X)"
    "60 - Pascal (GTX 10xx, P100)"
    "61 - Pascal (GTX 1050/1030)"
    "70 - Volta (V100, Titan V100)"
    "75 - Turing (RTX 20xx, GTX 16xx)"
    "80 - Ampere (A100)"
    "86 - Ampere (RTX 30xx)"
    "89 - Ada Lovelace (RTX 40xx)"
    "90 - Hopper (H100)"
)

# Extract the leading digits of the selection.  A regex is used instead of
# string(SUBSTRING "${CUDA_ARCH}" 0 2 ...) so that a bare numeric override
# ("-DCUDA_ARCH=86") and future three-digit architectures ("100") both work.
string(REGEX MATCH "^[0-9]+" CUDA_ARCH_NUMBER "${CUDA_ARCH}")
if(NOT CUDA_ARCH_NUMBER)
    message(FATAL_ERROR
        "CUDA_ARCH must begin with a compute capability number, got: '${CUDA_ARCH}'")
endif()
set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH_NUMBER})
message(STATUS "CUDA architecture set to: sm_${CMAKE_CUDA_ARCHITECTURES}")

# --- Hunk 2: GPU auto-detection (inside `if (CUDAToolkit_FOUND)`) ----------
# Query the local GPU's compute capability.  Failure is non-fatal: build
# nodes may have no GPU or no nvidia-smi on PATH.
execute_process(
    COMMAND nvidia-smi --query-gpu=compute_cap --format=csv,noheader
    OUTPUT_VARIABLE DETECTED_GPU_CAPS
    RESULT_VARIABLE NVIDIA_SMI_RESULT
    OUTPUT_STRIP_TRAILING_WHITESPACE
    ERROR_QUIET
)

if(NVIDIA_SMI_RESULT EQUAL 0 AND DETECTED_GPU_CAPS)
    # nvidia-smi reports "7.5" per GPU; strip the dot, then keep the first
    # GPU's number when several are present.
    string(REPLACE "." "" DETECTED_GPU_ARCH "${DETECTED_GPU_CAPS}")
    string(REGEX MATCH "[0-9]+" DETECTED_GPU_ARCH "${DETECTED_GPU_ARCH}")
    message(STATUS "Detected GPU compute capability: sm_${DETECTED_GPU_ARCH}")

    # Quoted STREQUAL operands prevent accidental variable re-dereference.
    if(NOT "${CUDA_ARCH_NUMBER}" STREQUAL "${DETECTED_GPU_ARCH}")
        message(WARNING
            "Target CUDA architecture (sm_${CUDA_ARCH_NUMBER}) differs from detected GPU (sm_${DETECTED_GPU_ARCH}).\n"
            "  The compiled code may not run optimally or at all on this system.\n"
            "  To build for the detected GPU: cmake -DCUDA_ARCH=${DETECTED_GPU_ARCH} ..\n"
            "  To build for a different target: ensure the target system has sm_${CUDA_ARCH_NUMBER} or newer."
        )
    else()
        message(STATUS "✓ Configured architecture matches detected GPU")
    endif()
else()
    message(STATUS "Could not detect GPU compute capability (nvidia-smi not available or no GPU present)")
endif()
// ---------------------------------------------------------------------------
// include/math/device_precision.cuh — section appended after the closing
// #endif of the precision-selection block (which defines scalar_t, CUDA_ABS,
// CUDA_SQRT, CUDA_EXP).  The patch shows the file previously ended without a
// trailing newline; this reconstruction restores it.
// ---------------------------------------------------------------------------

// Accumulator type for summation/reduction operations.
//
// Integer accumulation is used because:
//   1. atomicAdd on uint32_t is a single native hardware instruction
//      (the fastest available atomic).
//   2. The maximum possible sum is well within uint32_t range (~20M << 4.3G).
//   3. The final reduction to double is performed on the host.
//
// NOTE: If pixel_t can be negative (pedestal-subtracted data), change this
//       to int32_t.
// NOTE: If pixel values are non-integer (e.g. gain-corrected floats), revert
//       to: using accumulator_t = scalar_t;
using accumulator_t = uint32_t;
// ---------------------------------------------------------------------------
// include/math/math_utils.cuh — ceil_div addition.  Requires <type_traits>
// (added alongside this hunk).  Build note: the same patch also adds
// integrator.cu, kabsch.cu and extent.cc to integrator/CMakeLists.txt and an
// add_subdirectory(tests) line — see the project build files.
// ---------------------------------------------------------------------------

#ifndef DEVICE_HOST
#define DEVICE_HOST  // Fallback when compiled outside math_utils.cuh's guard.
#endif

/**
 * @brief Integer ceiling division
 *
 * Computes ceil(n/d) using integer arithmetic only, avoiding floating-point
 * conversion.  Implemented as n/d plus one when a positive remainder exists,
 * rather than the classic (n + d - 1) / d, because the latter:
 *   - wraps for unsigned n near the type's maximum (overflow), and
 *   - is wrong for negative n when |n| is an exact multiple of d
 *     (C++ division truncates toward zero, e.g. (-4 + 1) / 2 == -1,
 *     but ceil(-4/2) == -2).
 *
 * @tparam T Integer type
 * @param n Numerator
 * @param d Denominator (must be > 0)
 * @return Ceiling of n/d
 *
 * @example ceil_div(15, 4) returns 4, whereas 15/4 returns 3
 */
template <typename T>
DEVICE_HOST constexpr T ceil_div(T n, T d) {
    static_assert(std::is_integral_v<T>, "ceil_div requires an integer type");
    // n/d truncates toward zero; for d > 0 that already equals the ceiling
    // when n < 0, so round up only on a positive remainder.
    return n / d + static_cast<T>(n % d > 0);
}
+add_subdirectory(tests) \ No newline at end of file diff --git a/integrator/extent.cc b/integrator/extent.cc new file mode 100644 index 00000000..bf03cf10 --- /dev/null +++ b/integrator/extent.cc @@ -0,0 +1,191 @@ +/** + * @file extent.cc + * @brief Extent and bounding box algorithms for baseline CPU implementation + */ + +#include "extent.hpp" + +#include +#include +#include + +#include "ffs_logger.hpp" + +std::vector compute_kabsch_bounding_boxes( + const Eigen::Vector3d &s0, + const Eigen::Vector3d &rot_axis, + const mdspan_2d_double &s1_vectors, + const mdspan_2d_double &phi_positions, + const size_t num_reflections, + const double sigma_b, + const double sigma_m, + const Panel &panel, + const Scan &scan, + const MonochromaticBeam &beam, + const double n_sigma, + const double sigma_b_multiplier) { + std::vector extents; + extents.reserve(num_reflections); + + /* + * Tolerance for detecting when a reflection is nearly parallel to + * the rotation axis. When ζ = m₂ · e₁ approaches zero, it indicates + * the reflection's scattering plane is nearly parallel to the + * goniometer rotation axis, making the φ-to-image conversion + * numerically unstable. This threshold (1e-10) is chosen based on + * geometric considerations rather than pure floating-point precision + * - it represents a practical limit for "nearly parallel" geometry + * where the standard bounding box calculation should be bypassed in + * favor of spanning the entire image range. 
+ */ + static constexpr double ZETA_TOLERANCE = 1e-10; + + // Calculate the angular divergence parameters: + // Δb = nσ × σb × m (beam divergence extent) + // Δm = nσ × σm (mosaicity extent) + double delta_b = n_sigma * sigma_b * sigma_b_multiplier; + double delta_m = n_sigma * sigma_m; + + // Extract experimental parameters needed for coordinate transformations + const auto [osc_start, osc_width] = scan.get_oscillation(); + int image_range_start = scan.get_image_range()[0]; + int image_range_end = scan.get_image_range()[1]; + double wl = beam.get_wavelength(); + Matrix3d d_matrix_inv = panel.get_d_matrix().inverse(); + + // Process each reflection individually + for (size_t i = 0; i < num_reflections; ++i) { + // Extract reflection centroid data + Eigen::Vector3d s1_c( + s1_vectors(i, 0), s1_vectors(i, 1), s1_vectors(i, 2)); // s₁ᶜ from s1_vectors + double phi_c = (phi_positions(i, 2)); // φᶜ from xyzcal.mm column + + // Construct the Kabsch coordinate system for this reflection + // e1 = s₁ᶜ × s₀ / |s₁ᶜ × s₀| (perpendicular to scattering plane) + Eigen::Vector3d e1 = s1_c.cross(s0).normalized(); + // e2 = s₁ᶜ × e₁ / |s₁ᶜ × e₁| (within scattering plane, orthogonal to e1) + Eigen::Vector3d e2 = s1_c.cross(e1).normalized(); + + double s1_len = s1_c.norm(); + + // Calculate s′ vectors at the four corners of the integration region + // These correspond to the extremes: (±Δb, ±Δb) in Kabsch coordinates + std::vector s_prime_vectors; + static constexpr std::array, 4> corner_signs = { + {{1, 1}, {1, -1}, {-1, 1}, {-1, -1}}}; + + for (auto [e1_sign, e2_sign] : corner_signs) { + // Project Δb divergences onto Kabsch basis vectors + // p represents the displacement in reciprocal space + Eigen::Vector3d p = + (e1_sign * delta_b * e1 / s1_len) + (e2_sign * delta_b * e2 / s1_len); + + // Debug output for the Ewald sphere calculation + double p_magnitude = p.norm(); + logger.trace( + "Reflection {}, corner ({},{}): p.norm()={:.6f}, s1_len={:.6f}, " + "delta_b={:.6f}", + i, + 
e1_sign, + e2_sign, + p_magnitude, + s1_len, + delta_b); + + // Ensure the resulting s′ vector lies on the Ewald sphere + // This involves solving: |s′|² = |s₁ᶜ|² for the correct magnitude + double b = s1_len * s1_len - p.dot(p); + if (b < 0) { + logger.error( + "Negative b value: {:.6f} for reflection {} (p.dot(p)={:.6f}, " + "s1_len²={:.6f})", + b, + i, + p.dot(p), + s1_len * s1_len); + logger.error( + "This means the displacement vector is too large for the Ewald " + "sphere"); + // Skip this corner or use a fallback approach + continue; + } + double d = -(p.dot(s1_c) / s1_len) + std::sqrt(b); + + logger.trace("Reflection {}: b={:.6f}, d={:.6f}", i, b, d); + + // Construct the s′ vector: s′ = (d × ŝ₁ᶜ) + p + Eigen::Vector3d s_prime = (d * s1_c / s1_len) + p; + s_prime_vectors.push_back(s_prime); + } + + // Transform s′ vectors back to detector coordinates using Panel's get_ray_intersection + std::vector> detector_coords; + for (const auto &s_prime : s_prime_vectors) { + // Direct conversion from s′ vector to detector coordinates + // get_ray_intersection returns coordinates in mm + auto xy_mm_opt = panel.get_ray_intersection(s_prime); + if (!xy_mm_opt) { + continue; // Skip this corner if no intersection + } + std::array xy_mm = *xy_mm_opt; + + // Convert from mm to pixels using the new mm_to_px function + std::array xy_pixels = panel.mm_to_px(xy_mm[0], xy_mm[1]); + + detector_coords.push_back({xy_pixels[0], xy_pixels[1]}); + } + + // Determine the bounding box in detector coordinates + // Find minimum and maximum coordinates from the four corners + auto [min_x_it, max_x_it] = std::minmax_element( + detector_coords.begin(), + detector_coords.end(), + [](const auto &a, const auto &b) { return a.first < b.first; }); + auto [min_y_it, max_y_it] = std::minmax_element( + detector_coords.begin(), + detector_coords.end(), + [](const auto &a, const auto &b) { return a.second < b.second; }); + + BoundingBoxExtents bbox; + // Use floor/ceil as specified in the paper: xmin 
= floor(min([x1,x2,x3,x4])) + bbox.x_min = static_cast(std::floor(min_x_it->first)); + bbox.x_max = static_cast(std::ceil(max_x_it->first)); + bbox.y_min = static_cast(std::floor(min_y_it->second)); + bbox.y_max = static_cast(std::ceil(max_y_it->second)); + + // Calculate the image range (z-direction) using mosaicity parameter Δm + // The extent in φ depends on the geometry factor ζ = m₂ · e₁ + double zeta = rot_axis.dot(e1); + if (std::abs(zeta) > ZETA_TOLERANCE) { // Avoid division by zero + // Convert angular extents to rotation angles: φ′ = φᶜ ± Δm/ζ + double phi_plus = phi_c + delta_m / zeta; + double phi_minus = phi_c - delta_m / zeta; + + // Convert phi angles from radians to degrees before using scan parameters + double phi_plus_deg = phi_plus * 180.0 / M_PI; + double phi_minus_deg = phi_minus * 180.0 / M_PI; + + // Transform rotation angles to image numbers using scan parameters + double z_plus = + image_range_start - 1 + ((phi_plus_deg - osc_start) / osc_width); + double z_minus = + image_range_start - 1 + ((phi_minus_deg - osc_start) / osc_width); + + // Clamp to the actual image range and use floor/ceil for integer bounds + bbox.z_min = + std::max(image_range_start - 1, + static_cast(std::floor(std::min(z_plus, z_minus)))); + bbox.z_max = std::min( + image_range_end, static_cast(std::ceil(std::max(z_plus, z_minus)))); + } else { + // Handle degenerate case where reflection is parallel to rotation axis + // In this case, the reflection spans the entire image range + bbox.z_min = image_range_start; + bbox.z_max = image_range_end; + } + + extents.push_back(bbox); + } + + return extents; +} diff --git a/integrator/extent.hpp b/integrator/extent.hpp new file mode 100644 index 00000000..6b86f23c --- /dev/null +++ b/integrator/extent.hpp @@ -0,0 +1,79 @@ +/** + * @file extent.hpp + * @brief Header for extent and bounding box algorithms + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +// Define a 2D mdspan type alias for 
double precision +using mdspan_2d_double = + std::experimental::mdspan>; + +/** + * @brief Structure to hold pixel coordinate extents for reflection bounding + * boxes. + * + * Contains the minimum and maximum bounds in detector pixel coordinates (x, y) + * and image numbers (z) that define the region of interest around each + * reflection for integration. + */ +struct BoundingBoxExtents { + // x and y may need to be a double for extent calculations + // signed as some values are negative, but this should be clamped later + int x_min, x_max; ///< Detector x-pixel range (fast axis) + int y_min, y_max; ///< Detector y-pixel range (slow axis) + int z_min, z_max; ///< Image number range (rotation axis) +}; + +/** + * @brief Compute bounding box extents for reflection integration using the + * Kabsch coordinate system. + * + * 1. Calculates angular divergence parameters Δb and Δm + * 2. Projects these divergences onto the Kabsch coordinate system to find + * the corners of the integration region in reciprocal space + * 3. Transforms these reciprocal space coordinates back to detector pixel + * coordinates and image numbers to define practical bounding boxes + * + * The method accounts for the non-orthonormal nature of the Kabsch basis + * and ensures that the bounding boxes encompass the full extent of each + * reflection's diffraction profile. 
+ * + * @param s0 Incident beam vector (s₀), units of 1/Å + * @param rot_axis Unit goniometer rotation axis vector (m₂) + * @param s1_vectors Matrix of predicted s₁ vectors for all reflections, + * shape (num_reflections, 3) + * @param phi_positions Matrix containing reflection positions, where the third + * column contains φᶜ values in radians + * @param num_reflections Number of reflections to process + * @param sigma_b Beam divergence standard deviation in RADIANS (σb) + * @param sigma_m Mosaicity standard deviation in RADIANS (σm) + * @param panel Detector panel object for coordinate transformations + * @param scan Scan object containing oscillation and image range information + * @param beam Beam object for wavelength and other beam properties + * @param n_sigma Number of standard deviations to include in the bounding box + * (default: 3.0) + * @param sigma_b_multiplier Additional multiplier for beam divergence + * (default: 2.0, called 'm' in DIALS) + * @return Vector of BoundingBoxExtents structures, one per reflection + */ +std::vector compute_kabsch_bounding_boxes( + const Eigen::Vector3d &s0, + const Eigen::Vector3d &rot_axis, + const mdspan_2d_double &s1_vectors, + const mdspan_2d_double &phi_positions, + const size_t num_reflections, + const double sigma_b, + const double sigma_m, + const Panel &panel, + const Scan &scan, + const MonochromaticBeam &beam, + const double n_sigma = 3.0, + const double sigma_b_multiplier = 2.0); diff --git a/integrator/integrator.cc b/integrator/integrator.cc index 4723c039..d81ae4e6 100644 --- a/integrator/integrator.cc +++ b/integrator/integrator.cc @@ -1,13 +1,20 @@ /** * @file integrator.cc + * @brief Main application file for accelerated integration processing + * containing the necessary data loading, argument parsing, + * threading, data preparation, and GPU kernel invocation in + * order to perform GPU-accelerated integration processing. 
*/ +#include "integrator.cuh" + #include #include #include #include #include +#include #include #include #include @@ -27,9 +34,10 @@ #include "common.hpp" #include "cuda_arg_parser.hpp" #include "cuda_common.hpp" +#include "extent.hpp" #include "ffs_logger.hpp" #include "h5read.h" -#include "math/device_precision.cuh" +#include "kabsch.cuh" #include "math/math_utils.cuh" #include "math/vector3d.cuh" #include "predict.cc" @@ -42,6 +50,26 @@ using namespace std::chrono_literals; using mdspan_2d = std::experimental::mdspan>; +// Conversion helpers for Eigen to CUDA vector types +/** + * @brief Convert Eigen::Vector3d to fastvec::Vector3D + */ +inline fastvec::Vector3D to_vector3d(const Eigen::Vector3d &v) { + return fastvec::make_vector3d(static_cast(v.x()), + static_cast(v.y()), + static_cast(v.z())); +} + +/** + * @brief Convert mdspan row to fastvec::Vector3D + */ +template +inline fastvec::Vector3D to_vector3d(const MdspanType &mdspan, size_t row) { + return fastvec::make_vector3d(static_cast(mdspan(row, 0)), + static_cast(mdspan(row, 1)), + static_cast(mdspan(row, 2))); +} + // Global stop token for picking up user cancellation std::stop_source global_stop; @@ -343,28 +371,6 @@ int main(int argc, char **argv) { #pragma endregion Predict or extract predictions - // TODO: Improve this hacky conversion from double to scalar_t (float) - std::vector s1_vectors_converted_data(s1_vectors.extent(0) * 3); - mdspan_type s1_vectors_converted( - s1_vectors_converted_data.data(), s1_vectors.extent(0), 3); - //size_t num_reflections = s1_vectors.extent(0); - - // Direct pointer access conversion loop -> compiler should optimize this - const double *src = s1_vectors.data_handle(); - scalar_t *dst = s1_vectors_converted_data.data(); - for (size_t i = 0; i < num_reflections * 3; ++i) { - dst[i] = static_cast(src[i]); - } - - std::vector phi_positions_converted_data(phi_column.extent(0)); - // mdspan_type phi_positions_converted(phi_positions_converted_data.data(), 
phi_column.extent(0)); - // Direct pointer access conversion loop -> compiler should optimize this - src = phi_column.data_handle(); - dst = phi_positions_converted_data.data(); - for (size_t i = 0; i < phi_column.extent(0); ++i) { - dst[i] = static_cast(src[i]); - } - #pragma region Image Reading and Threading // Now set up for multi-threaded image reading and processing logger.info("Setting up image reading and threading"); @@ -379,67 +385,67 @@ int main(int argc, char **argv) { // Call the CPU-based extent function // Note: compute_kabsch_bounding_boxes expects double precision mdspan, // so we pass the original double precision s1_vectors and phi_column - // std::vector computed_bboxes = - // compute_kabsch_bounding_boxes(s0, - // rotation_axis, - // s1_vectors, - // phi_column, - // num_reflections, - // sigma_b, - // sigma_m, - // panel, - // scan, - // beam); + std::vector computed_bboxes = + compute_kabsch_bounding_boxes(s0, + rotation_axis, + s1_vectors, + phi_column, + num_reflections, + sigma_b, + sigma_m, + panel, + scan, + beam); logger.info("Bounding box computation completed"); // Convert BoundingBoxExtents to flat array format for storage - // std::vector computed_bbox_data(num_reflections * 6); - // for (size_t i = 0; i < num_reflections; ++i) { - // const int step = 6 * i; - // computed_bbox_data[step + 0] = computed_bboxes[i].x_min; - // computed_bbox_data[step + 1] = computed_bboxes[i].x_max; - // computed_bbox_data[step + 2] = computed_bboxes[i].y_min; - // computed_bbox_data[step + 3] = computed_bboxes[i].y_max; - // computed_bbox_data[step + 4] = static_cast(computed_bboxes[i].z_min); - // computed_bbox_data[step + 5] = static_cast(computed_bboxes[i].z_max); - // } + std::vector computed_bbox_data(num_reflections * 6); + for (size_t i = 0; i < num_reflections; ++i) { + const int step = 6 * i; + computed_bbox_data[step + 0] = computed_bboxes[i].x_min; + computed_bbox_data[step + 1] = computed_bboxes[i].x_max; + computed_bbox_data[step + 2] = 
computed_bboxes[i].y_min; + computed_bbox_data[step + 3] = computed_bboxes[i].y_max; + computed_bbox_data[step + 4] = static_cast(computed_bboxes[i].z_min); + computed_bbox_data[step + 5] = static_cast(computed_bboxes[i].z_max); + } // Map reflections by z layer (image number) - // logger.info("Mapping reflections by image number (z layer)"); - // std::unordered_map> reflections_by_image; + logger.info("Mapping reflections by image number (z layer)"); + std::unordered_map> reflections_by_image; - // for (size_t refl_id = 0; refl_id < num_reflections; ++refl_id) { - // const auto &bbox = computed_bboxes[refl_id]; + for (size_t refl_id = 0; refl_id < num_reflections; ++refl_id) { + const auto &bbox = computed_bboxes[refl_id]; - // // Add this reflection to all images it spans - // for (int z = bbox.z_min; z <= bbox.z_max; ++z) { - // reflections_by_image[z].push_back(refl_id); - // } - // } + // Add this reflection to all images it spans + for (int z = bbox.z_min; z <= bbox.z_max; ++z) { + reflections_by_image[z].push_back(refl_id); + } + } - // logger.info("Reflections mapped across {} unique images", - // reflections_by_image.size()); + logger.info("Reflections mapped across {} unique images", + reflections_by_image.size()); // Log some statistics about the mapping - // if (!reflections_by_image.empty()) { - // size_t min_refls_per_image = std::numeric_limits::max(); - // size_t max_refls_per_image = 0; - // size_t total_refls = 0; - - // for (const auto &[image, refls] : reflections_by_image) { - // min_refls_per_image = std::min(min_refls_per_image, refls.size()); - // max_refls_per_image = std::max(max_refls_per_image, refls.size()); - // total_refls += refls.size(); - // } - - // double avg_refls_per_image = - // static_cast(total_refls) / reflections_by_image.size(); - // logger.info("Reflections per image: min={}, max={}, avg={:.1f}", - // min_refls_per_image, - // max_refls_per_image, - // avg_refls_per_image); - // } + if (!reflections_by_image.empty()) { + 
size_t min_refls_per_image = std::numeric_limits::max(); + size_t max_refls_per_image = 0; + size_t total_refls = 0; + + for (const auto &[image, refls] : reflections_by_image) { + min_refls_per_image = std::min(min_refls_per_image, refls.size()); + max_refls_per_image = std::max(max_refls_per_image, refls.size()); + total_refls += refls.size(); + } + + double avg_refls_per_image = + static_cast(total_refls) / reflections_by_image.size(); + logger.info("Reflections per image: min={}, max={}, avg={:.1f}", + min_refls_per_image, + max_refls_per_image, + avg_refls_per_image); + } // Get threading parameters uint32_t num_cpu_threads = parser.get("threads"); @@ -480,17 +486,89 @@ int main(int argc, char **argv) { double time_waiting_for_images = 0.0; +#pragma region Prep GPU Data Buffers + // Get detector d_matrix and flatten for GPU + Eigen::Matrix3d d_matrix_eigen = panel.get_d_matrix(); + std::vector d_matrix_flat(9); + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { + d_matrix_flat[i * 3 + j] = static_cast(d_matrix_eigen(i, j)); + } + } + + // Convert s1_vectors to Vector3D array for GPU + std::vector s1_vectors_vec(num_reflections); + for (size_t i = 0; i < num_reflections; ++i) { + s1_vectors_vec[i] = to_vector3d(s1_vectors, i); + } + + // phi_positions_converted_data already contains phi values (column index 2 of xyzcal.mm) + // Need to extract just the phi component (3rd column) + std::vector phi_values_vec(num_reflections); + for (size_t i = 0; i < num_reflections; ++i) { + phi_values_vec[i] = static_cast(phi_column(i, 2)); + } + + // Allocate and copy GPU buffers (shared across all threads) + DeviceBuffer d_d_matrix(9); + DeviceBuffer d_s1_vectors(num_reflections); + DeviceBuffer d_phi_values(num_reflections); + DeviceBuffer d_bboxes(num_reflections); + + d_d_matrix.assign(d_matrix_flat.data()); + d_s1_vectors.assign(s1_vectors_vec.data()); + d_phi_values.assign(phi_values_vec.data()); + d_bboxes.assign(computed_bboxes.data()); + + // Convert 
beam parameters to Vector3D + fastvec::Vector3D s0_vec = to_vector3d(s0); + fastvec::Vector3D rot_axis_vec = to_vector3d(rotation_axis); + scalar_t wavelength = static_cast(wl); + scalar_t osc_start_scalar = static_cast(osc_start); + scalar_t osc_width_scalar = static_cast(osc_width); + + // Summation integration parameters + // delta_b and delta_m define the foreground ellipsoid extent in Kabsch space + constexpr scalar_t n_sigma = 3.0; // Default number of sigma for foreground cutoff + const scalar_t delta_b = n_sigma * static_cast(sigma_b); + const scalar_t delta_m = n_sigma * static_cast(sigma_m); + logger.info("Summation integration: delta_b={:.6f}, delta_m={:.6f} (n_sigma={})", + delta_b, + delta_m, + n_sigma); + + // Allocate accumulator buffers for summation integration (shared across all threads) + // These persist across all images and accumulate atomically + DeviceBuffer d_foreground_sum(num_reflections); + DeviceBuffer d_foreground_count(num_reflections); + DeviceBuffer d_background_sum(num_reflections); + DeviceBuffer d_background_count(num_reflections); + + // Zero-initialize the accumulator buffers + cudaMemset(d_foreground_sum.data(), 0, num_reflections * sizeof(accumulator_t)); + cudaMemset(d_foreground_count.data(), 0, num_reflections * sizeof(uint32_t)); + cudaMemset(d_background_sum.data(), 0, num_reflections * sizeof(accumulator_t)); + cudaMemset(d_background_count.data(), 0, num_reflections * sizeof(uint32_t)); + cuda_throw_error(); +#pragma endregion Prep GPU Data Buffers + +#pragma region Thread launch + logger.info("Starting image reading and processing threads"); // Spawn the reader threads std::vector threads; for (int thread_id = 0; thread_id < num_cpu_threads; ++thread_id) { threads.emplace_back([&, thread_id]() { auto stop_token = global_stop.get_token(); + CudaStream stream; // Per-thread CUDA stream - // Full image buffers for decompression - auto decompressed_image = make_cuda_pinned_malloc(width * height); + // Full image buffers 
for decompression (pinned memory for faster transfer) + auto host_image = make_cuda_pinned_malloc(width * height); auto raw_chunk_buffer = std::vector(width * height * sizeof(pixel_t)); + // Device memory for GPU processing + auto device_image = PitchedMalloc(width, height); + // Let all threads do setup tasks before reading starts cpu_sync.arrive_and_wait(); @@ -503,11 +581,11 @@ int main(int argc, char **argv) { } // Check if this image has any reflections - // if (reflections_by_image.find(image_num) - // == reflections_by_image.end()) { - // completed_images += 1; - // continue; - // } + if (reflections_by_image.find(image_num) + == reflections_by_image.end()) { + completed_images += 1; + continue; + } { std::scoped_lock lock(reader_mutex); @@ -561,11 +639,11 @@ int main(int argc, char **argv) { break; } - // Decompress the data + // Decompress the data into pinned host memory switch (reader.get_raw_chunk_compression()) { case Reader::ChunkCompression::BITSHUFFLE_LZ4: bshuf_decompress_lz4(buffer.data() + 12, - decompressed_image.get(), + host_image.get(), width * height, sizeof(pixel_t), 0); @@ -573,14 +651,57 @@ int main(int argc, char **argv) { case Reader::ChunkCompression::BYTE_OFFSET_32: decompress_byte_offset( buffer, - {decompressed_image.get(), + {host_image.get(), static_cast::size_type>(width * height)}); break; } - // TODO: image processing here - // decompressed_image.get() contains the decompressed pixel data (width * height pixels) - // reflections_by_image[image_num] contains the reflection IDs for this image + // Copy image data to device memory + cudaMemcpy2DAsync(device_image.get(), + device_image.pitch_bytes(), + host_image.get(), + width * sizeof(pixel_t), + width * sizeof(pixel_t), + height, + cudaMemcpyHostToDevice, + stream); + cuda_throw_error(); + + // Get reflection indices for this image and copy to device + const auto &refl_indices = reflections_by_image[image_num]; + size_t num_refls_this_image = refl_indices.size(); + + // Allocate 
per-image device buffer for reflection indices + DeviceBuffer d_reflection_indices(num_refls_this_image); + d_reflection_indices.assign(refl_indices.data()); + + // Launch Kabsch transform kernel for this image + // This computes Kabsch coordinates and atomically accumulates + // foreground/background intensities for summation integration + compute_kabsch_transform(device_image.get(), + device_image.pitch_bytes(), + width, + height, + image_num, + d_d_matrix.data(), + wavelength, + osc_start_scalar, + osc_width_scalar, + image_range_start, + s0_vec, + rot_axis_vec, + d_s1_vectors.data(), + d_phi_values.data(), + d_bboxes.data(), + d_reflection_indices.data(), + num_refls_this_image, + delta_b, + delta_m, + d_foreground_sum.data(), + d_foreground_count.data(), + d_background_sum.data(), + d_background_count.data(), + stream); logger.trace("Thread {} loaded image {}", thread_id, image_num); completed_images += 1; @@ -610,8 +731,457 @@ int main(int argc, char **argv) { logger.info("Total time waiting for images: {:.2f} s", time_waiting_for_images); } +#pragma region Summation Integration Finalization + // Host-side reduction and finalization + // Copy accumulator buffers back from GPU and compute final intensities + logger.info("Finalizing summation integration for {} reflections", num_reflections); + + // Allocate host buffers for results + std::vector h_foreground_sum(num_reflections); + std::vector h_foreground_count(num_reflections); + std::vector h_background_sum(num_reflections); + std::vector h_background_count(num_reflections); + + // Copy results from device to host + d_foreground_sum.extract(h_foreground_sum.data()); + d_foreground_count.extract(h_foreground_count.data()); + d_background_sum.extract(h_background_sum.data()); + d_background_count.extract(h_background_count.data()); + + // Compute final intensities for each reflection + std::vector intensities(num_reflections); + std::vector variances(num_reflections); + std::vector sigmas(num_reflections); // 
σ(I) = √Var(I) + std::vector backgrounds(num_reflections); // b̄ (background mean per pixel) + std::vector background_sigmas(num_reflections); // σ(b̄) + size_t valid_reflections = 0; + + for (size_t i = 0; i < num_reflections; ++i) { + uint32_t fg_count = h_foreground_count[i]; + uint32_t bg_count = h_background_count[i]; + + if (fg_count == 0) { + // No foreground pixels - reflection not measured + intensities[i] = 0.0; + variances[i] = -1.0; // Flag as invalid + sigmas[i] = -1.0; + backgrounds[i] = 0.0; + background_sigmas[i] = -1.0; + continue; + } + + double fg_sum = h_foreground_sum[i]; + + // Background mean b̄ (simple mean for now + double bg_mean = (bg_count > 0) ? h_background_sum[i] / bg_count : 0.0; + + // Subtract background: I = Σcᵢ(fg) − n_fg · b̄ + double background_total = bg_mean * fg_count; + double intensity = fg_sum - background_total; + + // Variance estimation + // + // I = Σcᵢ(fg) - B, where B = n_fg · b̄ (total background under foreground) + // + // Var(I) = |I| + |B| · (1 + n_fg / n_bg) + // + // The |I| term is the Poisson variance of the net signal. + // The |B| term accounts for the Poisson variance of the background, + // with the (1 + n_fg/n_bg) factor propagating the uncertainty in + // the background estimate from n_bg pixels to n_fg pixels. + // + double fg_bg_ratio = + (bg_count > 0) ? static_cast(fg_count) / bg_count : 0.0; + double variance = + std::abs(intensity) + std::abs(background_total) * (1.0 + fg_bg_ratio); + double bg_variance = std::abs(background_total) * (1.0 + fg_bg_ratio); + + intensities[i] = intensity; + variances[i] = variance; + sigmas[i] = (variance > 0.0) ? std::sqrt(variance) : 0.0; + backgrounds[i] = bg_mean; + background_sigmas[i] = (bg_variance > 0.0) ? 
std::sqrt(bg_variance) : 0.0; + valid_reflections++; + } + + logger.info("Summation integration complete: {} valid reflections out of {}", + valid_reflections, + num_reflections); + + // Log some statistics + if (valid_reflections > 0) { + double sum_intensity = 0.0; + double sum_sigma = 0.0; + double sum_bg = 0.0; + double max_intensity = -std::numeric_limits::infinity(); + double min_intensity = std::numeric_limits::infinity(); + size_t n_positive_isigma = 0; + double sum_i_over_sigma = 0.0; + for (size_t i = 0; i < num_reflections; ++i) { + if (variances[i] >= 0) { + sum_intensity += intensities[i]; + sum_sigma += sigmas[i]; + sum_bg += backgrounds[i]; + max_intensity = std::max(max_intensity, intensities[i]); + min_intensity = std::min(min_intensity, intensities[i]); + if (sigmas[i] > 0.0) { + sum_i_over_sigma += intensities[i] / sigmas[i]; + n_positive_isigma++; + } + } + } + logger.info("Intensity statistics: min={:.1f}, max={:.1f}, mean={:.1f}", + min_intensity, + max_intensity, + sum_intensity / valid_reflections); + logger.info("Mean σ(I)={:.2f}, mean background={:.2f}", + sum_sigma / valid_reflections, + sum_bg / valid_reflections); + if (n_positive_isigma > 0) { + logger.info("Mean I/σ(I)={:.2f} ({} reflections with σ>0)", + sum_i_over_sigma / n_positive_isigma, + n_positive_isigma); + } + } + +#pragma endregion Summation Integration Finalization + #pragma endregion Image Reading and Threading return 0; } +#pragma Thread launch #pragma endregion Application Entry + +#pragma region Bbox computation +/* + // Compute new bounding boxes using Kabsch coordinate system + + logger.info("Computing new Kabsch bounding boxes for {} reflections", + num_reflections); + + // Create output array for bounding boxes (6 values per reflection) + std::vector computed_bbox_data(num_reflections * 6); + + // TODO: Make these conversions better + + // Convert s1_vectors from double to scalar_t (float) format + std::vector s1_vectors_converted(num_reflections); + for (size_t i 
= 0; i < num_reflections; ++i) { + s1_vectors_converted[i] = + fastvec::make_vector3d(static_cast(s1_vectors(i, 0)), + static_cast(s1_vectors(i, 1)), + static_cast(s1_vectors(i, 2))); + } + + // Convert phi values from double to scalar_t + std::vector phi_values_converted(num_reflections); + for (size_t i = 0; i < num_reflections; ++i) { + phi_values_converted[i] = + static_cast(phi_column(i, 2)); // Extract phi (z-component) + } + + // Extract detector parameters (assuming you add accessors to Panel class) + DetectorParameters detector_params; + auto pixel_size_array = panel.get_pixel_size(); + detector_params.pixel_size[0] = static_cast(pixel_size_array[0]); + detector_params.pixel_size[1] = static_cast(pixel_size_array[1]); + + // TODO: Add these accessors to Panel class + detector_params.parallax_correction = panel.has_parallax_correction(); + detector_params.mu = static_cast(panel.get_mu()); + detector_params.thickness = static_cast(panel.get_thickness()); + + // Convert geometry vectors to CUDA format + auto fast_axis_eigen = panel.get_fast_axis(); + auto slow_axis_eigen = panel.get_slow_axis(); + auto origin_eigen = panel.get_origin(); + + detector_params.fast_axis = + fastvec::make_vector3d(static_cast(fast_axis_eigen.x()), + static_cast(fast_axis_eigen.y()), + static_cast(fast_axis_eigen.z())); + detector_params.slow_axis = + fastvec::make_vector3d(static_cast(slow_axis_eigen.x()), + static_cast(slow_axis_eigen.y()), + static_cast(slow_axis_eigen.z())); + detector_params.origin = + fastvec::make_vector3d(static_cast(origin_eigen.x()), + static_cast(origin_eigen.y()), + static_cast(origin_eigen.z())); + + // Get D-matrix inverse + auto d_matrix_inv = panel.get_d_matrix().inverse(); + std::vector d_matrix_inv_flat(9); + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { + d_matrix_inv_flat[i * 3 + j] = static_cast(d_matrix_inv(i, j)); + } + } + + // Compute new bounding boxes using CUDA with proper detector parameters + compute_bbox_extent( + 
s1_vectors_converted.data(), + phi_values_converted.data(), + fastvec::make_vector3d(s0.x(), s0.y(), s0.z()), + fastvec::make_vector3d(rotation_axis.x(), rotation_axis.y(), rotation_axis.z()), + sigma_b, + sigma_m, + static_cast(osc_start), + static_cast(osc_width), + image_range_start, + scan.get_image_range()[1], // image_range_end + static_cast(beam.get_wavelength()), + d_matrix_inv_flat.data(), + detector_params.pixel_size, + detector_params.parallax_correction, + detector_params.mu, + detector_params.thickness, + detector_params.fast_axis, + detector_params.slow_axis, + detector_params.origin, + computed_bbox_data.data(), + num_reflections); + + // Convert to reflection table format for comparison + // Data is already in the correct flat format from compute_bbox_extent + logger.info("Bounding box computation completed"); + + // Compare with existing bounding boxes + logger.info("Comparing computed bounding boxes with existing bbox column"); + logger.trace("First 5 bounding box comparisons:"); + for (size_t i = 0; i < std::min(5, num_reflections); ++i) { + // Existing bbox + double ex_x_min = bbox_column(i, 0); + double ex_x_max = bbox_column(i, 1); + double ex_y_min = bbox_column(i, 2); + double ex_y_max = bbox_column(i, 3); + int ex_z_min = static_cast(bbox_column(i, 4)); + int ex_z_max = static_cast(bbox_column(i, 5)); + + // Computed bbox (from flat array) + double comp_x_min = computed_bbox_data[i * 6 + 0]; + double comp_x_max = computed_bbox_data[i * 6 + 1]; + double comp_y_min = computed_bbox_data[i * 6 + 2]; + double comp_y_max = computed_bbox_data[i * 6 + 3]; + int comp_z_min = static_cast(computed_bbox_data[i * 6 + 4]); + int comp_z_max = static_cast(computed_bbox_data[i * 6 + 5]); + + logger.trace("bbox[{}]: existing x=[{:.1f},{:.1f}] y=[{:.1f},{:.1f}] z=[{},{}]", + i, + ex_x_min, + ex_x_max, + ex_y_min, + ex_y_max, + ex_z_min, + ex_z_max); + logger.trace("bbox[{}]: computed x=[{:.1f},{:.1f}] y=[{:.1f},{:.1f}] z=[{},{}]", + i, + comp_x_min, + 
comp_x_max, + comp_y_min, + comp_y_max, + comp_z_min, + comp_z_max); + } + +#pragma endregion Bbox computation +#pragma region Kabsch tf + + logger.info( + "Computing Kabsch coordinates for voxel centers within existing bounding boxes"); + + std::vector voxel_kabsch_coords; // ε₁, ε₂, ε₃ for each voxel center + std::vector voxel_reflection_ids; // Which reflection each voxel belongs to + std::vector voxel_positions; // x, y, z positions for each voxel center + std::vector voxel_s1_lengths; // |s₁| for each voxel center + + // Convert global vectors to CUDA format once + fastvec::Vector3D s0_cuda = fastvec::make_vector3d(s0.x(), s0.y(), s0.z()); + fastvec::Vector3D rotation_axis_cuda = + fastvec::make_vector3d(rotation_axis.x(), rotation_axis.y(), rotation_axis.z()); + + // Process each reflection's existing bounding box + for (size_t refl_id = 0; refl_id < num_reflections; ++refl_id) { + // Extract bounding box from existing column (format: x_min, x_max, y_min, y_max, z_min, z_max) + double x_min = bbox_column(refl_id, 0); + double x_max = bbox_column(refl_id, 1); + double y_min = bbox_column(refl_id, 2); + double y_max = bbox_column(refl_id, 3); + int z_min = static_cast(bbox_column(refl_id, 4)); + int z_max = static_cast(bbox_column(refl_id, 5)); + + // Debug: Calculate expected voxel count + int x_count = static_cast(x_max) - static_cast(x_min); + int y_count = static_cast(y_max) - static_cast(y_min); + int z_count = z_max - z_min; + int expected_voxels = x_count * y_count * z_count; + + logger.trace( + "Reflection {}: bbox x=[{:.1f},{:.1f}] y=[{:.1f},{:.1f}] z=[{},{}]", + refl_id, + x_min, + x_max, + y_min, + y_max, + z_min, + z_max); + logger.trace("Expected voxels: {} x {} x {} = {}", + x_count, + y_count, + z_count, + expected_voxels); + + // Get reflection centroid data + Eigen::Vector3d s1_c_eigen( + s1_vectors(refl_id, 0), s1_vectors(refl_id, 1), s1_vectors(refl_id, 2)); + fastvec::Vector3D s1_c_cuda = + fastvec::make_vector3d(s1_c_eigen.x(), 
s1_c_eigen.y(), s1_c_eigen.z()); + double phi_c = phi_column(refl_id, 2); + + logger.trace( + "Processing reflection {} with existing bbox x=[{:.1f},{:.1f}] " + "y=[{:.1f},{:.1f}] z=[{},{}]", + refl_id, + x_min, + x_max, + y_min, + y_max, + z_min, + z_max); + + // Collect all voxel data for this reflection for batch processing + std::vector batch_s_pixels; + std::vector batch_phi_pixels; + std::vector> batch_voxel_coords; + + // Iterate through all voxel centers in the bounding box + for (int z = z_min; z < z_max; ++z) { + double phi_pixel = + osc_start + (z - image_range_start + 1.5) * osc_width / 180.0 * M_PI; + + for (int y = static_cast(y_min); y < static_cast(y_max); ++y) { + for (int x = static_cast(x_min); x < static_cast(x_max); + ++x) { + // Use voxel center coordinates + // double voxel_x = x + 0.5; + // double voxel_y = y + 0.5; + double voxel_z = 1; //z + 0.5; + + std::array xy_mm = panel.px_to_mm( + static_cast(x) + 0.5, static_cast(y) + 0.5); + + double voxel_x = xy_mm[0]; + double voxel_y = xy_mm[1]; + + // Convert detector coordinates to lab coordinate using panel geometry + Vector3d lab_coord = + panel.get_d_matrix() * Vector3d(voxel_x, voxel_y, 1.0); + + // logger.trace( + // "Voxel center ({}, {}, {}): lab_coord = ({:.6f}, {:.6f}, {:.6f})", + // voxel_x, + // voxel_y, + // voxel_z, + // lab_coord.x(), + // lab_coord.y(), + // lab_coord.z()); + + // Convert lab coordinate to reciprocal space vector + Eigen::Vector3d s_pixel_eigen = lab_coord.normalized() / wl; + fastvec::Vector3D s_pixel_cuda = fastvec::make_vector3d( + s_pixel_eigen.x(), s_pixel_eigen.y(), s_pixel_eigen.z()); + + // Store for batch processing + batch_s_pixels.push_back(s_pixel_cuda); + batch_phi_pixels.push_back(static_cast(phi_pixel)); + batch_voxel_coords.emplace_back(voxel_x, voxel_y, voxel_z); + } + } + } + + // Process all voxels for this reflection with CUDA if we have any + if (!batch_s_pixels.empty()) { + size_t num_voxels = batch_s_pixels.size(); + + // Output arrays + 
std::vector batch_eps_results(num_voxels); + std::vector batch_s1_len_results(num_voxels); + + // Call CUDA function for voxel processing + compute_kabsch_transform(batch_s_pixels.data(), + batch_phi_pixels.data(), + s1_c_cuda, + static_cast(phi_c), + s0_cuda, + rotation_axis_cuda, + batch_eps_results.data(), + batch_s1_len_results.data(), + num_voxels); + + // Store results + for (size_t i = 0; i < num_voxels; ++i) { + const auto &[voxel_x, voxel_y, voxel_z] = batch_voxel_coords[i]; + const auto &eps = batch_eps_results[i]; + + voxel_kabsch_coords.insert(voxel_kabsch_coords.end(), + {eps.x, eps.y, eps.z}); + voxel_reflection_ids.push_back(static_cast(refl_id)); + voxel_positions.insert(voxel_positions.end(), + {voxel_x, voxel_y, voxel_z}); + voxel_s1_lengths.push_back( + static_cast(batch_s1_len_results[i])); + } + } + } + + size_t actual_voxels = voxel_reflection_ids.size(); + logger.info("Processed {} voxel centers, computed {} Kabsch coordinates", + actual_voxels, + voxel_kabsch_coords.size() / 3); + + // Add debugging information + logger.info( + "Vector sizes: kabsch_coords={}, reflection_ids={}, positions={}, s1_lengths={}", + voxel_kabsch_coords.size(), + voxel_reflection_ids.size(), + voxel_positions.size(), + voxel_s1_lengths.size()); + + logger.info( + "Expected sizes: kabsch_coords={}, reflection_ids={}, positions={}, " + "s1_lengths={}", + actual_voxels * 3, + actual_voxels, + actual_voxels * 3, + actual_voxels); + +#pragma endregion Kabsch tf +#pragma region Application Output + + // Add computed bounding boxes to reflection table for comparison + // computed_bbox_data is already std::vector + reflections.add_column("computed_bbox", + std::vector{num_reflections, 6}, + computed_bbox_data); + + // Create voxel data table + ReflectionTable voxel_table; + voxel_table.add_column( + "kabsch_coordinates", std::vector{actual_voxels, 3}, voxel_kabsch_coords); + voxel_table.add_column( + "reflection_id", + std::vector{actual_voxels, 1}, + 
std::vector(voxel_reflection_ids.begin(), voxel_reflection_ids.end())); + voxel_table.add_column( + "pixel_coordinates", std::vector{actual_voxels, 3}, voxel_positions); + voxel_table.add_column( + "voxel_s1_length", std::vector{actual_voxels, 1}, voxel_s1_lengths); + + // Write output files + reflections.write("output_reflections.h5"); + voxel_table.write("voxel_kabsch_data.h5"); + + logger.info("Results saved to output_reflections.h5 and voxel_kabsch_data.h5"); +*/ +#pragma endregion Application Output diff --git a/integrator/integrator.cu b/integrator/integrator.cu new file mode 100644 index 00000000..cbf4c4b0 --- /dev/null +++ b/integrator/integrator.cu @@ -0,0 +1,5 @@ +/** + * @file integrator.cu + */ + +#include "integrator.cuh" \ No newline at end of file diff --git a/integrator/integrator.cuh b/integrator/integrator.cuh new file mode 100644 index 00000000..8cd5f72e --- /dev/null +++ b/integrator/integrator.cuh @@ -0,0 +1,7 @@ +/** + * @file integrator.cuh + */ + +#pragma once + +// Nothing yet \ No newline at end of file diff --git a/integrator/kabsch.cu b/integrator/kabsch.cu new file mode 100644 index 00000000..493b62eb --- /dev/null +++ b/integrator/kabsch.cu @@ -0,0 +1,601 @@ +/** + * @file kabsch.cu + * @brief CUDA implementation of Kabsch coordinate transformations for + * integration + * + * This file implements GPU-accelerated coordinate transformations used + * in the Kabsch algorithm for data processing. The + * Kabsch transformation converts pixel coordinates from reciprocal + * space into a geometry-invariant local coordinate system, enabling + * efficient reflection profile integration and summation. 
+ * + * The coordinate system is defined by three basis vectors: + * - e₁: Perpendicular to the scattering plane (s₁ᶜ × s₀) + * - e₂: Within the scattering plane, orthogonal to e₁ (s₁ᶜ × e₁) + * - e₃: Bisects the incident and diffracted beam directions (s₁ᶜ + s₀) + * + * The resulting Kabsch coordinates (ε₁, ε₂, ε₃) represent: + * - ε₁: Displacement perpendicular to the scattering plane + * - ε₂: Displacement within the scattering plane (with rotation correction) + * - ε₃: Displacement along the rotation axis (scaled by geometry factor ζ) + * + * This implementation uses modern C++ RAII principles with DeviceBuffer + * for automatic GPU memory management and the custom Vector3D class for + * mathematical operations. All vector operations are optimized for both + * host and device execution contexts. + * + * @author Dimitrios Vlachos + * @date 2025 + * @see Vector3D for mathematical vector operations + * @see DeviceBuffer for GPU memory management + * + * References: + * - Kabsch, W. (2010). Acta Cryst. D66, 133–144 + * + * GOAL: Determine foreground/background pixels and atomically aggregate + * intensities per reflection within the kernel, avoiding the need to return + * all epsilon values (too much data). + * + * TODO: + * 2. Fix unit test + * - Remove deprecated kernel + * - Refactor unit test to utilise the new kernel and host wrapper + * + * 3. Dials-data? + */ + +#include + +#include + +#include "cuda_common.hpp" +#include "device_common.cuh" +#include "extent.hpp" +#include "h5read.h" +#include "integrator.cuh" +#include "kabsch.cuh" +#include "math/math_utils.cuh" + +using namespace fastvec; + +// Block dimensions for the Kabsch transform kernel. +// Used by both the kernel (shared memory sizing) and host wrapper (grid launch). +// +// Each block of KABSCH_BLOCK_X × KABSCH_BLOCK_Y threads maps 1:1 to corners. +// A pixel's voxel has 4 xy-corners: (px,py), (px+1,py), (px,py+1), (px+1,py+1). 
// All 4 must be in the same block's shared memory so the pixel thread can read
// them after __syncthreads(). This means a block of 32×16 corners can only
// service (32-1)×(16-1) = 31×15 complete pixels — the rightmost column and
// bottom row of threads are "helper" corners that exist solely so the last
// interior pixel has a right/bottom neighbour to read.
//
// The grid is launched with a stride of (BLOCK-1) so adjacent blocks overlap
// by one column/row of corners. That single shared column/row is computed
// by both blocks (redundantly), which costs ~3-6% extra work but keeps
// everything in fast shared memory within a single kernel launch.
constexpr int KABSCH_BLOCK_X = 32;
constexpr int KABSCH_BLOCK_Y = 16;

// Maximum number of reflections that can overlap a single block's footprint
constexpr size_t MAX_BLOCK_REFLECTIONS = 64;

/**
 * @brief Decide whether a point in Kabsch space lies inside a reflection's
 *        foreground ellipsoid.
 *
 * Membership test: ε₁²/δ_B² + ε₂²/δ_B² + ε₃²/δ_M² ≤ 1.0
 *
 * @param eps     Kabsch coordinates (ε₁, ε₂, ε₃)
 * @param delta_b Foreground extent in the e₁/e₂ directions (n_sigma × σ_D)
 * @param delta_m Foreground extent in the e₃ direction (n_sigma × σ_M)
 * @return true if the point is foreground, false if background
 */
__device__ __forceinline__ bool is_foreground(const Vector3D &eps,
                                              scalar_t delta_b,
                                              scalar_t delta_m) {
    // Multiply by precomputed reciprocals rather than dividing twice —
    // division is the more expensive operation on the GPU.
    // -> Worth confirming in the profiler how the compiler lowers this.
    const scalar_t inv_db2 = scalar_t(1.0) / (delta_b * delta_b);
    const scalar_t inv_dm2 = scalar_t(1.0) / (delta_m * delta_m);

    // ε₁²/δ_B² + ε₂²/δ_B² + ε₃²/δ_M²
    const scalar_t membership =
      (eps.x * eps.x + eps.y * eps.y) * inv_db2 + (eps.z * eps.z) * inv_dm2;

    return membership <= scalar_t(1.0);
}

/**
 * @brief Transform a pixel
from reciprocal space into the local Kabsch + coordinate frame + * + * Given a predicted reflection centre and a pixel's position in + * reciprocal space, this function calculates the local Kabsch + * coordinates (ε₁, ε₂, ε₃), which represent displacements along a + * non-orthonormal basis defined by the scattering geometry. + * + * This is used to determine whether a pixel falls within the profile of + * a reflection in Kabsch space, which allows summation or profile + * integration to proceed in a geometry-invariant coordinate frame. + * + * @param s0 Incident beam vector (s₀), units of 1/Å + * @param s1_c Predicted diffracted vector at the reflection centre (s₁ᶜ), units of 1/Å + * @param phi_c Rotation angle at the reflection centre (φᶜ), in radians + * @param s_pixel Diffracted vector at the current pixel (S′), units of 1/Å + * @param phi_pixel Rotation angle at the pixel (φ′), in radians + * @param rot_axis Unit goniometer rotation axis vector (m₂) + * @param s1_len_out Output parameter for magnitude of s₁ᶜ (|s₁|) + * @return Transformed coordinates in Kabsch space (ε₁, ε₂, ε₃) + */ +__device__ Vector3D pixel_to_kabsch(const Vector3D &s0, + const Vector3D &s1_c, + scalar_t phi_c, + const Vector3D &s_pixel, + scalar_t phi_pixel, + const Vector3D &rot_axis, + scalar_t &s1_len_out) { + // Define the local Kabsch basis vectors: + + // e1 is perpendicular to the scattering plane + Vector3D e1 = normalized(cross(s1_c, s0)); + + // e2 lies within the scattering plane, orthogonal to e1 + Vector3D e2 = normalized(cross(s1_c, e1)); + + // e3 bisects the angle between s0 and s1 + Vector3D e3 = normalized(s1_c + s0); + + // Compute the length of the predicted diffracted vector (|s₁|) + scalar_t s1_len = norm(s1_c); + s1_len_out = s1_len; + + // Rotation offset between the pixel and reflection centre + scalar_t dphi = phi_pixel - phi_c; + + // Compute the predicted diffracted vector at φ′ + Vector3D s1_phi_prime = s1_c + e3 * dphi; + + // Difference vector between pixel's 
s′ and the φ′-adjusted centroid + Vector3D deltaS = s_pixel - s1_phi_prime; + + // ε₁: displacement along e1, normalised by |s₁| + scalar_t eps1 = dot(e1, deltaS) / s1_len; + + // ε₂: displacement along e2, normalised by |s₁| + scalar_t eps2 = dot(e2, deltaS) / s1_len; + + // ε₃: displacement along rotation axis, scaled by ζ = m₂ · e₁ + scalar_t zeta = dot(rot_axis, e1); + scalar_t eps3 = zeta * dphi; + + return make_vector3d(eps1, eps2, eps3); +} + +/** + * @brief CUDA kernel with 8-corner voxel foreground classification + * + * Each thread represents a corner in the (width+1)×(height+1) corner grid. + * Blocks use overlapping tiles: KABSCH_BLOCK_X × KABSCH_BLOCK_Y threads + * cover that many corners, yielding (KABSCH_BLOCK_X-1) × (KABSCH_BLOCK_Y-1) + * complete pixels per block. Adjacent blocks share one row/column of corners. + * + * Algorithm: + * 1. Every thread computes Kabsch coordinates for its corner at two φ values + * (lower and upper z-faces of the voxel) and writes foreground flags to + * shared memory. + * 2. After sync, interior threads (representing complete pixels) check all + * 8 voxel corners. If ANY corner is foreground, the pixel is foreground; + * otherwise it's background. 
+ * + * @param d_image Pointer to image data + * @param image_pitch Pitch of the image data in bytes + * @param width Image width in pixels + * @param height Image height in pixels + * @param image_num Current image number in the sequence + * @param d_matrix Detector coordinate transformation matrix (3x3) + * @param wavelength X-ray wavelength in Angstroms + * @param osc_start Starting oscillation angle in degrees + * @param osc_width Oscillation width per image in degrees + * @param image_range_start First image number in the range + * @param s0 Incident beam vector + * @param rot_axis Rotation axis vector + * @param d_s1_vectors Array of s1 vectors for each reflection + * @param d_phi_values Array of phi values for each reflection + * @param d_bboxes Array of bounding box structs for each reflection + * @param d_reflection_indices Indices of reflections touching this image + * @param num_reflections_this_image Number of reflections to process + * @param delta_b Foreground extent in e₁/e₂ directions (n_sigma × σ_D) + * @param delta_m Foreground extent in e₃ direction (n_sigma × σ_M) + * @param d_foreground_sum Output: accumulated foreground intensities per reflection + * @param d_foreground_count Output: foreground pixel counts per reflection + * @param d_background_sum Output: accumulated background intensities per reflection + * @param d_background_count Output: background pixel counts per reflection + */ +__global__ void kabsch_transform(pixel_t *d_image, + size_t image_pitch, + uint32_t width, + uint32_t height, + int image_num, + const scalar_t *d_matrix, + scalar_t wavelength, + scalar_t osc_start, + scalar_t osc_width, + int image_range_start, + Vector3D s0, + Vector3D rot_axis, + const Vector3D *d_s1_vectors, + const scalar_t *d_phi_values, + const BoundingBoxExtents *d_bboxes, + const size_t *d_reflection_indices, + size_t num_reflections_this_image, + // Summation integration parameters + scalar_t delta_b, + scalar_t delta_m, + // Output accumulators 
(atomically updated) + accumulator_t *d_foreground_sum, + uint32_t *d_foreground_count, + accumulator_t *d_background_sum, + uint32_t *d_background_count) { + // ── Shared Memory ────────────────────────────────────────────── + // Foreground status for each corner at three phi values: + // [0] = phi_low, [1] = phi_high, [2] = phi_c (centre-in-slice override) + __shared__ bool s_fg[3][KABSCH_BLOCK_Y][KABSCH_BLOCK_X]; + + // Block-level reflection list: indices into the global reflection arrays + __shared__ size_t s_block_refl[MAX_BLOCK_REFLECTIONS]; + __shared__ size_t s_num_block_refl; + + const int tx = threadIdx.x; // Thread-local x index (also indexes shared memory) + const int ty = threadIdx.y; // Thread-local y index + + // Global corner coordinates. + // The stride between block origins is (BLOCK-1), NOT BLOCK. Compare: + // Normal: gx = blockIdx.x * blockDim.x + tx (stride 32) + // Here: gx = blockIdx.x * (KABSCH_BLOCK_X - 1) + tx (stride 31) + // + // This means Block 0 covers corners 0-31, Block 1 covers 31-62, etc. + // Corner 31 is computed by BOTH blocks — that's the overlap. It ensures + // pixel 30 (in Block 0) can read its right-side corner (31) from shared + // memory, and pixel 31 (in Block 1) can read its left-side corner (31) + // from its own block's shared memory. 
+ + const int gx = blockIdx.x * (KABSCH_BLOCK_X - 1) + tx; // Global x coordinate + const int gy = blockIdx.y * (KABSCH_BLOCK_Y - 1) + ty; // Global y coordinate + + // ── Per-Corner Setup ──────────────────────────────────────────── + // Two phi values define the z-extent of the voxel on this image + const scalar_t phi_low = degrees_to_radians( + osc_start + (static_cast(image_num - image_range_start) * osc_width)); + const scalar_t phi_high = degrees_to_radians( + osc_start + + (static_cast(image_num - image_range_start + 1) * osc_width)); + + // s_pixel depends only on detector position (gx, gy), not phi + const bool corner_valid = (gx >= 0) && (gx <= static_cast(width)) && (gy >= 0) + && (gy <= static_cast(height)); + + Vector3D s_pixel = make_vector3d(0, 0, 0); + if (corner_valid) { + Vector3D lab_coord = + make_vector3d(d_matrix[0] * gx + d_matrix[1] * gy + d_matrix[2], + d_matrix[3] * gx + d_matrix[4] * gy + d_matrix[5], + d_matrix[6] * gx + d_matrix[7] * gy + d_matrix[8]); + s_pixel = normalized(lab_coord) / wavelength; + } + + // ── Block Reflection Filter ───────────────────────────────────── + // Thread 0 scans the image-level reflection list and keeps only those + // whose bbox overlaps this block's corner footprint. + if (tx == 0 && ty == 0) { + s_num_block_refl = 0; + + const int bx_min = static_cast(blockIdx.x) * (KABSCH_BLOCK_X - 1); + const int bx_max = bx_min + KABSCH_BLOCK_X - 1; + const int by_min = static_cast(blockIdx.y) * (KABSCH_BLOCK_Y - 1); + const int by_max = by_min + KABSCH_BLOCK_Y - 1; + + for (size_t i = 0; i < num_reflections_this_image; ++i) { + const size_t refl_idx = d_reflection_indices[i]; + const BoundingBoxExtents &bbox = d_bboxes[refl_idx]; + + // Does the reflection's bbox overlap this block AND current image? 
+ const bool overlaps = + (bx_max >= bbox.x_min && bx_min <= bbox.x_max) + && (by_max >= bbox.y_min && by_min <= bbox.y_max) + && (image_num >= bbox.z_min && image_num < bbox.z_max); + + if (overlaps && s_num_block_refl < MAX_BLOCK_REFLECTIONS) { + s_block_refl[s_num_block_refl++] = refl_idx; + } + } + } + + __syncthreads(); + + // Early exit if no reflections overlap this block + if (s_num_block_refl == 0) return; + // Pitched image accessor + size_t elem_pitch = image_pitch / sizeof(pixel_t); + PitchedArray2D image(d_image, &elem_pitch); + + // Does this thread represent a complete pixel? + // A pixel at (tx, ty) reads corners (tx, ty), (tx+1, ty), (tx, ty+1), + // (tx+1, ty+1) from shared memory. For tx+1 and ty+1 to be valid + // indices, we need tx < 31 and ty < 15. Threads on the right column + // (tx == 31) and bottom row (ty == 15) only contribute their corner + // during Phase 1 — they don't process a pixel in Phase 2. + const bool is_pixel_thread = (tx < KABSCH_BLOCK_X - 1) && (ty < KABSCH_BLOCK_Y - 1); + const bool pixel_in_image = is_pixel_thread && (gx < static_cast(width)) + && (gy < static_cast(height)); + + // ── Reflection Processing Loop ────────────────────────────────── + // Process each reflection: compute corner flags, sync, then accumulate pixels + for (size_t r = 0; r < s_num_block_refl; ++r) { + const size_t refl_idx = s_block_refl[r]; + const Vector3D &s1_c = d_s1_vectors[refl_idx]; + const scalar_t phi_c = d_phi_values[refl_idx]; + const BoundingBoxExtents &bbox = d_bboxes[refl_idx]; + + // Every thread writes its corner's foreground flag to shared memory. + // Per-corner bbox check avoids needless Kabsch computation. 
+ const bool in_bbox = corner_valid && (gx >= bbox.x_min && gx <= bbox.x_max) + && (gy >= bbox.y_min && gy <= bbox.y_max); + + if (in_bbox) { + scalar_t s1_len; // unused here, required by pixel_to_kabsch + + // Lower z-face (phi_low) + Vector3D eps_lo = + pixel_to_kabsch(s0, s1_c, phi_c, s_pixel, phi_low, rot_axis, s1_len); + s_fg[0][ty][tx] = is_foreground(eps_lo, delta_b, delta_m); + + // Upper z-face (phi_high) + Vector3D eps_hi = + pixel_to_kabsch(s0, s1_c, phi_c, s_pixel, phi_high, rot_axis, s1_len); + s_fg[1][ty][tx] = is_foreground(eps_hi, delta_b, delta_m); + + // Centre z-slice: evaluate at phi_c where ε₃ = 0 + // This catches reflections entirely contained within the voxel's + // z-extent — the xy ellipsoid check still applies, so only pixels + // spatially close to the reflection centre are marked foreground. + const bool centre_in_z_slice = (phi_c >= phi_low && phi_c <= phi_high); + if (centre_in_z_slice) { + Vector3D eps_c = + pixel_to_kabsch(s0, s1_c, phi_c, s_pixel, phi_c, rot_axis, s1_len); + // eps_c.z should be ~0; check only ε₁²/δ_B² + ε₂²/δ_B² ≤ 1 + s_fg[2][ty][tx] = is_foreground(eps_c, delta_b, delta_m); + } else { + s_fg[2][ty][tx] = false; + } + } else { + s_fg[0][ty][tx] = false; + s_fg[1][ty][tx] = false; + s_fg[2][ty][tx] = false; + } + + __syncthreads(); + + // Interior threads check all 12 voxel corners (8 original + 4 at phi_c) + if (pixel_in_image) { + bool any_fg = false; +#pragma unroll + for (int z = 0; z < 3; ++z) { + // if any corner at this z-slice is foreground, the pixel is foreground + any_fg |= s_fg[z][ty][tx] | s_fg[z][ty][tx + 1] | s_fg[z][ty + 1][tx] + | s_fg[z][ty + 1][tx + 1]; + } + + // Only accumulate if the pixel centre is within the bbox + // (pixels outside the bbox are irrelevant to this reflection) + const bool px_in_bbox = (gx >= bbox.x_min && gx < bbox.x_max) + && (gy >= bbox.y_min && gy < bbox.y_max); + + if (px_in_bbox) { + const accumulator_t pixel_value = + static_cast(image(gx, gy)); + + if (any_fg) { + 
atomicAdd(&d_foreground_sum[refl_idx], pixel_value); + atomicAdd(&d_foreground_count[refl_idx], 1u); + } else { + atomicAdd(&d_background_sum[refl_idx], pixel_value); + atomicAdd(&d_background_count[refl_idx], 1u); + } + } + } + + __syncthreads(); // Barrier before next reflection overwrites s_fg + } +} + +/** + * @brief Host wrapper function for image-based Kabsch computation + * + * Launches the 8-corner Kabsch kernel using overlapping tiles. Each block + * of KABSCH_BLOCK_X × KABSCH_BLOCK_Y threads covers that many corners, + * processing (KABSCH_BLOCK_X-1) × (KABSCH_BLOCK_Y-1) complete pixels. + */ +void compute_kabsch_transform(pixel_t *d_image, + size_t image_pitch, + uint32_t width, + uint32_t height, + int image_num, + const scalar_t *d_matrix, + scalar_t wavelength, + scalar_t osc_start, + scalar_t osc_width, + int image_range_start, + Vector3D s0, + Vector3D rot_axis, + const Vector3D *d_s1_vectors, + const scalar_t *d_phi_values, + const BoundingBoxExtents *d_bboxes, + const size_t *d_reflection_indices, + size_t num_reflections_this_image, + scalar_t delta_b, + scalar_t delta_m, + accumulator_t *d_foreground_sum, + uint32_t *d_foreground_count, + accumulator_t *d_background_sum, + uint32_t *d_background_count, + cudaStream_t stream) { + // Configure kernel launch parameters. + // Every block is always the full 32×16 = 512 threads. + dim3 blockDim(KABSCH_BLOCK_X, KABSCH_BLOCK_Y); + + // Each block processes (BLOCK-1) complete pixels per dimension: + // 31 pixels in x, 15 pixels in y. + // So the number of blocks needed is ceil(image_pixels / pixels_per_block): + // gridDim.x = ceil(width / 31) + // gridDim.y = ceil(height / 15) + // Edge blocks may extend past the image boundary — those threads + // simply fail the bounds check in the kernel and write false / skip. 
+ dim3 gridDim(ceil_div(width, static_cast(KABSCH_BLOCK_X - 1)), + ceil_div(height, static_cast(KABSCH_BLOCK_Y - 1))); + + // Launch the summation integration kernel + kabsch_transform<<>>(d_image, + image_pitch, + width, + height, + image_num, + d_matrix, + wavelength, + osc_start, + osc_width, + image_range_start, + s0, + rot_axis, + d_s1_vectors, + d_phi_values, + d_bboxes, + d_reflection_indices, + num_reflections_this_image, + delta_b, + delta_m, + d_foreground_sum, + d_foreground_count, + d_background_sum, + d_background_count); + + // Check for kernel launch errors + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) { + throw std::runtime_error( + fmt::format("Kernel launch failed: {}", cudaGetErrorString(err))); + } +} + +// DEPRECATED +/** + * @brief CUDA kernel to compute pixel-to-Kabsch transformations for + * multiple voxels + * + * This kernel transforms voxel coordinates from reciprocal space to + * Kabsch coordinates. + * Each thread processes one voxel, computing its Kabsch coordinates + * relative to a reflection center. 
+ * + * @param s_pixels Array of s_pixel vectors (different for each voxel) + * @param phi_pixels Array of phi_pixel angles (different for each voxel) + * @param s1_c Reflection center s1 vector (same for all voxels in this batch) + * @param phi_c Reflection center phi angle (same for all voxels in this batch) + * @param s0 Initial scattering vector + * @param rot_axis Rotation axis vector + * @param eps_array Output array for Kabsch coordinates + * @param s1_len_array Output array for s1 lengths + * @param n Number of voxels to process + */ +__global__ void kabsch_transform_flat(const Vector3D *__restrict__ s_pixels, + const scalar_t *__restrict__ phi_pixels, + Vector3D s1_c, + scalar_t phi_c, + Vector3D s0, + Vector3D rot_axis, + Vector3D *eps_array, + scalar_t *s1_len_array, + size_t n) { + // Calculate global thread index + int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i >= n) return; // Bounds check + + // Get input data for this voxel + Vector3D s_pixel = s_pixels[i]; + scalar_t phi_pixel = phi_pixels[i]; + + // Compute Kabsch transformation using the device function + scalar_t s1_len; + Vector3D eps = + pixel_to_kabsch(s0, s1_c, phi_c, s_pixel, phi_pixel, rot_axis, s1_len); + + // Store results + eps_array[i] = eps; + s1_len_array[i] = s1_len; +} + +/** + * @brief Host wrapper function for voxel Kabsch computation + * + * @param h_s_pixels Host array of s_pixel vectors (different for each voxel) + * @param h_phi_pixels Host array of phi_pixel angles (different for each voxel) + * @param s1_c Reflection center s1 vector (same for all voxels in this reflection) + * @param phi_c Reflection center phi angle (same for all voxels in this reflection) + * @param s0 Initial scattering vector + * @param rot_axis Rotation axis vector + * @param h_eps Host array to store output Kabsch coordinates + * @param h_s1_len Host array to store output s1 lengths + * @param n Number of voxels + */ +void compute_kabsch_transform(const Vector3D *h_s_pixels, + const scalar_t 
*h_phi_pixels, + Vector3D s1_c, + scalar_t phi_c, + Vector3D s0, + Vector3D rot_axis, + Vector3D *h_eps, + scalar_t *h_s1_len, + size_t n) { + // Create RAII device buffers + DeviceBuffer d_s_pixels(n); // Input s_pixel vectors + DeviceBuffer d_phi_pixels(n); // Input phi_pixel angles + DeviceBuffer d_eps(n); // Output Kabsch coordinates + DeviceBuffer d_s1_len(n); // Output s1 lengths + + // Copy input data from host to device + d_s_pixels.assign(h_s_pixels); + d_phi_pixels.assign(h_phi_pixels); + + // Configure kernel launch parameters + int threadsPerBlock = 256; + int blocksPerGrid = (n + threadsPerBlock - 1) / threadsPerBlock; + + // Launch kernel + kabsch_transform_flat<<>>(d_s_pixels.data(), + d_phi_pixels.data(), + s1_c, + phi_c, + s0, + rot_axis, + d_eps.data(), + d_s1_len.data(), + n); + + // Check for kernel launch errors + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) { + throw std::runtime_error( + fmt::format("Kernel launch failed: {}", cudaGetErrorString(err))); + } + + // Wait for kernel completion + err = cudaDeviceSynchronize(); + if (err != cudaSuccess) { + throw std::runtime_error( + fmt::format("Kernel execution failed: {}", cudaGetErrorString(err))); + } + + // Copy results back to host + d_eps.extract(h_eps); + d_s1_len.extract(h_s1_len); +} \ No newline at end of file diff --git a/integrator/kabsch.cuh b/integrator/kabsch.cuh new file mode 100644 index 00000000..3fa12f69 --- /dev/null +++ b/integrator/kabsch.cuh @@ -0,0 +1,113 @@ +/** + * @file kabsch.cuh + * @brief Header file for CUDA Kabsch coordinate transformation functions + * + * This header declares the interface for GPU-accelerated coordinate transformations + * used in the Kabsch algorithm for data processing. The functions + * convert pixel coordinates from reciprocal space into a geometry-invariant local + * coordinate system for efficient reflection profile integration. 
+ * + * The Kabsch transformation creates a local coordinate system defined by: + * - e₁: Perpendicular to the scattering plane (s₁ᶜ × s₀) + * - e₂: Within the scattering plane, orthogonal to e₁ (s₁ᶜ × e₁) + * - e₃: Bisects the incident and diffracted beam directions (s₁ᶜ + s₀) + * + * @author Dimitrios Vlachos + * @date 2025 + * @see kabsch.cu for implementation details + * @see Vector3D for mathematical vector operations + * @see DeviceBuffer for GPU memory management + */ + +#pragma once + +#include + +#include + +#include "extent.hpp" +#include "h5read.h" +#include "math/device_precision.cuh" +#include "math/vector3d.cuh" + +/** + * @brief Host wrapper function for image-based Kabsch computation + * + * Launches a kernel over the entire image grid to compute Kabsch coordinates + * for pixels that fall within reflection bounding boxes, classifying each + * pixel as foreground or background and atomically accumulating intensities + * for summation integration. + * + * @param d_image Device pointer to image data (pitched allocation) + * @param image_pitch Pitch in bytes of the device image allocation + * @param width Image width in pixels + * @param height Image height in pixels + * @param image_num Current image/frame number + * @param d_matrix Detector matrix (3x3 flattened) for pixel to lab conversion + * @param wavelength Beam wavelength for s_pixel normalization + * @param osc_start Oscillation start angle (radians) + * @param osc_width Oscillation width per image (radians) + * @param image_range_start First image number in the scan + * @param s0 Incident beam vector + * @param rot_axis Rotation axis vector + * @param d_s1_vectors Device array of s1_c vectors for each reflection + * @param d_phi_values Device array of phi_c values for each reflection + * @param d_bboxes Device array of BoundingBoxExtents structs for each reflection + * @param d_reflection_indices Device array of reflection indices for this image + * @param num_reflections_this_image Number of 
reflections touching this image + * @param delta_b Foreground extent in e₁/e₂ directions (n_sigma × σ_D), radians + * @param delta_m Foreground extent in e₃ direction (n_sigma × σ_M), radians + * @param d_foreground_sum Device array to accumulate foreground intensities per reflection + * @param d_foreground_count Device array to count foreground pixels per reflection + * @param d_background_sum Device array to accumulate background intensities per reflection + * @param d_background_count Device array to count background pixels per reflection + * @param stream CUDA stream for async execution + */ +void compute_kabsch_transform(pixel_t *d_image, + size_t image_pitch, + uint32_t width, + uint32_t height, + int image_num, + const scalar_t *d_matrix, + scalar_t wavelength, + scalar_t osc_start, + scalar_t osc_width, + int image_range_start, + fastvec::Vector3D s0, + fastvec::Vector3D rot_axis, + const fastvec::Vector3D *d_s1_vectors, + const scalar_t *d_phi_values, + const BoundingBoxExtents *d_bboxes, + const size_t *d_reflection_indices, + size_t num_reflections_this_image, + scalar_t delta_b, + scalar_t delta_m, + accumulator_t *d_foreground_sum, + uint32_t *d_foreground_count, + accumulator_t *d_background_sum, + uint32_t *d_background_count, + cudaStream_t stream); + +// DEPRECATED +/** + * @brief Host wrapper function for voxel Kabsch computation + * + * @param h_s_pixels Host array of s_pixel vectors (different for each voxel) + * @param h_phi_pixels Host array of phi_pixel angles (different for each voxel) + * @param s1_c Reflection center s1 vector (same for all voxels in this reflection) + * @param phi_c Reflection center phi angle (same for all voxels in this reflection) + * @param s0 Initial scattering vector + * @param rot_axis Rotation axis vector + * @param h_eps Host array to store output Kabsch coordinates + * @param h_s1_len Host array to store output s1 lengths + * @param n Number of voxels + */ +void compute_kabsch_transform(const fastvec::Vector3D 
*h_s_pixels, + const scalar_t *h_phi_pixels, + fastvec::Vector3D s1_c, + scalar_t phi_c, + fastvec::Vector3D s0, + fastvec::Vector3D rot_axis, + fastvec::Vector3D *h_eps, + scalar_t *h_s1_len, + size_t n); \ No newline at end of file diff --git a/integrator/tests/CMakeLists.txt b/integrator/tests/CMakeLists.txt new file mode 100644 index 00000000..492ea5ed --- /dev/null +++ b/integrator/tests/CMakeLists.txt @@ -0,0 +1,31 @@ +include(GoogleTest) + +add_executable(test_extent test_extent.cc ../extent.cc) +target_include_directories(test_extent PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/..) +target_link_libraries(test_extent PRIVATE + ffs_common + dx2 + GTest::gtest_main + Eigen3::Eigen + spdlog::spdlog + nlohmann_json::nlohmann_json +) + +add_executable(test_kabsch test_kabsch.cc ../kabsch.cu) +set_source_files_properties(../kabsch.cu PROPERTIES LANGUAGE CUDA) +target_include_directories(test_kabsch PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/..) +target_link_libraries(test_kabsch PRIVATE + ffs_common + dx2 + h5read + GTest::gtest_main + Eigen3::Eigen + spdlog::spdlog + nlohmann_json::nlohmann_json + CUDA::cudart + lodepng +) +set_property(TARGET test_kabsch PROPERTY CUDA_SEPARABLE_COMPILATION ON) + +gtest_discover_tests(test_extent PROPERTIES LABELS "integrator-tests") +gtest_discover_tests(test_kabsch PROPERTIES LABELS "integrator-tests") \ No newline at end of file diff --git a/integrator/tests/data/indexed.expt b/integrator/tests/data/indexed.expt new file mode 100644 index 00000000..d35ba516 --- /dev/null +++ b/integrator/tests/data/indexed.expt @@ -0,0 +1,716 @@ +{ + "__id__": "ExperimentList", + "experiment": [ + { + "__id__": "Experiment", + "identifier": "215394b2-07ff-52c4-2596-1bc0ddcc36dd", + "beam": 0, + "detector": 0, + "goniometer": 0, + "scan": 0, + "crystal": 0, + "imageset": 0 + } + ], + "history": [ + "2025-11-27T16:32:24Z|dials.import|v3.25.dev0", + "2025-11-27T16:33:00Z|dials.index|v3.25.dev0" + ], + "imageset": [ + { + "__id__": "ImageSequence", + "template": 
"/dls/science/groups/scisoft/DIALS/dials_data/thaumatin_i03_rotation/thau_2_1.nxs", + "single_file_indices": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99 + ], + "mask": null, + "gain": null, + "pedestal": null, + "dx": null, + "dy": null, + "params": { + "dynamic_shadowing": "Auto", + "multi_panel": false + } + } + ], + "beam": [ + { + "__id__": "monochromatic", + "direction": [ + -0.0034229667619783467, + -0.0, + 0.999994141632113 + ], + "wavelength": 0.9762458439949315, + "divergence": 0.0, + "sigma_divergence": 0.0, + "polarization_normal": [ + 0.0, + 1.0, + 0.0 + ], + "polarization_fraction": 0.999, + "flux": 0.0, + "transmission": 1.0, + "probe": "x-ray", + "sample_to_source_distance": 0.0 + } + ], + "detector": [ + { + "panels": [ + { + "name": "/entry/instrument/detector/module", + "type": "SENSOR_PAD", + "fast_axis": [ + 0.9999343565703017, + 0.008972928699089156, + 0.007125243918478585 + ], + "slow_axis": [ + 0.009011312015482328, + -0.9999449607587793, + -0.005373240073119384 + ], + "origin": [ + -154.36992588196173, + 164.3947680648576, + -198.15267891180412 + ], + "raw_image_offset": [ + 0, + 0 + ], + "image_size": [ + 4148, + 4362 + ], + "pixel_size": [ + 0.075, + 0.075 + ], + "trusted_range": [ + 0.0, + 46051.0 + ], + "thickness": 0.45000000000000007, + "material": "Si", + "mu": 3.9219876752936167, + "identifier": "", + "mask": [], + "gain": 1.0, + "pedestal": 0.0, + "px_mm_strategy": { + "type": 
"ParallaxCorrectedPxMmStrategy" + } + } + ], + "hierarchy": { + "name": "", + "type": "", + "fast_axis": [ + 1.0, + 0.0, + 0.0 + ], + "slow_axis": [ + 0.0, + 1.0, + 0.0 + ], + "origin": [ + 0.0, + 0.0, + 0.0 + ], + "raw_image_offset": [ + 0, + 0 + ], + "image_size": [ + 0, + 0 + ], + "pixel_size": [ + 0.0, + 0.0 + ], + "trusted_range": [ + 0.0, + 0.0 + ], + "thickness": 0.0, + "material": "", + "mu": 0.0, + "identifier": "", + "mask": [], + "gain": 1.0, + "pedestal": 0.0, + "px_mm_strategy": { + "type": "SimplePxMmStrategy" + }, + "children": [ + { + "panel": 0 + } + ] + } + } + ], + "goniometer": [ + { + "axes": [ + [ + 1.0, + -0.0025, + 0.0056 + ], + [ + -0.006, + -0.0264, + -0.9996 + ], + [ + 1.0, + 0.0, + 0.0 + ] + ], + "angles": [ + 0.0, + 0.0, + 0.0 + ], + "names": [ + "phi", + "chi", + "omega" + ], + "scan_axis": 2 + } + ], + "scan": [ + { + "image_range": [ + 1, + 100 + ], + "batch_offset": 0, + "properties": { + "epochs": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "exposure_time": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 
0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "oscillation": [ + 0.0, + 0.09999999999999964, + 0.1999999999999993, + 0.29999999999999893, + 0.3999999999999986, + 0.4999999999999982, + 0.5999999999999979, + 0.6999999999999975, + 0.7999999999999972, + 0.8999999999999969, + 0.9999999999999964, + 1.0999999999999959, + 1.1999999999999957, + 1.2999999999999954, + 1.399999999999995, + 1.4999999999999944, + 1.5999999999999943, + 1.699999999999994, + 1.7999999999999938, + 1.899999999999993, + 1.999999999999993, + 2.0999999999999925, + 2.1999999999999917, + 2.299999999999992, + 2.3999999999999915, + 2.499999999999991, + 2.5999999999999908, + 2.6999999999999904, + 2.79999999999999, + 2.8999999999999897, + 2.999999999999989, + 3.099999999999989, + 3.1999999999999886, + 3.2999999999999883, + 3.399999999999988, + 3.4999999999999876, + 3.5999999999999877, + 3.699999999999987, + 3.799999999999986, + 3.899999999999986, + 3.999999999999986, + 4.099999999999985, + 4.199999999999985, + 4.299999999999985, + 4.3999999999999835, + 4.499999999999984, + 4.599999999999984, + 4.699999999999983, + 4.799999999999983, + 4.899999999999983, + 4.999999999999982, + 5.099999999999982, + 5.1999999999999815, + 5.299999999999981, + 5.399999999999981, + 5.4999999999999805, + 5.59999999999998, + 5.69999999999998, + 5.799999999999979, + 5.899999999999979, + 5.999999999999978, + 6.099999999999978, + 6.199999999999978, + 6.299999999999978, + 6.399999999999977, + 6.499999999999976, + 6.5999999999999766, + 6.699999999999976, + 6.799999999999976, + 6.8999999999999755, + 6.999999999999975, + 7.099999999999975, + 7.199999999999975, + 7.299999999999973, + 7.399999999999974, + 
7.499999999999974, + 7.599999999999972, + 7.699999999999973, + 7.799999999999972, + 7.899999999999971, + 7.999999999999972, + 8.099999999999971, + 8.19999999999997, + 8.29999999999997, + 8.39999999999997, + 8.499999999999968, + 8.59999999999997, + 8.699999999999969, + 8.799999999999967, + 8.899999999999968, + 8.999999999999968, + 9.099999999999968, + 9.199999999999967, + 9.299999999999967, + 9.399999999999967, + 9.499999999999966, + 9.599999999999966, + 9.699999999999966, + 9.799999999999965, + 9.899999999999965 + ] + }, + "valid_image_ranges": {} + } + ], + "crystal": [ + { + "__id__": "crystal", + "real_space_a": [ + -18.3617922001806, + -1.3985262449809124, + -54.866705726818 + ], + "real_space_b": [ + -32.09880690850544, + 47.2184015325075, + 9.47764074028691 + ], + "real_space_c": [ + 115.77476247140888, + 86.89078795711316, + -40.81230881268615 + ], + "space_group_hall_symbol": " P 1", + "B_covariance": [ + 1.4129660242383127e-11, + -9.629396138302801e-28, + 3.5689987328237737e-28, + -2.4872540268126837e-12, + 7.280944537835667e-12, + 3.5689987328237737e-28, + 3.646488633397222e-12, + -5.820541202322426e-13, + 2.914308627849801e-12, + -9.629396138302803e-28, + 8.726639160350873e-44, + -2.778693699413534e-44, + 3.40351166637644e-28, + -5.955194306558846e-28, + -2.778693699413534e-44, + -3.2261385665732015e-28, + 1.1706473372740525e-28, + -2.2689755947169165e-28, + 3.5689987328237737e-28, + -2.778693699413534e-44, + 1.0321997888456188e-44, + -7.002309989901791e-29, + 2.013272961354654e-28, + 1.0321997888456188e-44, + 1.2902506870731577e-28, + -2.5570263336226253e-29, + 8.42855090597776e-29, + -2.4872540268126837e-12, + 3.4035116663764394e-28, + -7.002309989901791e-29, + 3.0011386068406224e-12, + -2.0716802701697863e-12, + -7.002309989901791e-29, + -3.140331758403637e-13, + 7.074979130795366e-13, + -5.717820023517858e-13, + 7.280944537835668e-12, + -5.955194306558845e-28, + 2.0132729613546538e-28, + -2.0716802701697863e-12, + 4.324985322609059e-12, + 
2.0132729613546538e-28, + 1.9954853137207196e-12, + -5.377998581508811e-13, + 1.6439621307599652e-12, + 3.5689987328237737e-28, + -2.778693699413534e-44, + 1.0321997888456188e-44, + -7.002309989901791e-29, + 2.013272961354654e-28, + 1.0321997888456188e-44, + 1.2902506870731577e-28, + -2.5570263336226253e-29, + 8.42855090597776e-29, + 3.6464886333972215e-12, + -3.226138566573202e-28, + 1.2902506870731577e-28, + -3.140331758403637e-13, + 1.9954853137207196e-12, + 1.2902506870731577e-28, + 3.3244312330737377e-12, + -6.388567421433584e-13, + 1.0535696398108259e-12, + -5.820541202322426e-13, + 1.1706473372740525e-28, + -2.5570263336226253e-29, + 7.074979130795364e-13, + -5.377998581508811e-13, + -2.5570263336226253e-29, + -6.388567421433584e-13, + 4.1810623500928776e-13, + -2.0879704543407345e-13, + 2.9143086278498007e-12, + -2.2689755947169165e-28, + 8.42855090597776e-29, + -5.717820023517859e-13, + 1.6439621307599652e-12, + 8.42855090597776e-29, + 1.0535696398108257e-12, + -2.0879704543407345e-13, + 6.882434112305724e-13 + ] + } + ], + "profile": [], + "scaling_model": [] +} \ No newline at end of file diff --git a/integrator/tests/data/predicted.refl b/integrator/tests/data/predicted.refl new file mode 100644 index 00000000..9fc1e414 Binary files /dev/null and b/integrator/tests/data/predicted.refl differ diff --git a/integrator/tests/data/simple_integrated.refl b/integrator/tests/data/simple_integrated.refl new file mode 100644 index 00000000..8ae4d3e4 Binary files /dev/null and b/integrator/tests/data/simple_integrated.refl differ diff --git a/integrator/tests/test_extent.cc b/integrator/tests/test_extent.cc new file mode 100644 index 00000000..7d2cbffa --- /dev/null +++ b/integrator/tests/test_extent.cc @@ -0,0 +1,402 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "extent.hpp" +#include "ffs_logger.hpp" +#include "math/math_utils.cuh" + +/** + * @brief Validate computed bounding boxes against 
existing bbox column + * + * Compares the first N bounding boxes and writes results to a validation file. + * + * @param reflections The reflection table containing existing bbox column + * @param computed_bbox_data Flat array of computed bounding boxes (6 values per reflection) + * @param num_reflections Total number of reflections + * @param num_to_display Number of bounding boxes to display for comparison + * @return 0 on success, 1 on error + */ +int validate_bounding_boxes(ReflectionTable &reflections, + const std::vector &computed_bbox_data, + size_t num_reflections, + size_t num_to_display = 10, + const std::string &data_path = "") { + logger.info( + "Validation mode: comparing computed bounding boxes with existing bbox column"); + + // Load existing bounding boxes + auto bbox_column_opt = reflections.column(data_path + "bbox"); + if (!bbox_column_opt) { + logger.error("Column 'bbox' not found in reflection data for validation."); + return 1; + } + auto bbox_column = *bbox_column_opt; + + // Statistics for comparison + double total_x_min_diff = 0.0, total_x_max_diff = 0.0; + double total_y_min_diff = 0.0, total_y_max_diff = 0.0; + double total_z_min_diff = 0.0, total_z_max_diff = 0.0; + double max_x_min_diff = 0.0, max_x_max_diff = 0.0; + double max_y_min_diff = 0.0, max_y_max_diff = 0.0; + double max_z_min_diff = 0.0, max_z_max_diff = 0.0; + + logger.info("First {} bounding box comparisons:", num_to_display); + for (size_t i = 0; i < num_reflections; ++i) { + // Existing bbox + double ex_x_min = bbox_column(i, 0); + double ex_x_max = bbox_column(i, 1); + double ex_y_min = bbox_column(i, 2); + double ex_y_max = bbox_column(i, 3); + int ex_z_min = static_cast(bbox_column(i, 4)); + int ex_z_max = static_cast(bbox_column(i, 5)); + + // Computed bbox (from flat array) + double comp_x_min = computed_bbox_data[i * 6 + 0]; + double comp_x_max = computed_bbox_data[i * 6 + 1]; + double comp_y_min = computed_bbox_data[i * 6 + 2]; + double comp_y_max = 
computed_bbox_data[i * 6 + 3]; + int comp_z_min = static_cast(computed_bbox_data[i * 6 + 4]); + int comp_z_max = static_cast(computed_bbox_data[i * 6 + 5]); + + // Compute differences + double diff_x_min = std::abs(comp_x_min - ex_x_min); + double diff_x_max = std::abs(comp_x_max - ex_x_max); + double diff_y_min = std::abs(comp_y_min - ex_y_min); + double diff_y_max = std::abs(comp_y_max - ex_y_max); + double diff_z_min = std::abs(comp_z_min - ex_z_min); + double diff_z_max = std::abs(comp_z_max - ex_z_max); + + // Accumulate statistics + total_x_min_diff += diff_x_min; + total_x_max_diff += diff_x_max; + total_y_min_diff += diff_y_min; + total_y_max_diff += diff_y_max; + total_z_min_diff += diff_z_min; + total_z_max_diff += diff_z_max; + + max_x_min_diff = std::max(max_x_min_diff, diff_x_min); + max_x_max_diff = std::max(max_x_max_diff, diff_x_max); + max_y_min_diff = std::max(max_y_min_diff, diff_y_min); + max_y_max_diff = std::max(max_y_max_diff, diff_y_max); + max_z_min_diff = std::max(max_z_min_diff, diff_z_min); + max_z_max_diff = std::max(max_z_max_diff, diff_z_max); + + // Display first N comparisons + if (i < num_to_display) { + logger.info( + "bbox[{}]: existing x=[{:.1f},{:.1f}] y=[{:.1f},{:.1f}] z=[{},{}]", + i, + ex_x_min, + ex_x_max, + ex_y_min, + ex_y_max, + ex_z_min, + ex_z_max); + logger.info( + "bbox[{}]: computed x=[{:.1f},{:.1f}] y=[{:.1f},{:.1f}] z=[{},{}]", + i, + comp_x_min, + comp_x_max, + comp_y_min, + comp_y_max, + comp_z_min, + comp_z_max); + logger.info( + "bbox[{}]: diff x=[{:.1f},{:.1f}] y=[{:.1f},{:.1f}] " + "z=[{:.1f},{:.1f}]", + i, + diff_x_min, + diff_x_max, + diff_y_min, + diff_y_max, + diff_z_min, + diff_z_max); + } + } + + // Compute and display average differences + double n = static_cast(num_reflections); + logger.info("\n=== Bounding Box Comparison Statistics ==="); + logger.info("Mean absolute differences:"); + logger.info(" x_min: {:.3f} pixels, x_max: {:.3f} pixels", + total_x_min_diff / n, + total_x_max_diff / n); + 
logger.info(" y_min: {:.3f} pixels, y_max: {:.3f} pixels", + total_y_min_diff / n, + total_y_max_diff / n); + logger.info(" z_min: {:.3f} frames, z_max: {:.3f} frames", + total_z_min_diff / n, + total_z_max_diff / n); + logger.info("Maximum absolute differences:"); + logger.info( + " x_min: {:.3f} pixels, x_max: {:.3f} pixels", max_x_min_diff, max_x_max_diff); + logger.info( + " y_min: {:.3f} pixels, y_max: {:.3f} pixels", max_y_min_diff, max_y_max_diff); + logger.info( + " z_min: {:.3f} frames, z_max: {:.3f} frames", max_z_min_diff, max_z_max_diff); + + // Add computed bounding boxes to reflection table for comparison + reflections.add_column( + "computed_bbox", std::vector{num_reflections, 6}, computed_bbox_data); + + // Write output file with comparison + reflections.write("validation_reflections.h5"); + logger.info("Validation results saved to validation_reflections.h5"); + + return 0; +} + +// Test fixture for integrator tests with file paths +class ExtentValidationTest : public ::testing::Test { + protected: + void SetUp() override { + // Get paths relative to test directory + test_dir = std::filesystem::path(__FILE__).parent_path(); + predicted_refl = test_dir / "data" / "predicted.refl"; + indexed_expt = test_dir / "data" / "indexed.expt"; + integrated_truth_refl = test_dir / "data" / "simple_integrated.refl"; + } + + std::filesystem::path test_dir; + std::filesystem::path predicted_refl; + std::filesystem::path indexed_expt; + std::filesystem::path integrated_truth_refl; +}; + +TEST_F(ExtentValidationTest, ComputeKabschBoundingBoxes) { + // Check if test files exist + ASSERT_TRUE(std::filesystem::exists(predicted_refl)) + << "Reflection file not found: " << predicted_refl; + ASSERT_TRUE(std::filesystem::exists(indexed_expt)) + << "Experiment file not found: " << indexed_expt; + ASSERT_TRUE(std::filesystem::exists(integrated_truth_refl)) + << "Integrated reflection file not found: " << integrated_truth_refl; + + // Load source reflection data (indexed 
reflections for computing) + logger.info("Loading indexed data from file: {}", predicted_refl.string()); + ReflectionTable reflections(predicted_refl.string()); + + // Load integrated reflections for comparison (contains bbox) + logger.info("Loading integrated data for comparison from: {}", + integrated_truth_refl.string()); + ReflectionTable integrated_reflections(integrated_truth_refl.string()); + + // Log available columns in integrated reflections + // logger.info("Available columns in integrated reflections:"); + // for (const auto &name : integrated_reflections.get_column_names()) { + // logger.info(" - {}", name); + // } + + // HDF5 data path prefixes for reflection columns + const std::string indexed_data_path = ""; + const std::string integrated_data_path = ""; + + // Extract required columns from indexed reflections + auto s1_vectors_opt = reflections.column(indexed_data_path + "s1"); + ASSERT_TRUE(s1_vectors_opt.has_value()) + << "Column 's1' not found in reflection data."; + auto s1_vectors = *s1_vectors_opt; + + auto phi_column_opt = reflections.column(indexed_data_path + "xyzcal.mm"); + ASSERT_TRUE(phi_column_opt.has_value()) + << "Column 'xyzcal.mm' not found for phi positions."; + auto phi_column = *phi_column_opt; + + // Extract bbox from integrated reflections for comparison + auto bbox_column_opt = + integrated_reflections.column(integrated_data_path + "bbox"); + ASSERT_TRUE(bbox_column_opt.has_value()) + << "Column 'bbox' not found in integrated reflection data."; + auto bbox_column = *bbox_column_opt; + + size_t num_reflections = s1_vectors.extent(0); + logger.info("Processing {} reflections", num_reflections); + + // Parse experiment list from JSON + std::ifstream f(indexed_expt); + nlohmann::json elist_json_obj; + try { + elist_json_obj = nlohmann::json::parse(f); + } catch (nlohmann::json::parse_error &ex) { + FAIL() << "Failed to parse experiment file: " << ex.what(); + } + + // Construct Experiment object + Experiment expt; + try { + expt 
= Experiment(elist_json_obj); + } catch (const std::invalid_argument &ex) { + FAIL() << "Failed to construct Experiment: " << ex.what(); + } + + // Extract experimental components + MonochromaticBeam beam = expt.beam(); + Goniometer gonio = expt.goniometer(); + const Panel &panel = expt.detector().panels()[0]; + const Scan &scan = expt.scan(); + + Eigen::Vector3d s0 = beam.get_s0(); + Eigen::Vector3d rotation_axis = gonio.get_rotation_axis(); + + // Use baseline values for sigma_b and sigma_m + double sigma_b = degrees_to_radians(0.01); + double sigma_m = degrees_to_radians(0.01); + + // Compute bounding boxes using CPU-based Kabsch coordinate system + logger.info("Computing Kabsch bounding boxes for {} reflections", num_reflections); + + std::vector computed_bboxes = + compute_kabsch_bounding_boxes(s0, + rotation_axis, + s1_vectors, + phi_column, + num_reflections, + sigma_b, + sigma_m, + panel, + scan, + beam); + + ASSERT_EQ(computed_bboxes.size(), num_reflections) + << "Computed bboxes size mismatch"; + + // Convert BoundingBoxExtents to flat array format + std::vector computed_bbox_data(num_reflections * 6); + for (size_t i = 0; i < num_reflections; ++i) { + computed_bbox_data[i * 6 + 0] = computed_bboxes[i].x_min; + computed_bbox_data[i * 6 + 1] = computed_bboxes[i].x_max; + computed_bbox_data[i * 6 + 2] = computed_bboxes[i].y_min; + computed_bbox_data[i * 6 + 3] = computed_bboxes[i].y_max; + computed_bbox_data[i * 6 + 4] = static_cast(computed_bboxes[i].z_min); + computed_bbox_data[i * 6 + 5] = static_cast(computed_bboxes[i].z_max); + } + + // Validate against existing bounding boxes from integrated reflections + int result = validate_bounding_boxes(integrated_reflections, + computed_bbox_data, + num_reflections, + 10, + integrated_data_path); + EXPECT_EQ(result, 0) << "Validation failed"; + + // Check for verbose mode via environment variable + bool verbose = std::getenv("VERBOSE_TEST") != nullptr; + + // Track mismatches for summary + size_t 
total_mismatches = 0; + size_t x_min_mismatches = 0, x_max_mismatches = 0; + size_t y_min_mismatches = 0, y_max_mismatches = 0; + size_t z_min_mismatches = 0, z_max_mismatches = 0; + + // Compare computed bounding boxes with existing ones - fail if any differences + for (size_t i = 0; i < num_reflections; ++i) { + const auto &bbox = computed_bboxes[i]; + + // Check that bounding boxes are reasonable + EXPECT_LE(bbox.x_min, bbox.x_max) << "Invalid x bounds for reflection " << i; + EXPECT_LE(bbox.y_min, bbox.y_max) << "Invalid y bounds for reflection " << i; + EXPECT_LE(bbox.z_min, bbox.z_max) << "Invalid z bounds for reflection " << i; + + // Check that bounding boxes have non-zero size + EXPECT_GT(bbox.x_max - bbox.x_min, 0) << "Zero width x for reflection " << i; + EXPECT_GT(bbox.y_max - bbox.y_min, 0) << "Zero width y for reflection " << i; + EXPECT_GT(bbox.z_max - bbox.z_min, 0) << "Zero width z for reflection " << i; + + // Compare with existing bbox - fail on any difference + double ex_x_min = bbox_column(i, 0); + double ex_x_max = bbox_column(i, 1); + double ex_y_min = bbox_column(i, 2); + double ex_y_max = bbox_column(i, 3); + int ex_z_min = static_cast(bbox_column(i, 4)); + int ex_z_max = static_cast(bbox_column(i, 5)); + + bool has_mismatch = false; + + if (bbox.x_min != ex_x_min) { + x_min_mismatches++; + has_mismatch = true; + if (verbose) { + EXPECT_EQ(bbox.x_min, ex_x_min) + << "x_min mismatch for reflection " << i; + } + } + if (bbox.x_max != ex_x_max) { + x_max_mismatches++; + has_mismatch = true; + if (verbose) { + EXPECT_EQ(bbox.x_max, ex_x_max) + << "x_max mismatch for reflection " << i; + } + } + if (bbox.y_min != ex_y_min) { + y_min_mismatches++; + has_mismatch = true; + if (verbose) { + EXPECT_EQ(bbox.y_min, ex_y_min) + << "y_min mismatch for reflection " << i; + } + } + if (bbox.y_max != ex_y_max) { + y_max_mismatches++; + has_mismatch = true; + if (verbose) { + EXPECT_EQ(bbox.y_max, ex_y_max) + << "y_max mismatch for reflection " << i; + } + 
} + if (bbox.z_min != ex_z_min) { + z_min_mismatches++; + has_mismatch = true; + if (verbose) { + EXPECT_EQ(bbox.z_min, ex_z_min) + << "z_min mismatch for reflection " << i; + } + } + if (bbox.z_max != ex_z_max) { + z_max_mismatches++; + has_mismatch = true; + if (verbose) { + EXPECT_EQ(bbox.z_max, ex_z_max) + << "z_max mismatch for reflection " << i; + } + } + + if (has_mismatch) { + total_mismatches++; + } + } + + // Print summary + logger.info("\n=== Bounding Box Comparison Summary ==="); + logger.info("Total reflections checked: {}", num_reflections); + logger.info("Reflections with mismatches: {}", total_mismatches); + if (total_mismatches > 0) { + logger.info("Mismatches by component:"); + logger.info(" x_min: {} mismatches", x_min_mismatches); + logger.info(" x_max: {} mismatches", x_max_mismatches); + logger.info(" y_min: {} mismatches", y_min_mismatches); + logger.info(" y_max: {} mismatches", y_max_mismatches); + logger.info(" z_min: {} mismatches", z_min_mismatches); + logger.info(" z_max: {} mismatches", z_max_mismatches); + logger.info("\nSet VERBOSE_TEST=1 to see detailed mismatch output"); + } + + // Fail test if any mismatches found + EXPECT_EQ(total_mismatches, 0) + << "Found " << total_mismatches << " reflections with bounding box mismatches"; + + logger.info("Validation complete for {} reflections", num_reflections); +} + +TEST(IntegratorTest, BasicTest) { + EXPECT_TRUE(true); +} diff --git a/integrator/tests/test_kabsch.cc b/integrator/tests/test_kabsch.cc new file mode 100644 index 00000000..cf112f60 --- /dev/null +++ b/integrator/tests/test_kabsch.cc @@ -0,0 +1,189 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ffs_logger.hpp" +#include "kabsch.cuh" +#include "math/vector3d.cuh" + +// Test fixture for Kabsch tests with file paths +class KabschTest : public ::testing::Test { + protected: + void SetUp() override { + // Get paths relative to test directory + test_dir = 
std::filesystem::path(__FILE__).parent_path(); + predicted_refl = test_dir / "data" / "predicted.refl"; + indexed_expt = test_dir / "data" / "indexed.expt"; + } + + std::filesystem::path test_dir; + std::filesystem::path predicted_refl; + std::filesystem::path indexed_expt; +}; + +TEST_F(KabschTest, ComputeKabschTransform) { + // Check if test files exist + ASSERT_TRUE(std::filesystem::exists(predicted_refl)) + << "Reflection file not found: " << predicted_refl; + ASSERT_TRUE(std::filesystem::exists(indexed_expt)) + << "Experiment file not found: " << indexed_expt; + + // Load reflection data + logger.info("Loading reflection data from file: {}", predicted_refl.string()); + ReflectionTable reflections(predicted_refl.string()); + + const std::string data_path = ""; + + // Extract required columns + auto s1_vectors_opt = reflections.column(data_path + "s1"); + ASSERT_TRUE(s1_vectors_opt.has_value()) + << "Column 's1' not found in reflection data."; + auto s1_vectors = *s1_vectors_opt; + + auto phi_column_opt = reflections.column(data_path + "xyzcal.mm"); + ASSERT_TRUE(phi_column_opt.has_value()) + << "Column 'xyzcal.mm' not found for phi positions."; + auto phi_column = *phi_column_opt; + + size_t num_reflections = s1_vectors.extent(0); + logger.info("Processing {} reflections for Kabsch transformation", num_reflections); + + // Parse experiment file + std::ifstream f(indexed_expt); + nlohmann::json elist_json_obj; + try { + elist_json_obj = nlohmann::json::parse(f); + } catch (nlohmann::json::parse_error &ex) { + FAIL() << "Failed to parse experiment file: " << ex.what(); + } + + // Construct Experiment object + Experiment expt; + try { + expt = Experiment(elist_json_obj); + } catch (const std::invalid_argument &ex) { + FAIL() << "Failed to construct Experiment: " << ex.what(); + } + + // Extract experimental components + MonochromaticBeam beam = expt.beam(); + Goniometer gonio = expt.goniometer(); + const Panel &panel = expt.detector().panels()[0]; + const Scan 
&scan = expt.scan(); + + Eigen::Vector3d s0_eigen = beam.get_s0(); + Eigen::Vector3d rotation_axis_eigen = gonio.get_rotation_axis(); + double wl = beam.get_wavelength(); + double osc_start = scan.get_oscillation()[0]; + double osc_width = scan.get_oscillation()[1]; + int image_range_start = scan.get_image_range()[0]; + + // Convert to CUDA format + fastvec::Vector3D s0_cuda = + fastvec::make_vector3d(s0_eigen.x(), s0_eigen.y(), s0_eigen.z()); + fastvec::Vector3D rotation_axis_cuda = fastvec::make_vector3d( + rotation_axis_eigen.x(), rotation_axis_eigen.y(), rotation_axis_eigen.z()); + + logger.info("Testing Kabsch transformation for first reflection"); + + // Test on the first reflection + size_t refl_id = 0; + + // Get reflection centroid data + Eigen::Vector3d s1_c_eigen( + s1_vectors(refl_id, 0), s1_vectors(refl_id, 1), s1_vectors(refl_id, 2)); + fastvec::Vector3D s1_c_cuda = + fastvec::make_vector3d(s1_c_eigen.x(), s1_c_eigen.y(), s1_c_eigen.z()); + double phi_c = phi_column(refl_id, 2); + + logger.info("Reflection {} centroid: s1=({:.6f}, {:.6f}, {:.6f}), phi={:.6f}", + refl_id, + s1_c_eigen.x(), + s1_c_eigen.y(), + s1_c_eigen.z(), + phi_c); + + // Create test voxels around the reflection centroid + std::vector<fastvec::Vector3D> test_s_pixels; + std::vector<scalar_t> test_phi_pixels; + + // Test with a few voxel positions + const int num_test_voxels = 5; + for (int i = 0; i < num_test_voxels; ++i) { + // Create slight variations around the centroid + double x_offset = 100.0 + i * 10.0; // pixels + double y_offset = 200.0 + i * 10.0; // pixels + int z_offset = i; // frames + + // Convert pixel coordinates to lab frame + std::array<double, 2> xy_mm = panel.px_to_mm(x_offset, y_offset); + Eigen::Vector3d lab_coord = + panel.get_d_matrix() * Eigen::Vector3d(xy_mm[0], xy_mm[1], 1.0); + + // Convert to reciprocal space vector + Eigen::Vector3d s_pixel_eigen = lab_coord.normalized() / wl; + fastvec::Vector3D s_pixel_cuda = fastvec::make_vector3d( + s_pixel_eigen.x(), s_pixel_eigen.y(),
s_pixel_eigen.z()); + + // Calculate phi for this voxel + double phi_pixel = + osc_start + (z_offset - image_range_start + 1.5) * osc_width / 180.0 * M_PI; + + test_s_pixels.push_back(s_pixel_cuda); + test_phi_pixels.push_back(static_cast<scalar_t>(phi_pixel)); + + logger.info("Test voxel {}: pixel=({:.1f}, {:.1f}, {}), phi={:.6f}", + i, + x_offset, + y_offset, + z_offset, + phi_pixel); + } + + // Allocate output arrays + std::vector<fastvec::Vector3D> kabsch_results(num_test_voxels); + std::vector<scalar_t> s1_len_results(num_test_voxels); + + // Call CUDA Kabsch transformation + logger.info("Calling compute_kabsch_transform with {} test voxels", + num_test_voxels); + compute_kabsch_transform(test_s_pixels.data(), + test_phi_pixels.data(), + s1_c_cuda, + static_cast<scalar_t>(phi_c), + s0_cuda, + rotation_axis_cuda, + kabsch_results.data(), + s1_len_results.data(), + num_test_voxels); + + logger.info("Kabsch transformation completed successfully"); + + // Validate results + for (int i = 0; i < num_test_voxels; ++i) { + const auto &eps = kabsch_results[i]; + scalar_t s1_len = s1_len_results[i]; + + logger.info("Voxel {}: Kabsch coords ε=({:.6f}, {:.6f}, {:.6f}), |s₁|={:.6f}", + i, + eps.x, + eps.y, + eps.z, + s1_len); + + // Check results + + // Check that s1 length is positive + EXPECT_GT(s1_len, 0.0f) << "|s₁| should be positive for voxel " << i; + } + + logger.info("Kabsch transformation test completed successfully"); +} diff --git a/tests/test_integrator.py b/tests/test_integrator.py new file mode 100644 index 00000000..2f752c73 --- /dev/null +++ b/tests/test_integrator.py @@ -0,0 +1,172 @@ +""" +Tests for the fast-feedback integrator.
+ +Test command: +bin/integrator +../integrator/tests/comp/filtered.refl +../integrator/tests/comp/indexed.expt --sigma_m 0.002 --sigma_b 0.0002 +""" + +import os +import subprocess +from pathlib import Path + +import h5py + + +def test_integrator_basic(dials_data, tmp_path): + """Test basic integrator functionality with reflection and experiment files.""" + + integrator_path: str | Path | None = os.getenv("INTEGRATOR") + assert integrator_path, "INTEGRATOR environment variable not set" + + # /dls/science/groups/scisoft/DIALS/dials_data/thaumatin_i03_rotation/ + dials_data_path = dials_data("thaumatin_i03_rotation", pathlib=True) + assert dials_data_path + + integration_data_path = Path("tests/thaumatin_i03_rotation.data").resolve() + assert integration_data_path.exists() + + # sigma b: 0.022098 degrees = 0.0003856828581 radians + # sigma m: 0.105343 degrees = 0.0018385821939 radians + command = [ + integrator_path, + "--reflection", + str(integration_data_path / "indexed.refl"), + "--experiment", + str(integration_data_path / "indexed.expt"), + "--images", + dials_data_path / "thau_2_1.nxs", + "--sigma_m", + "0.00183", + "--sigma_b", + "0.00038", + ] + + proc = subprocess.run( + command, + capture_output=True, + cwd=tmp_path, + ) + # assert not proc.stderr, f"Error: {proc.stderr.decode()}" + assert proc.returncode == 0, f"Non-zero return code: {proc.returncode}" + + # Validate output files exist + output_reflections = tmp_path / "output_reflections.h5" + voxel_kabsch_data = tmp_path / "voxel_kabsch_data.h5" + + assert output_reflections.exists(), "output_reflections.h5 not created" + assert voxel_kabsch_data.exists(), "voxel_kabsch_data.h5 not created" + + # Validate reflection data structure + with h5py.File(output_reflections, "r") as f: + # Navigate to the processing group + processing_group = f["dials/processing/group_0"] + + # Check that computed_bbox column was added + assert "computed_bbox" in processing_group, ( + "computed_bbox column not found in output" 
+ ) + + # Check original columns are preserved + assert "s1" in processing_group, "s1 column missing from output" + assert "xyzcal.mm" in processing_group, "xyzcal.mm column missing from output" + assert "bbox" in processing_group, "bbox column missing from output" + + # Validate computed_bbox has correct shape + bbox_data = processing_group["bbox"][:] + computed_bbox_data = processing_group["computed_bbox"][:] + + assert bbox_data.shape == computed_bbox_data.shape, ( + f"Shape mismatch: bbox {bbox_data.shape} vs computed_bbox {computed_bbox_data.shape}" + ) + + # Basic sanity check: bounding boxes should have reasonable values + assert computed_bbox_data.shape[1] == 6, "Computed bbox should have 6 columns" + + # Check that x_min < x_max and y_min < y_max for computed bboxes + x_min, x_max = computed_bbox_data[:, 0], computed_bbox_data[:, 1] + y_min, y_max = computed_bbox_data[:, 2], computed_bbox_data[:, 3] + z_min, z_max = computed_bbox_data[:, 4], computed_bbox_data[:, 5] + + # Debug: Print some computed bbox values + print("\nComputed bbox statistics:") + print(f"Number of reflections: {len(computed_bbox_data)}") + print(f"x_min range: {x_min.min():.6e} to {x_min.max():.6e}") + print(f"x_max range: {x_max.min():.6e} to {x_max.max():.6e}") + print(f"y_min range: {y_min.min():.6e} to {y_min.max():.6e}") + print(f"y_max range: {y_max.min():.6e} to {y_max.max():.6e}") + print(f"z_min range: {z_min.min():.6e} to {z_min.max():.6e}") + print(f"z_max range: {z_max.min():.6e} to {z_max.max():.6e}") + + # Show first few computed bboxes + print("\nFirst 5 computed bboxes:") + for i in range(min(5, len(computed_bbox_data))): + print( + f" [{i}]: x=({x_min[i]:.6e}, {x_max[i]:.6e}), y=({y_min[i]:.6e}, {y_max[i]:.6e}), z=({z_min[i]:.6e}, {z_max[i]:.6e})" + ) + + # Compare with original bbox for context + orig_bbox = processing_group["bbox"][:] + orig_x_min, orig_x_max = orig_bbox[:, 0], orig_bbox[:, 1] + print( + f"\nOriginal bbox x_min range: {orig_x_min.min():.6e} to 
{orig_x_min.max():.6e}" + ) + print( + f"Original bbox x_max range: {orig_x_max.min():.6e} to {orig_x_max.max():.6e}" + ) + + # Count invalid bboxes + invalid_x = ~(x_min < x_max) + invalid_y = ~(y_min < y_max) + invalid_z = ~(z_min < z_max) + print("\nInvalid bbox counts:") + print(f"Invalid x bounds: {invalid_x.sum()} / {len(computed_bbox_data)}") + print(f"Invalid y bounds: {invalid_y.sum()} / {len(computed_bbox_data)}") + print(f"Invalid z bounds: {invalid_z.sum()} / {len(computed_bbox_data)}") + + assert (x_min < x_max).all(), "Invalid x bounds in computed bbox" + assert (y_min < y_max).all(), "Invalid y bounds in computed bbox" + assert (z_min < z_max).all(), "Invalid z bounds in computed bbox" + + # Validate voxel data structure + with h5py.File(voxel_kabsch_data, "r") as f: + # Navigate to the processing group + processing_group = f["dials/processing/group_0"] + + # Check expected columns exist + expected_columns = [ + "kabsch_coordinates", + "reflection_id", + "pixel_coordinates", + "voxel_s1_length", + ] + for col in expected_columns: + assert col in processing_group, f"Column {col} not found in voxel data" + + # Check data consistency + kabsch_coords = processing_group["kabsch_coordinates"][:] + reflection_ids = processing_group["reflection_id"][:] + pixel_coords = processing_group["pixel_coordinates"][:] + s1_lengths = processing_group["voxel_s1_length"][:] + + num_voxels = len(reflection_ids) + assert kabsch_coords.shape == (num_voxels, 3), ( + f"Kabsch coordinates shape mismatch: {kabsch_coords.shape}" + ) + assert pixel_coords.shape == (num_voxels, 3), ( + f"Pixel coordinates shape mismatch: {pixel_coords.shape}" + ) + assert len(s1_lengths) == num_voxels, ( + f"S1 lengths count mismatch: {len(s1_lengths)} vs {num_voxels}" + ) + + # Validate that s1_lengths are positive + assert (s1_lengths > 0).all(), "All s1 lengths should be positive" + + # Validate that reflection IDs are non-negative integers + assert (reflection_ids >= 0).all(), "Reflection IDs 
should be non-negative" + + print( + f"Successfully validated {num_voxels} voxels across {len(set(reflection_ids.flatten()))} reflections" + )