fix: preserve uint64 ID type in HDF5 extension check

jay-tau · jay-tau · commit 3142d26a68df · 2025-05-08T10:43:16.000-06:00
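Avoid `iterrows()`, which returns each row as a Series and thereby upcasts the
row to a common dtype, implicitly casting the uint64 Darshan record IDs to
float64. float64 cannot represent every integer above 2**53 exactly, so
`int(row['id'])` may round to a different value, potentially breaking file_map
lookups for large IDs. Use `itertuples()` instead to maintain the original
dtype, and re-enable `break` for early exit once an HDF5 file is found.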
diff --git a/drishti/handlers/darshan_util.py b/drishti/handlers/darshan_util.py
@@ -1037,10 +1037,12 @@ def has_hdf5_extension(self) -> bool:
         if self._has_hdf5_extension is None:
             self._has_hdf5_extension = False
             mpi_df = self.report.records[ModuleType.MPIIO].to_df()
-            for index, row in mpi_df['counters'].iterrows():
-                if self.file_map[int(row['id'])].endswith('.h5') or self.file_map[int(row['id'])].endswith('.hdf5'):
+            # for index, row in mpi_df['counters'].iterrows(): # Implicitly converts all data to np.float64. Problematic for id (np.uint64)
+            for row in mpi_df['counters'].itertuples(index=False):
+                # if self.file_map[int(row['id'])].endswith('.h5') or self.file_map[int(row['id'])].endswith('.hdf5'):
+                if self.file_map[row.id].endswith('.h5') or self.file_map[row.id].endswith('.hdf5'):
                     self._has_hdf5_extension = True
-                    # break
+                    break
         return self._has_hdf5_extension
 
     @cached_property