PERF: fixed performance issue on large Pandas DataFrames

gdementen · gdementen · commit 1cdb0f9b764a · 2025-10-23T10:55:11.000+02:00
diff --git a/larray_editor/arrayadapter.py b/larray_editor/arrayadapter.py
@@ -1748,7 +1748,17 @@ def get_hlabels_values(self, start, stop):
             return [index.values]
 
     def get_values(self, h_start, v_start, h_stop, v_stop):
-        return self.sorted_data.iloc[v_start:v_stop, h_start:h_stop].values
+        # Sadly, as of Pandas 2.2.3, the previous version of this code:
+        #     df.iloc[v_start:v_stop, h_start:h_stop].values
+        # first copies all mentioned columns in their entirety, then takes the
+        # subset of the rows (and only then converts to a numpy array).
+        # As a workaround, we first take each *single* column in its entirety
+        # which, in most cases, is a view, then take the row slice of each
+        # (and finally recombine the slices using numpy stack)
+        df = self.sorted_data
+        columns = [df.iloc[:, i].values for i in range(h_start, h_stop)]
+        chunks = [col[v_start:v_stop] for col in columns]
+        return np.stack(chunks, axis=1, dtype=object)
 
     def can_sort_hlabel(self, row_idx, col_idx):
         # allow sorting on columns but not rows