diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index c14389d753aac..ad277d57a90b8 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -207,8 +207,10 @@ def sorted_labels(self) -> list[np.ndarray]:
 
     def _make_sorted_values(self, values: np.ndarray) -> np.ndarray:
         indexer, _ = self._indexer_and_to_sort
-        sorted_values = algos.take_nd(values, indexer, axis=0)
-        return sorted_values
+        if self.sort:
+            sorted_values = algos.take_nd(values, indexer, axis=0)
+            return sorted_values
+        return values
 
     def _make_selectors(self) -> None:
         new_levels = self.new_index_levels
@@ -566,6 +568,14 @@ def unstack(
 def unstack(
     obj: Series | DataFrame, level, fill_value=None, sort: bool = True
 ) -> Series | DataFrame:
+    if not sort:
+        warnings.warn(
+            "The 'sort=False' parameter in unstack is deprecated and will be "
+            "removed in a future version.",
+            FutureWarning,
+            stacklevel=find_stack_level(),
+        )
+
     if isinstance(level, (tuple, list)):
         if len(level) != 1:
             # _unstack_multiple only handles MultiIndexes,
diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py
--- a/pandas/tests/frame/test_stack_unstack.py
+++ b/pandas/tests/frame/test_stack_unstack.py
@@ -2779,3 +2779,40 @@ def test_stack_preserves_na(dtype, na_value, test_multiindex):
     )
     expected = Series(1, index=expected_index)
     tm.assert_series_equal(result, expected)
+
+
+def test_unstack_sort_false_with_value_counts():
+    # GH#62816
+    # unstack(sort=False) must keep each value under its own column label;
+    # previously the labels and the values ended up in different orders.
+    df = DataFrame(
+        {
+            "Department": ["Finance", "Finance", "HR", "HR"],
+            "Gender": ["Male", "Female", "Male", "Female"],
+            "Location": ["NY", "CA", "NY", "CA"],
+        }
+    )
+
+    # Create a value_counts Series with specific order
+    result_series = df.value_counts(subset=["Department", "Gender", "Location"])
+
+    # sort=False is deprecated, so the call is expected to warn
+    msg = "The 'sort=False' parameter in unstack is deprecated"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = result_series.unstack(fill_value=0, sort=False)
+
+    # Every cell must hold the count of the (row..., column) combination it is
+    # labelled with, or fill_value (0) when that combination was not observed
+    for col in result.columns:
+        for idx in result.index:
+            # Reconstruct the original MultiIndex tuple
+            full_idx = (*idx, col)
+            if full_idx in result_series.index:
+                expected_val = result_series[full_idx]
+            else:
+                expected_val = 0
+            actual_val = result.loc[idx, col]
+            assert actual_val == expected_val, (
+                f"Mismatch at {idx}, {col}: "
+                f"expected {expected_val}, got {actual_val}"
+            )