-
-
Couldn't load subscription status.
- Fork 19.2k
FIX: unstack(sort=False) data misalignment (#62816) #62818
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| { | ||
| "permissions": { | ||
| "allow": [ | ||
| "WebFetch(domain:github.com)" | ||
| ], | ||
| "deny": [], | ||
| "ask": [] | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -207,8 +207,10 @@ def sorted_labels(self) -> list[np.ndarray]: | |
|
|
||
| def _make_sorted_values(self, values: np.ndarray) -> np.ndarray: | ||
| indexer, _ = self._indexer_and_to_sort | ||
| sorted_values = algos.take_nd(values, indexer, axis=0) | ||
| return sorted_values | ||
| if self.sort: | ||
| sorted_values = algos.take_nd(values, indexer, axis=0) | ||
| return sorted_values | ||
| return values | ||
|
|
||
| def _make_selectors(self) -> None: | ||
| new_levels = self.new_index_levels | ||
|
|
@@ -566,6 +568,14 @@ def unstack( | |
| def unstack( | ||
| obj: Series | DataFrame, level, fill_value=None, sort: bool = True | ||
| ) -> Series | DataFrame: | ||
| if not sort: | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was indeed an intention to deprecate sort, but then it was discovered that In any case, introducing a deprecation and fixing a bug should be different PRs (assuming it's possible). |
||
| warnings.warn( | ||
| "The 'sort=False' parameter in unstack is deprecated and will be " | ||
| "removed in a future version.", | ||
| FutureWarning, | ||
| stacklevel=find_stack_level(), | ||
| ) | ||
|
|
||
| if isinstance(level, (tuple, list)): | ||
| if len(level) != 1: | ||
| # _unstack_multiple only handles MultiIndexes, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2779,3 +2779,38 @@ def test_stack_preserves_na(dtype, na_value, test_multiindex): | |
| ) | ||
| expected = Series(1, index=expected_index) | ||
| tm.assert_series_equal(result, expected) | ||
|
|
||
|
|
||
| def test_unstack_sort_false_with_value_counts(): | ||
| # GH#62816 | ||
| # Test that unstack(sort=False) correctly aligns column labels with data values | ||
| # Previously, column labels were reordered but data values were not, causing misalignment | ||
| df = DataFrame( | ||
| { | ||
| "Department": ["Finance", "Finance", "HR", "HR"], | ||
| "Gender": ["Male", "Female", "Male", "Female"], | ||
| "Location": ["NY", "CA", "NY", "CA"], | ||
| } | ||
| ) | ||
|
|
||
| # Create a value_counts Series with specific order | ||
| result_series = df.value_counts(subset=["Department", "Gender", "Location"]) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Produce the input data directly, don't rely on other pandas methods (as much as is reasonable - some methods certainly have be called). |
||
|
|
||
| # Unstack with sort=False should preserve the order of values | ||
| result = result_series.unstack(fill_value=0, sort=False) | ||
|
Comment on lines
+2799
to
+2800
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Remove comments that repeat the code. |
||
|
|
||
| # Verify that the data values match their column labels | ||
| # The key test is that column order matches the data order | ||
| for col in result.columns: | ||
| for idx in result.index: | ||
|
Comment on lines
+2802
to
+2805
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add |
||
| # Reconstruct the original MultiIndex tuple | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add |
||
| full_idx = (*idx, col) | ||
| if full_idx in result_series.index: | ||
| expected_val = result_series[full_idx] | ||
| actual_val = result.loc[idx, col] | ||
| assert actual_val == expected_val, ( | ||
| f"Mismatch at {idx}, {col}: expected {expected_val}, got {actual_val}" | ||
| ) | ||
| else: | ||
| # Should be fill_value (0) if not in original series | ||
| assert result.loc[idx, col] == 0 | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you detail what this does.