From deea1a0c2ba12e16d3513f4314754434dbc3985f Mon Sep 17 00:00:00 2001 From: Khemkaran Date: Tue, 22 Jul 2025 11:38:40 +0530 Subject: [PATCH 1/5] Fix issue 61917, modified unique() func in interval.py --- pandas/core/arrays/interval.py | 15 ++++++++------ pandas/tests/arrays/interval/test_interval.py | 20 +++++++++++++++++++ 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 4bcbe2eedee47..5c986d9a48d50 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -1990,12 +1990,15 @@ def _from_combined(self, combined: np.ndarray) -> IntervalArray: return self._shallow_copy(left=new_left, right=new_right) def unique(self) -> IntervalArray: - # No overload variant of "__getitem__" of "ExtensionArray" matches argument - # type "Tuple[slice, int]" - nc = unique( - self._combined.view("complex128")[:, 0] # type: ignore[call-overload] - ) - nc = nc[:, None] + # Using .view("complex128") with negatives causes issues. 
# GH#61917 + combined = self._combined + if not combined.flags.c_contiguous: + combined = np.ascontiguousarray(combined) + structured = combined.view( + [("left", combined.dtype), ("right", combined.dtype)] + )[:, 0] + unique_structured = unique(structured) + nc = unique_structured.view(combined.dtype) return self._from_combined(nc) diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py index 8e13dcf25ceba..2cdeb9ae1701c 100644 --- a/pandas/tests/arrays/interval/test_interval.py +++ b/pandas/tests/arrays/interval/test_interval.py @@ -111,6 +111,26 @@ def test_shift_datetime(self): with pytest.raises(TypeError, match=msg): a.shift(1, fill_value=np.timedelta64("NaT", "ns")) + def test_unique_with_negatives(self): + # GH#61917 + idx_pos = IntervalIndex.from_tuples( + [(3, 4), (3, 4), (2, 3), (2, 3), (1, 2), (1, 2)] + ) + result = idx_pos.unique() + assert result.shape == (3,), f"Expected shape (3,), got {result.shape}" + + idx_neg = IntervalIndex.from_tuples( + [(-4, -3), (-4, -3), (-3, -2), (-3, -2), (-2, -1), (-2, -1)] + ) + result = idx_neg.unique() + assert result.shape == (3,), f"Expected shape (3,), got {result.shape}" + + idx_mix = IntervalIndex.from_tuples( + [(1, 2), (0, 1), (-1, 0), (-2, -1), (-3, -2), (-3, -2)] + ) + result = idx_mix.unique() + assert result.shape == (5,), f"Expected shape (5,), got {result.shape}" + class TestSetitem: def test_set_na(self, left_right_dtypes): From 606d1c5f9abe455f88820e64f05f332dbefc7654 Mon Sep 17 00:00:00 2001 From: Khemkaran Date: Tue, 22 Jul 2025 20:17:41 +0530 Subject: [PATCH 2/5] modified unique() in interval.py to handle object case --- pandas/core/arrays/interval.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 5c986d9a48d50..de1f946aa25e4 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -1992,13 +1992,18 @@ def 
_from_combined(self, combined: np.ndarray) -> IntervalArray: def unique(self) -> IntervalArray: # Using .view("complex128") with negatives causes issues. # GH#61917 combined = self._combined - if not combined.flags.c_contiguous: - combined = np.ascontiguousarray(combined) - structured = combined.view( - [("left", combined.dtype), ("right", combined.dtype)] - )[:, 0] - unique_structured = unique(structured) - nc = unique_structured.view(combined.dtype) + combined = np.ascontiguousarray(combined) + if combined.dtype == np.object_: + nc = unique( + self._combined.view("complex128")[:, 0] # type: ignore[call-overload] + ) + nc = nc[:, None] + else: + structured = combined.view( + [("left", combined.dtype), ("right", combined.dtype)] + )[:, 0] + unique_structured = unique(structured) + nc = unique_structured.view(combined.dtype) return self._from_combined(nc) From eae020c336dc9afd5e48237dda3f054afc7907a7 Mon Sep 17 00:00:00 2001 From: Khemkaran Date: Thu, 24 Jul 2025 22:54:37 +0530 Subject: [PATCH 3/5] modified unique() function in arrays/interval.py --- pandas/core/arrays/interval.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index de1f946aa25e4..10a8313851396 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -1990,20 +1990,20 @@ def _from_combined(self, combined: np.ndarray) -> IntervalArray: return self._shallow_copy(left=new_left, right=new_right) def unique(self) -> IntervalArray: - # Using .view("complex128") with negatives causes issues. 
# GH#61917 - combined = self._combined - combined = np.ascontiguousarray(combined) - if combined.dtype == np.object_: + # No overload variant of "__getitem__" of "ExtensionArray" matches argument + # type "Tuple[slice, int]" + if needs_i8_conversion(self._left.dtype): nc = unique( self._combined.view("complex128")[:, 0] # type: ignore[call-overload] ) - nc = nc[:, None] else: - structured = combined.view( - [("left", combined.dtype), ("right", combined.dtype)] - )[:, 0] - unique_structured = unique(structured) - nc = unique_structured.view(combined.dtype) + nc = unique( + # Using .view("complex128") with negatives causes issues. + # GH#61917 + (np.array(self._combined[:, 0], dtype=complex)) + + (1j * np.array(self._combined[:, 1], dtype=complex)) + ) + nc = nc[:, None] return self._from_combined(nc) From 9be72c7e06fcfe932786ee2635d94b1d07993d9a Mon Sep 17 00:00:00 2001 From: Khemkaran Date: Thu, 31 Jul 2025 20:06:06 +0530 Subject: [PATCH 4/5] make changes to unique() in interval.py --- pandas/core/arrays/interval.py | 5 ++++- pandas/tests/arrays/interval/test_interval.py | 11 ++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 10a8313851396..112d2c682b4e5 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -1971,10 +1971,10 @@ def _from_combined(self, combined: np.ndarray) -> IntervalArray: """ Create a new IntervalArray with our dtype from a 1D complex128 ndarray. 
""" - nc = combined.view("i8").reshape(-1, 2) dtype = self._left.dtype if needs_i8_conversion(dtype): + nc = combined.view("i8").reshape(-1, 2) assert isinstance(self._left, (DatetimeArray, TimedeltaArray)) new_left: DatetimeArray | TimedeltaArray | np.ndarray = type( self._left @@ -1985,6 +1985,9 @@ def _from_combined(self, combined: np.ndarray) -> IntervalArray: )._from_sequence(nc[:, 1], dtype=dtype) else: assert isinstance(dtype, np.dtype) + nc = np.hstack( + [np.real(combined).astype(dtype), np.imag(combined).astype(dtype)] + ).reshape(-1, 2) new_left = nc[:, 0].view(dtype) new_right = nc[:, 1].view(dtype) return self._shallow_copy(left=new_left, right=new_right) diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py index 2cdeb9ae1701c..5c31545bbb6f9 100644 --- a/pandas/tests/arrays/interval/test_interval.py +++ b/pandas/tests/arrays/interval/test_interval.py @@ -117,19 +117,24 @@ def test_unique_with_negatives(self): [(3, 4), (3, 4), (2, 3), (2, 3), (1, 2), (1, 2)] ) result = idx_pos.unique() - assert result.shape == (3,), f"Expected shape (3,), got {result.shape}" + expected = IntervalIndex.from_tuples([(3, 4), (2, 3), (1, 2)]) + tm.assert_index_equal(result, expected) idx_neg = IntervalIndex.from_tuples( [(-4, -3), (-4, -3), (-3, -2), (-3, -2), (-2, -1), (-2, -1)] ) result = idx_neg.unique() - assert result.shape == (3,), f"Expected shape (3,), got {result.shape}" + expected = IntervalIndex.from_tuples([(-4, -3), (-3, -2), (-2, -1)]) + tm.assert_index_equal(result, expected) idx_mix = IntervalIndex.from_tuples( [(1, 2), (0, 1), (-1, 0), (-2, -1), (-3, -2), (-3, -2)] ) result = idx_mix.unique() - assert result.shape == (5,), f"Expected shape (5,), got {result.shape}" + expected = IntervalIndex.from_tuples( + [(1, 2), (0, 1), (-1, 0), (-2, -1), (-3, -2)] + ) + tm.assert_index_equal(result, expected) class TestSetitem: From fce684ff163dc67cb2e890b0befbf4d482f1f696 Mon Sep 17 00:00:00 2001 From: Khemkaran 
Date: Fri, 1 Aug 2025 13:00:27 +0530 Subject: [PATCH 5/5] added complex array creation logic in _combined directly --- pandas/core/arrays/interval.py | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 112d2c682b4e5..097baa44c6042 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -1932,8 +1932,8 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]: if self.dtype == values.dtype: # GH#38353 instead of casting to object, operating on a # complex128 ndarray is much more performant. - left = self._combined.view("complex128") - right = values._combined.view("complex128") + left = self._combined + right = values._combined # error: Argument 1 to "isin" has incompatible type # "Union[ExtensionArray, ndarray[Any, Any], # ndarray[Any, dtype[Any]]]"; expected @@ -1941,7 +1941,7 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]: # _NestedSequence[_SupportsArray[dtype[Any]]], bool, # int, float, complex, str, bytes, _NestedSequence[ # Union[bool, int, float, complex, str, bytes]]]" - return np.isin(left, right).ravel() # type: ignore[arg-type] + return np.isin(left, right).ravel() elif needs_i8_conversion(self.left.dtype) ^ needs_i8_conversion( values.left.dtype @@ -1963,8 +1963,11 @@ def _combined(self) -> IntervalSide: comb = left._concat_same_type( # type: ignore[union-attr] [left, right], axis=1 ) + comb = comb.view("complex128")[:, 0] else: - comb = np.concatenate([left, right], axis=1) + comb = (np.array(left.ravel(), dtype=complex)) + ( + 1j * np.array(right.ravel(), dtype=complex) + ) return comb def _from_combined(self, combined: np.ndarray) -> IntervalArray: @@ -1985,27 +1988,14 @@ def _from_combined(self, combined: np.ndarray) -> IntervalArray: )._from_sequence(nc[:, 1], dtype=dtype) else: assert isinstance(dtype, np.dtype) - nc = np.hstack( - [np.real(combined).astype(dtype), 
np.imag(combined).astype(dtype)] - ).reshape(-1, 2) - new_left = nc[:, 0].view(dtype) - new_right = nc[:, 1].view(dtype) + new_left = np.real(combined).astype(dtype).ravel() + new_right = np.imag(combined).astype(dtype).ravel() return self._shallow_copy(left=new_left, right=new_right) def unique(self) -> IntervalArray: # No overload variant of "__getitem__" of "ExtensionArray" matches argument # type "Tuple[slice, int]" - if needs_i8_conversion(self._left.dtype): - nc = unique( - self._combined.view("complex128")[:, 0] # type: ignore[call-overload] - ) - else: - nc = unique( - # Using .view("complex128") with negatives causes issues. - # GH#61917 - (np.array(self._combined[:, 0], dtype=complex)) - + (1j * np.array(self._combined[:, 1], dtype=complex)) - ) + nc = unique(self._combined) nc = nc[:, None] return self._from_combined(nc)