From 9901aa6440d0c11359b7f41d091e5f2e98644884 Mon Sep 17 00:00:00 2001 From: myenugula Date: Sun, 6 Apr 2025 01:07:18 +0800 Subject: [PATCH 1/3] BUG: Fix DatetimeIndex timezone preservation when joining indexes with same timezone but different units --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/core/indexes/base.py | 12 ++++-- pandas/tests/indexes/datetimes/test_setops.py | 40 +++++++++++++++++++ 3 files changed, 49 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index e6fafc8b1b14c..556245aaca353 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -668,7 +668,7 @@ Timedelta Timezones ^^^^^^^^^ -- +- Bug in :meth:`DatetimeIndex.union`, :meth:`DatetimeIndex.intersection`, and :meth:`DatetimeIndex.symmetric_difference` changing timezone to UTC when merging two DatetimeIndex objects with the same timezone but different units (:issue:`60080`) - Numeric diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ff3879018674e..8f2d72b6fbc64 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2961,10 +2961,14 @@ def _dti_setop_align_tzs(self, other: Index, setop: str_t) -> tuple[Index, Index and self.tz is not None and other.tz is not None ): - # GH#39328, GH#45357 - left = self.tz_convert("UTC") - right = other.tz_convert("UTC") - return left, right + # GH#39328, GH#45357, GH#60080 + # If both timezones are the same, no need to convert to UTC + if self.tz == other.tz: + return self, other + else: + left = self.tz_convert("UTC") + right = other.tz_convert("UTC") + return left, right return self, other @final diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 7ef6efad0ff6f..dd314ce454e4c 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -201,6 +201,46 @@ def test_union_same_timezone_different_units(self): 
expected = date_range("2000-01-01", periods=3, tz="UTC").as_unit("us") tm.assert_index_equal(result, expected) + def test_setops_same_nonzero_timezone_different_units(self): + # GH 60080 - fix timezone being changed to UTC when units differ + # but timezone is the same + tz = "UTC+05:00" + idx1 = date_range("2000-01-01", periods=3, tz=tz).as_unit("us") + idx2 = date_range("2000-01-01", periods=3, tz=tz).as_unit("ns") + + # Check pre-conditions + assert idx1.tz == idx2.tz + assert idx1.dtype != idx2.dtype # Different units + + # Test union preserves timezone when units differ + result = idx1.union(idx2) + expected = date_range("2000-01-01", periods=3, tz=tz).as_unit("ns") + tm.assert_index_equal(result, expected) + assert result.tz == idx1.tz # Original timezone is preserved + + # Test with different dates to ensure it's not just returning one of the inputs + idx3 = date_range("2000-01-03", periods=3, tz=tz).as_unit("us") + result = idx1.union(idx3) + expected = DatetimeIndex( + ["2000-01-01", "2000-01-02", "2000-01-03", "2000-01-04", "2000-01-05"], + tz=tz, + ).as_unit("us") + tm.assert_index_equal(result, expected) + assert result.tz == idx1.tz # Original timezone is preserved + + # Test intersection + result = idx1.intersection(idx2) + expected = date_range("2000-01-01", periods=3, tz=tz).as_unit("ns") + tm.assert_index_equal(result, expected) + assert result.tz == idx1.tz # Original timezone is preserved + + # Test symmetric_difference + idx4 = date_range("2000-01-02", periods=3, tz=tz).as_unit("ns") + result = idx1.symmetric_difference(idx4) + expected = DatetimeIndex(["2000-01-01", "2000-01-04"], tz=tz).as_unit("ns") + tm.assert_index_equal(result, expected) + assert result.tz == idx1.tz # Original timezone is preserved + # TODO: moved from test_datetimelike; de-duplicate with version below def test_intersection2(self): first = date_range("2020-01-01", periods=10) From 6e86773c9c151df75b9cbc4a41e8a14fb0d21820 Mon Sep 17 00:00:00 2001 From: myenugula Date: Fri, 
25 Apr 2025 17:21:53 +0800 Subject: [PATCH 2/3] TST: Split timezone preservation test into separate tests Address review comments on the pull request for GH#60080 by splitting the comprehensive test into separate focused tests for each set operation (union, intersection, symmetric_difference). --- pandas/tests/indexes/datetimes/test_setops.py | 33 +++++++++++++++++-- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index dd314ce454e4c..076d5f5950bac 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -201,7 +201,7 @@ def test_union_same_timezone_different_units(self): expected = date_range("2000-01-01", periods=3, tz="UTC").as_unit("us") tm.assert_index_equal(result, expected) - def test_setops_same_nonzero_timezone_different_units(self): + def test_union_same_nonzero_timezone_different_units(self): # GH 60080 - fix timezone being changed to UTC when units differ # but timezone is the same tz = "UTC+05:00" @@ -218,8 +218,14 @@ def test_setops_same_nonzero_timezone_different_units(self): tm.assert_index_equal(result, expected) assert result.tz == idx1.tz # Original timezone is preserved - # Test with different dates to ensure it's not just returning one of the inputs + def test_union_different_dates_same_timezone_different_units(self): + # GH 60080 - fix timezone being changed to UTC when units differ + # but timezone is the same + tz = "UTC+05:00" + idx1 = date_range("2000-01-01", periods=3, tz=tz).as_unit("us") idx3 = date_range("2000-01-03", periods=3, tz=tz).as_unit("us") + + # Test with different dates to ensure it's not just returning one of the inputs result = idx1.union(idx3) expected = DatetimeIndex( ["2000-01-01", "2000-01-02", "2000-01-03", "2000-01-04", "2000-01-05"], @@ -228,14 +234,35 @@ def test_setops_same_nonzero_timezone_different_units(self): tm.assert_index_equal(result, expected) assert result.tz == 
idx1.tz # Original timezone is preserved + def test_intersection_same_timezone_different_units(self): + # GH 60080 - fix timezone being changed to UTC when units differ + # but timezone is the same + tz = "UTC+05:00" + idx1 = date_range("2000-01-01", periods=3, tz=tz).as_unit("us") + idx2 = date_range("2000-01-01", periods=3, tz=tz).as_unit("ns") + + # Check pre-conditions + assert idx1.tz == idx2.tz + assert idx1.dtype != idx2.dtype # Different units + # Test intersection result = idx1.intersection(idx2) expected = date_range("2000-01-01", periods=3, tz=tz).as_unit("ns") tm.assert_index_equal(result, expected) assert result.tz == idx1.tz # Original timezone is preserved - # Test symmetric_difference + def test_symmetric_difference_same_timezone_different_units(self): + # GH 60080 - fix timezone being changed to UTC when units differ + # but timezone is the same + tz = "UTC+05:00" + idx1 = date_range("2000-01-01", periods=3, tz=tz).as_unit("us") idx4 = date_range("2000-01-02", periods=3, tz=tz).as_unit("ns") + + # Check pre-conditions + assert idx1.tz == idx4.tz + assert idx1.dtype != idx4.dtype # Different units + + # Test symmetric_difference result = idx1.symmetric_difference(idx4) expected = DatetimeIndex(["2000-01-01", "2000-01-04"], tz=tz).as_unit("ns") tm.assert_index_equal(result, expected) From 9360ba03b7ea64a62a6c6548849aba8faf6f9316 Mon Sep 17 00:00:00 2001 From: myenugula Date: Sun, 27 Apr 2025 18:44:47 +0800 Subject: [PATCH 3/3] Remove unnecessary assert result.tz == idx1.tz --- pandas/tests/indexes/datetimes/test_setops.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 076d5f5950bac..7a68cb867c94e 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -216,7 +216,6 @@ def test_union_same_nonzero_timezone_different_units(self): result = idx1.union(idx2) expected = date_range("2000-01-01", 
periods=3, tz=tz).as_unit("ns") tm.assert_index_equal(result, expected) - assert result.tz == idx1.tz # Original timezone is preserved def test_union_different_dates_same_timezone_different_units(self): # GH 60080 - fix timezone being changed to UTC when units differ @@ -232,7 +231,6 @@ def test_union_different_dates_same_timezone_different_units(self): tz=tz, ).as_unit("us") tm.assert_index_equal(result, expected) - assert result.tz == idx1.tz # Original timezone is preserved def test_intersection_same_timezone_different_units(self): # GH 60080 - fix timezone being changed to UTC when units differ @@ -249,7 +247,6 @@ def test_intersection_same_timezone_different_units(self): result = idx1.intersection(idx2) expected = date_range("2000-01-01", periods=3, tz=tz).as_unit("ns") tm.assert_index_equal(result, expected) - assert result.tz == idx1.tz # Original timezone is preserved def test_symmetric_difference_same_timezone_different_units(self): # GH 60080 - fix timezone being changed to UTC when units differ @@ -266,7 +263,6 @@ def test_symmetric_difference_same_timezone_different_units(self): result = idx1.symmetric_difference(idx4) expected = DatetimeIndex(["2000-01-01", "2000-01-04"], tz=tz).as_unit("ns") tm.assert_index_equal(result, expected) - assert result.tz == idx1.tz # Original timezone is preserved # TODO: moved from test_datetimelike; de-duplicate with version below def test_intersection2(self):