diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index e6fafc8b1b14c..7b252a0e8a03e 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -416,6 +416,7 @@ Other Deprecations
 - Deprecated allowing non-keyword arguments in :meth:`Series.to_string` except ``buf``. (:issue:`57280`)
 - Deprecated behavior of :meth:`.DataFrameGroupBy.groups` and :meth:`.SeriesGroupBy.groups`, in a future version ``groups`` by one element list will return tuple instead of scalar. (:issue:`58858`)
 - Deprecated behavior of :meth:`Series.dt.to_pytimedelta`, in a future version this will return a :class:`Series` containing python ``datetime.timedelta`` objects instead of an ``ndarray`` of timedelta; this matches the behavior of other :meth:`Series.dt` properties. (:issue:`57463`)
+- Deprecated empty string in :class:`Timestamp`, in a future version passing ``""`` will raise a ``ValueError`` instead of returning ``NaT`` (:issue:`61149`)
 - Deprecated lowercase strings ``d``, ``b`` and ``c`` denoting frequencies in :class:`Day`, :class:`BusinessDay` and :class:`CustomBusinessDay` in favour of ``D``, ``B`` and ``C`` (:issue:`58998`)
 - Deprecated lowercase strings ``w``, ``w-mon``, ``w-tue``, etc. denoting frequencies in :class:`Week` in favour of ``W``, ``W-MON``, ``W-TUE``, etc. (:issue:`58998`)
 - Deprecated parameter ``method`` in :meth:`DataFrame.reindex_like` / :meth:`Series.reindex_like` (:issue:`58667`)
diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx
index 23197b9a55afc..9c56f96fc138b 100644
--- a/pandas/_libs/tslibs/timestamps.pyx
+++ b/pandas/_libs/tslibs/timestamps.pyx
@@ -2587,6 +2587,14 @@ class Timestamp(_Timestamp):
                 tzinfo is None):
             return ts_input
         elif isinstance(ts_input, str):
+            if ts_input == "":
+                warnings.warn(
+                    "Passing an empty string to Timestamp is deprecated and will raise "
+                    "a ValueError in a future version.",
+                    FutureWarning,
+                    stacklevel=find_stack_level(),
+                )
+
             # User passed a date string to parse.
             # Check that the user didn't also pass a date attribute kwarg.
             if any(arg is not None for arg in _date_attributes):
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index 8838fc7eed2f7..aea75d29e361e 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -39,7 +39,10 @@
     to_timedelta,
 )
 import pandas._testing as tm
-from pandas.api.types import is_scalar
+from pandas.api.types import (
+    is_datetime64_any_dtype,
+    is_scalar,
+)
 from pandas.core.indexing import _one_ellipsis_message
 from pandas.tests.indexing.common import check_indexing_smoketest_or_raises
 
@@ -2084,7 +2087,17 @@ def test_loc_setitem_with_expansion_nonunique_index(self, index):
         assert key not in index  # otherwise test is invalid
         # TODO: using a tuple key breaks here in many cases
 
-        exp_index = index.insert(len(index), key)
+        msg = "Passing an empty string to Timestamp"
+        contains_datetime = isinstance(index, MultiIndex) and any(
+            is_datetime64_any_dtype(index.get_level_values(i))
+            for i in range(index.nlevels)
+        )
+        if contains_datetime:
+            with tm.assert_produces_warning(FutureWarning, match=msg):
+                exp_index = index.insert(len(index), key)
+        else:
+            exp_index = index.insert(len(index), key)
+
         if isinstance(index, MultiIndex):
             assert exp_index[-1][0] == key
         else:
@@ -2094,19 +2107,31 @@ def test_loc_setitem_with_expansion_nonunique_index(self, index):
 
         # Add new row, but no new columns
         df = orig.copy()
-        df.loc[key, 0] = N
+        if contains_datetime:
+            with tm.assert_produces_warning(FutureWarning, match=msg):
+                df.loc[key, 0] = N
+        else:
+            df.loc[key, 0] = N
         tm.assert_frame_equal(df, expected)
 
         # add new row on a Series
         ser = orig.copy()[0]
-        ser.loc[key] = N
+        if contains_datetime:
+            with tm.assert_produces_warning(FutureWarning, match=msg):
+                ser.loc[key] = N
+        else:
+            ser.loc[key] = N
         # the series machinery lets us preserve int dtype instead of float
         expected = expected[0].astype(np.int64)
         tm.assert_series_equal(ser, expected)
 
         # add new row and new column
         df = orig.copy()
-        df.loc[key, 1] = N
+        if contains_datetime:
+            with tm.assert_produces_warning(FutureWarning, match=msg):
+                df.loc[key, 1] = N
+        else:
+            df.loc[key, 1] = N
         expected = DataFrame(
             {0: list(arr) + [np.nan], 1: [np.nan] * N + [float(N)]},
             index=exp_index,
diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
index 6d762fdeb8d79..d38387e523da9 100644
--- a/pandas/tests/io/formats/test_to_csv.py
+++ b/pandas/tests/io/formats/test_to_csv.py
@@ -302,7 +302,9 @@ def test_to_csv_date_format_in_categorical(self):
         ser = pd.Series(pd.to_datetime(["2021-03-27", pd.NaT], format="%Y-%m-%d"))
         ser = ser.astype("category")
         expected = tm.convert_rows_list_to_csv_str(["0", "2021-03-27", '""'])
-        assert ser.to_csv(index=False) == expected
+        msg = "Passing an empty string to Timestamp"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            assert ser.to_csv(index=False) == expected
 
         ser = pd.Series(
             pd.date_range(
@@ -310,7 +312,8 @@ def test_to_csv_date_format_in_categorical(self):
             ).append(pd.DatetimeIndex([pd.NaT]))
         )
         ser = ser.astype("category")
-        assert ser.to_csv(index=False, date_format="%Y-%m-%d") == expected
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            assert ser.to_csv(index=False, date_format="%Y-%m-%d") == expected
 
     def test_to_csv_float_ea_float_format(self):
         # GH#45991
diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py
index b20df43dd49a6..c274670e2acdc 100644
--- a/pandas/tests/scalar/test_nat.py
+++ b/pandas/tests/scalar/test_nat.py
@@ -109,7 +109,13 @@ def test_nat_vector_field_access():
     "value", [None, np.nan, iNaT, float("nan"), NaT, "NaT", "nat", "", "NAT"]
 )
 def test_identity(klass, value):
-    assert klass(value) is NaT
+    if value == "" and klass is Timestamp:
+        msg = "Passing an empty string to Timestamp"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = klass(value)
+    else:
+        result = klass(value)
+    assert result is NaT
 
 
 @pytest.mark.parametrize("klass", [Timestamp, Timedelta])
diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py
index 2c97c4a32e0aa..d7721b008ccad 100644
--- a/pandas/tests/scalar/timestamp/test_constructors.py
+++ b/pandas/tests/scalar/timestamp/test_constructors.py
@@ -62,6 +62,12 @@ def test_constructor_float_not_round_with_YM_unit_raises(self):
         with pytest.raises(ValueError, match=msg):
             Timestamp(150.5, unit="M")
 
+    def test_constructor_with_empty_string(self):
+        # GH#61149
+        msg = "Passing an empty string to Timestamp"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            Timestamp("")
+
     @pytest.mark.parametrize(
         "value, check_kwargs",
         [
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index a23e6d9b3973a..321a68cad2f38 100644
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -313,7 +313,9 @@ def test_multiindex_dt_with_nan(self):
                 names=[None, "Date"],
             ),
         )
-        df = df.reset_index()
+        msg = "Passing an empty string to Timestamp"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            df = df.reset_index()
         result = df[df.columns[0]]
         expected = Series(["a", "b", "c", "d"], name=("sub", np.nan))
         tm.assert_series_equal(result, expected)