Skip to content

Commit 00a7c41

Browse files
authored
REF: inline array_to_datetime64 cases, update tests (#63015)
1 parent 5641979 commit 00a7c41

File tree

10 files changed

+96
-107
lines changed

10 files changed

+96
-107
lines changed

pandas/_libs/tslibs/timedeltas.pyx

Lines changed: 63 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -333,58 +333,39 @@ cdef convert_to_timedelta64(object ts, str unit):
333333
334334
Handle these types of objects:
335335
- timedelta/Timedelta
336-
- timedelta64
337-
- an offset
338-
- np.int64 (with unit providing a possible modifier)
339-
- None/NaT
340336
341-
Return an ns based int64
337+
Return a timedelta64[ns] object
342338
"""
343339
# Caller is responsible for checking unit not in ["Y", "y", "M"]
344-
if checknull_with_nat_and_na(ts):
345-
return np.timedelta64(NPY_NAT, "ns")
346-
elif isinstance(ts, _Timedelta):
340+
if isinstance(ts, _Timedelta):
347341
# already in the proper format
348342
if ts._creso != NPY_FR_ns:
349343
ts = ts.as_unit("ns").asm8
350344
else:
351345
ts = np.timedelta64(ts._value, "ns")
352-
elif cnp.is_timedelta64_object(ts):
353-
ts = ensure_td64ns(ts)
354-
elif is_integer_object(ts):
355-
if ts == NPY_NAT:
356-
return np.timedelta64(NPY_NAT, "ns")
357-
else:
358-
ts = _maybe_cast_from_unit(ts, unit)
359-
elif is_float_object(ts):
360-
ts = _maybe_cast_from_unit(ts, unit)
361-
elif isinstance(ts, str):
362-
if (len(ts) > 0 and ts[0] == "P") or (len(ts) > 1 and ts[:2] == "-P"):
363-
ts = parse_iso_format_string(ts)
364-
else:
365-
ts = parse_timedelta_string(ts)
366-
ts = np.timedelta64(ts, "ns")
367-
elif is_tick_object(ts):
368-
ts = np.timedelta64(ts.nanos, "ns")
369346

370-
if PyDelta_Check(ts):
347+
elif PyDelta_Check(ts):
371348
ts = np.timedelta64(delta_to_nanoseconds(ts), "ns")
372349
elif not cnp.is_timedelta64_object(ts):
373350
raise TypeError(f"Invalid type for timedelta scalar: {type(ts)}")
374351
return ts.astype("timedelta64[ns]")
375352

376353

377-
cdef _maybe_cast_from_unit(ts, str unit):
354+
cdef _numeric_to_td64ns(object item, str unit):
378355
# caller is responsible for checking
379356
# assert unit not in ["Y", "y", "M"]
357+
# assert is_integer_object(item) or is_float_object(item)
358+
if is_integer_object(item) and item == NPY_NAT:
359+
return np.timedelta64(NPY_NAT, "ns")
360+
380361
try:
381-
ts = cast_from_unit(ts, unit)
362+
item = cast_from_unit(item, unit)
382363
except OutOfBoundsDatetime as err:
383364
raise OutOfBoundsTimedelta(
384-
f"Cannot cast {ts} from {unit} to 'ns' without overflow."
365+
f"Cannot cast {item} from {unit} to 'ns' without overflow."
385366
) from err
386367

387-
ts = np.timedelta64(ts, "ns")
368+
ts = np.timedelta64(item, "ns")
388369
return ts
389370

390371

@@ -408,10 +389,11 @@ def array_to_timedelta64(
408389
cdef:
409390
Py_ssize_t i, n = values.size
410391
ndarray result = np.empty((<object>values).shape, dtype="m8[ns]")
411-
object item
392+
object item, td64ns_obj
412393
int64_t ival
413394
cnp.broadcast mi = cnp.PyArray_MultiIterNew2(result, values)
414395
cnp.flatiter it
396+
str parsed_unit = parse_timedelta_unit(unit or "ns")
415397

416398
if values.descr.type_num != cnp.NPY_OBJECT:
417399
# raise here otherwise we segfault below
@@ -431,70 +413,63 @@ def array_to_timedelta64(
431413
)
432414
cnp.PyArray_ITER_NEXT(it)
433415

434-
# Usually, we have all strings. If so, we hit the fast path.
435-
# If this path fails, we try conversion a different way, and
436-
# this is where all of the error handling will take place.
437-
try:
438-
for i in range(n):
439-
# Analogous to: item = values[i]
440-
item = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
416+
for i in range(n):
417+
item = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
441418

442-
ival = _item_to_timedelta64_fastpath(item)
419+
try:
420+
if checknull_with_nat_and_na(item):
421+
ival = NPY_NAT
443422

444-
# Analogous to: iresult[i] = ival
445-
(<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival
423+
elif cnp.is_timedelta64_object(item):
424+
td64ns_obj = ensure_td64ns(item)
425+
ival = cnp.get_timedelta64_value(td64ns_obj)
446426

447-
cnp.PyArray_MultiIter_NEXT(mi)
427+
elif isinstance(item, _Timedelta):
428+
if item._creso != NPY_FR_ns:
429+
ival = item.as_unit("ns")._value
430+
else:
431+
ival = item._value
432+
433+
elif PyDelta_Check(item):
434+
# i.e. isinstance(item, timedelta)
435+
ival = delta_to_nanoseconds(item)
436+
437+
elif isinstance(item, str):
438+
if (
439+
(len(item) > 0 and item[0] == "P")
440+
or (len(item) > 1 and item[:2] == "-P")
441+
):
442+
ival = parse_iso_format_string(item)
443+
else:
444+
ival = parse_timedelta_string(item)
448445

449-
except (TypeError, ValueError):
450-
cnp.PyArray_MultiIter_RESET(mi)
446+
elif is_tick_object(item):
447+
ival = item.nanos
451448

452-
parsed_unit = parse_timedelta_unit(unit or "ns")
453-
for i in range(n):
454-
item = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
449+
elif is_integer_object(item) or is_float_object(item):
450+
td64ns_obj = _numeric_to_td64ns(item, parsed_unit)
451+
ival = cnp.get_timedelta64_value(td64ns_obj)
455452

456-
ival = _item_to_timedelta64(item, parsed_unit, errors)
453+
else:
454+
raise TypeError(f"Invalid type for timedelta scalar: {type(item)}")
455+
456+
except ValueError as err:
457+
if errors == "coerce":
458+
ival = NPY_NAT
459+
elif "unit abbreviation w/o a number" in str(err):
460+
# re-raise with more pertinent message
461+
msg = f"Could not convert '{item}' to NumPy timedelta"
462+
raise ValueError(msg) from err
463+
else:
464+
raise
457465

458-
(<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival
466+
(<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival
459467

460-
cnp.PyArray_MultiIter_NEXT(mi)
468+
cnp.PyArray_MultiIter_NEXT(mi)
461469

462470
return result
463471

464472

465-
cdef int64_t _item_to_timedelta64_fastpath(object item) except? -1:
466-
"""
467-
See array_to_timedelta64.
468-
"""
469-
if item is NaT:
470-
# we allow this check in the fast-path because NaT is a C-object
471-
# so this is an inexpensive check
472-
return NPY_NAT
473-
else:
474-
return parse_timedelta_string(item)
475-
476-
477-
cdef int64_t _item_to_timedelta64(
478-
object item,
479-
str parsed_unit,
480-
str errors
481-
) except? -1:
482-
"""
483-
See array_to_timedelta64.
484-
"""
485-
try:
486-
return cnp.get_timedelta64_value(convert_to_timedelta64(item, parsed_unit))
487-
except ValueError as err:
488-
if errors == "coerce":
489-
return NPY_NAT
490-
elif "unit abbreviation w/o a number" in str(err):
491-
# re-raise with more pertinent message
492-
msg = f"Could not convert '{item}' to NumPy timedelta"
493-
raise ValueError(msg) from err
494-
else:
495-
raise
496-
497-
498473
@cython.cpow(True)
499474
cdef int64_t parse_timedelta_string(str ts) except? -1:
500475
"""
@@ -2154,12 +2129,14 @@ class Timedelta(_Timedelta):
21542129
new_value = delta_to_nanoseconds(value, reso=new_reso)
21552130
return cls._from_value_and_reso(new_value, reso=new_reso)
21562131

2132+
elif checknull_with_nat_and_na(value):
2133+
return NaT
2134+
21572135
elif is_integer_object(value) or is_float_object(value):
21582136
# unit=None is de-facto 'ns'
21592137
unit = parse_timedelta_unit(unit)
2160-
value = convert_to_timedelta64(value, unit)
2161-
elif checknull_with_nat_and_na(value):
2162-
return NaT
2138+
value = _numeric_to_td64ns(value, unit)
2139+
21632140
else:
21642141
raise ValueError(
21652142
"Value must be Timedelta, string, integer, "

pandas/tests/arithmetic/test_datetime64.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1011,7 +1011,9 @@ def test_dt64arr_sub_timestamp_tzaware(self, box_with_array):
10111011

10121012
ser = tm.box_expected(ser, box_with_array)
10131013

1014-
delta_series = Series([np.timedelta64(0, "D"), np.timedelta64(1, "D")])
1014+
delta_series = Series(
1015+
[np.timedelta64(0, "D"), np.timedelta64(1, "D")], dtype="m8[ns]"
1016+
)
10151017
expected = tm.box_expected(delta_series, box_with_array)
10161018

10171019
tm.assert_equal(ser - ts, expected)

pandas/tests/arithmetic/test_period.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1641,7 +1641,9 @@ def test_pi_sub_period(self):
16411641
result = np.subtract(Period("2012-01", freq="M"), idx)
16421642
tm.assert_index_equal(result, exp)
16431643

1644-
exp = TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name="idx")
1644+
exp = TimedeltaIndex(
1645+
[np.nan, np.nan, np.nan, np.nan], name="idx", dtype="m8[ns]"
1646+
)
16451647
result = idx - Period("NaT", freq="M")
16461648
tm.assert_index_equal(result, exp)
16471649
assert result.freq == exp.freq
@@ -1655,7 +1657,7 @@ def test_pi_sub_pdnat(self):
16551657
idx = PeriodIndex(
16561658
["2011-01", "2011-02", "NaT", "2011-04"], freq="M", name="idx"
16571659
)
1658-
exp = TimedeltaIndex([pd.NaT] * 4, name="idx")
1660+
exp = TimedeltaIndex([pd.NaT] * 4, name="idx", dtype="m8[ns]")
16591661
tm.assert_index_equal(pd.NaT - idx, exp)
16601662
tm.assert_index_equal(idx - pd.NaT, exp)
16611663

@@ -1674,6 +1676,8 @@ def test_pi_sub_period_nat(self):
16741676
exp = pd.Index([12 * off, pd.NaT, 10 * off, 9 * off], name="idx")
16751677
tm.assert_index_equal(result, exp)
16761678

1677-
exp = TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name="idx")
1679+
exp = TimedeltaIndex(
1680+
[np.nan, np.nan, np.nan, np.nan], name="idx", dtype="m8[ns]"
1681+
)
16781682
tm.assert_index_equal(idx - Period("NaT", freq="M"), exp)
16791683
tm.assert_index_equal(Period("NaT", freq="M") - idx, exp)

pandas/tests/arithmetic/test_timedelta64.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -850,7 +850,7 @@ def test_operators_timedelta64(self):
850850
assert rs.dtype == "timedelta64[ns]"
851851

852852
df = DataFrame({"A": v1})
853-
td = Series([timedelta(days=i) for i in range(3)])
853+
td = Series([timedelta(days=i) for i in range(3)], dtype="m8[ns]")
854854
assert td.dtype == "timedelta64[ns]"
855855

856856
# series on the rhs
@@ -875,7 +875,9 @@ def test_operators_timedelta64(self):
875875

876876
# datetimes on rhs
877877
result = df["A"] - datetime(2001, 1, 1)
878-
expected = Series([timedelta(days=4017 + i) for i in range(3)], name="A")
878+
expected = Series(
879+
[timedelta(days=4017 + i) for i in range(3)], name="A", dtype="m8[ns]"
880+
)
879881
tm.assert_series_equal(result, expected)
880882
assert result.dtype == "m8[ns]"
881883

@@ -1559,7 +1561,7 @@ def test_tdi_rmul_arraylike(self, other, box_with_array):
15591561

15601562
def test_td64arr_mul_bool_scalar_raises(self, box_with_array):
15611563
# GH#58054
1562-
ser = Series(np.arange(5) * timedelta(hours=1))
1564+
ser = Series(np.arange(5) * timedelta(hours=1), dtype="m8[ns]")
15631565
obj = tm.box_expected(ser, box_with_array)
15641566

15651567
msg = r"Cannot multiply 'timedelta64\[ns\]' by bool"
@@ -1582,7 +1584,7 @@ def test_td64arr_mul_bool_scalar_raises(self, box_with_array):
15821584
)
15831585
def test_td64arr_mul_bool_raises(self, dtype, box_with_array):
15841586
# GH#58054
1585-
ser = Series(np.arange(5) * timedelta(hours=1))
1587+
ser = Series(np.arange(5) * timedelta(hours=1), dtype="m8[ns]")
15861588
obj = tm.box_expected(ser, box_with_array)
15871589

15881590
other = Series(np.arange(5) < 0.5, dtype=dtype)
@@ -1611,7 +1613,7 @@ def test_td64arr_mul_bool_raises(self, dtype, box_with_array):
16111613
],
16121614
)
16131615
def test_td64arr_mul_masked(self, dtype, box_with_array):
1614-
ser = Series(np.arange(5) * timedelta(hours=1))
1616+
ser = Series(np.arange(5) * timedelta(hours=1), dtype="m8[ns]")
16151617
obj = tm.box_expected(ser, box_with_array)
16161618

16171619
other = Series(np.arange(5), dtype=dtype)

pandas/tests/frame/test_reductions.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -747,12 +747,14 @@ def test_operators_timedelta64(self):
747747

748748
# works when only those columns are selected
749749
result = mixed[["A", "B"]].min(axis=1)
750-
expected = Series([timedelta(days=-1)] * 3)
750+
expected = Series([timedelta(days=-1)] * 3, dtype="m8[ns]")
751751
tm.assert_series_equal(result, expected)
752752

753753
result = mixed[["A", "B"]].min()
754754
expected = Series(
755-
[timedelta(seconds=5 * 60 + 5), timedelta(days=-1)], index=["A", "B"]
755+
[timedelta(seconds=5 * 60 + 5), timedelta(days=-1)],
756+
index=["A", "B"],
757+
dtype="m8[ns]",
756758
)
757759
tm.assert_series_equal(result, expected)
758760

pandas/tests/indexes/timedeltas/test_formats.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def test_repr_round_days_non_nano(self):
2222

2323
@pytest.mark.parametrize("method", ["__repr__", "__str__"])
2424
def test_representation(self, method):
25-
idx1 = TimedeltaIndex([], freq="D")
25+
idx1 = TimedeltaIndex([], freq="D", dtype="m8[ns]")
2626
idx2 = TimedeltaIndex(["1 days"], freq="D")
2727
idx3 = TimedeltaIndex(["1 days", "2 days"], freq="D")
2828
idx4 = TimedeltaIndex(["1 days", "2 days", "3 days"], freq="D")
@@ -53,7 +53,7 @@ def test_representation(self, method):
5353

5454
# TODO: this is a Series.__repr__ test
5555
def test_representation_to_series(self):
56-
idx1 = TimedeltaIndex([], freq="D")
56+
idx1 = TimedeltaIndex([], freq="D", dtype="m8[ns]")
5757
idx2 = TimedeltaIndex(["1 days"], freq="D")
5858
idx3 = TimedeltaIndex(["1 days", "2 days"], freq="D")
5959
idx4 = TimedeltaIndex(["1 days", "2 days", "3 days"], freq="D")
@@ -83,7 +83,7 @@ def test_representation_to_series(self):
8383

8484
def test_summary(self):
8585
# GH#9116
86-
idx1 = TimedeltaIndex([], freq="D")
86+
idx1 = TimedeltaIndex([], freq="D", dtype="m8[ns]")
8787
idx2 = TimedeltaIndex(["1 days"], freq="D")
8888
idx3 = TimedeltaIndex(["1 days", "2 days"], freq="D")
8989
idx4 = TimedeltaIndex(["1 days", "2 days", "3 days"], freq="D")

pandas/tests/indexes/timedeltas/test_setops.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ def test_zero_length_input_index(self, sort):
160160
# if no overlap exists return empty index
161161
(
162162
timedelta_range("1 day", periods=10, freq="h", name="idx")[5:],
163-
TimedeltaIndex([], freq="h", name="idx"),
163+
TimedeltaIndex([], freq="h", name="idx", dtype="m8[ns]"),
164164
),
165165
],
166166
)

pandas/tests/io/json/test_pandas.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1131,7 +1131,7 @@ def test_url(self, field, dtype, httpserver):
11311131
def test_timedelta(self):
11321132
converter = lambda x: pd.to_timedelta(x, unit="ms")
11331133

1134-
ser = Series([timedelta(23), timedelta(seconds=5)])
1134+
ser = Series([timedelta(23), timedelta(seconds=5)], dtype="m8[ns]")
11351135
assert ser.dtype == "timedelta64[ns]"
11361136

11371137
msg = (
@@ -1148,7 +1148,7 @@ def test_timedelta(self):
11481148
result = read_json(StringIO(ser.to_json()), typ="series").apply(converter)
11491149
tm.assert_series_equal(result, ser)
11501150

1151-
frame = DataFrame([timedelta(23), timedelta(seconds=5)])
1151+
frame = DataFrame([timedelta(23), timedelta(seconds=5)], dtype="m8[ns]")
11521152
assert frame[0].dtype == "timedelta64[ns]"
11531153

11541154
with tm.assert_produces_warning(Pandas4Warning, match=msg):

pandas/tests/series/test_arithmetic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@ def test_sub_datetimelike_align(self):
307307
dt.iloc[2] = np.nan
308308
dt2 = dt[::-1]
309309

310-
expected = Series([timedelta(0), timedelta(0), pd.NaT])
310+
expected = Series([timedelta(0), timedelta(0), pd.NaT], dtype="m8[ns]")
311311
# name is reset
312312
result = dt2 - dt
313313
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)