Skip to content

Commit

Permalink
use mean of min/max years as offset in calculation of datetime64 mean
Browse files Browse the repository at this point in the history
  • Loading branch information
kmuehlbauer committed Feb 7, 2025
1 parent d57f05c commit 86f37b1
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 6 deletions.
15 changes: 9 additions & 6 deletions xarray/core/duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,8 +549,8 @@ def array_any(array, axis=None, keepdims=False, **kwargs):
_mean = _create_nan_agg_method("mean", invariant_0d=True)


def _datetime_nanmin(array):
"""nanmin() function for datetime64.
def _datetime_nanreduce(array, func):
"""nanreduce() function for datetime64.
Caveats that this function deals with:
Expand All @@ -562,7 +562,7 @@ def _datetime_nanmin(array):
assert dtypes.is_datetime_like(dtype)
# (NaT).astype(float) does not produce NaN...
array = where(pandas_isnull(array), np.nan, array.astype(float))
array = min(array, skipna=True)
array = func(array, skipna=True)
if isinstance(array, float):
array = np.array(array)
# ...but (NaN).astype("M8") does produce NaT
Expand Down Expand Up @@ -597,7 +597,7 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
# Set offset to minimum if not given
if offset is None:
if dtypes.is_datetime_like(array.dtype):
offset = _datetime_nanmin(array)
offset = _datetime_nanreduce(array, min)
else:
offset = min(array)

Expand Down Expand Up @@ -717,8 +717,11 @@ def mean(array, axis=None, skipna=None, **kwargs):

array = asarray(array)
if dtypes.is_datetime_like(array.dtype):
offset = _datetime_nanmin(array)

dmin = _datetime_nanreduce(array, min).astype("datetime64[Y]").astype(int)
dmax = _datetime_nanreduce(array, max).astype("datetime64[Y]").astype(int)
offset = (
np.array((dmin + dmax) // 2).astype("datetime64[Y]").astype(array.dtype)
)
# From version 2025.01.2 xarray uses np.datetime64[unit], where unit
# is one of "s", "ms", "us", "ns".
# To not have to worry about the resolution, we just convert the output
Expand Down
13 changes: 13 additions & 0 deletions xarray/tests/test_duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,19 @@ def test_cftime_datetime_mean(dask):
assert_equal(result, expected)


@pytest.mark.parametrize("dask", [False, True])
def test_mean_over_long_spanning_datetime64(dask) -> None:
    """Mean of datetime64[ns] values spanning 1678 to 2260 is 1969-01-01.

    The input holds the two dates ``1678-01-01`` and ``2260-01-01`` with a
    ``NaT`` between them; the test expects ``DataArray.mean()`` to return the
    midpoint ``1969-01-01``. It runs once on the plain array and once on a
    dask-chunked array (skipped when dask is unavailable).
    """
    if dask and not has_dask:
        pytest.skip("requires dask")

    # Expected midpoint of the two valid timestamps, as a 0-d array.
    expected = DataArray(np.array("1969-01-01", dtype="datetime64[ns]"))

    timestamps = np.array(
        ["1678-01-01", "NaT", "2260-01-01"],
        dtype="datetime64[ns]",
    )
    da = DataArray(timestamps, dims=["time"])
    # Chunk size 2 splits the three elements across more than one chunk.
    da = da.chunk({"time": 2}) if dask else da

    result = da.mean()
    assert_equal(result, expected)


@requires_cftime
@requires_dask
def test_mean_over_non_time_dim_of_dataset_with_dask_backed_cftime_data():
Expand Down

0 comments on commit 86f37b1

Please sign in to comment.