Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,16 @@
from contextlib import suppress
from html import escape
from textwrap import dedent
from typing import TYPE_CHECKING, Any, Concatenate, ParamSpec, TypeVar, Union, overload
from typing import (
TYPE_CHECKING,
Any,
Concatenate,
Literal,
ParamSpec,
TypeVar,
Union,
overload,
)

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -925,6 +934,7 @@ def _resample(
offset: pd.Timedelta | datetime.timedelta | str | None,
origin: str | DatetimeLike,
restore_coord_dims: bool | None,
boundaries: Literal["exact", "trim"] | None = None,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we use `drop_incomplete: bool`, as in `SeasonResampler`? Note that data could also be missing in the middle of the series, not only at the boundaries.

**indexer_kwargs: ResampleCompatible | Resampler,
) -> T_Resample:
"""Returns a Resample object for performing resampling operations.
Expand Down Expand Up @@ -960,6 +970,11 @@ def _resample(
restore_coord_dims : bool, optional
If True, also restore the dimension order of multi-dimensional
coordinates.
boundaries : {"exact", "trim"}, optional
How to handle boundaries when the data doesn't evenly fit the resampling
frequency. If 'exact', a ValueError will be raised if the data doesn't
evenly fit. If 'trim', incomplete periods are dropped. If None (default),
uses the current behavior (includes incomplete periods).
**indexer_kwargs : {dim: freq}
The keyword arguments form of ``indexer``.
One of indexer or indexer_kwargs must be provided.
Expand Down Expand Up @@ -1107,7 +1122,12 @@ def _resample(
grouper: Resampler
if isinstance(freq, ResampleCompatible):
grouper = TimeResampler(
freq=freq, closed=closed, label=label, origin=origin, offset=offset
freq=freq,
closed=closed,
label=label,
origin=origin,
offset=offset,
boundaries=boundaries,
)
elif isinstance(freq, Resampler):
grouper = freq
Expand Down
7 changes: 7 additions & 0 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -7433,6 +7433,7 @@ def resample(
offset: pd.Timedelta | datetime.timedelta | str | None = None,
origin: str | DatetimeLike = "start_day",
restore_coord_dims: bool | None = None,
boundaries: Literal["exact", "trim"] | None = None,
**indexer_kwargs: ResampleCompatible | Resampler,
) -> DataArrayResample:
"""Returns a Resample object for performing resampling operations.
Expand Down Expand Up @@ -7468,6 +7469,11 @@ def resample(
restore_coord_dims : bool, optional
If True, also restore the dimension order of multi-dimensional
coordinates.
boundaries : {"exact", "trim"}, optional
How to handle boundaries when the data doesn't evenly fit the resampling
frequency. If 'exact', a ValueError will be raised if the data doesn't
evenly fit. If 'trim', incomplete periods are dropped. If None (default),
uses the current behavior (includes incomplete periods).
**indexer_kwargs : str, datetime.timedelta, pd.Timedelta, pd.DateOffset, or Resampler
The keyword arguments form of ``indexer``.
One of indexer or indexer_kwargs must be provided.
Expand Down Expand Up @@ -7572,6 +7578,7 @@ def resample(
offset=offset,
origin=origin,
restore_coord_dims=restore_coord_dims,
boundaries=boundaries,
**indexer_kwargs,
)

Expand Down
7 changes: 7 additions & 0 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -10371,6 +10371,7 @@ def resample(
offset: pd.Timedelta | datetime.timedelta | str | None = None,
origin: str | DatetimeLike = "start_day",
restore_coord_dims: bool | None = None,
boundaries: Literal["exact", "trim"] | None = None,
**indexer_kwargs: ResampleCompatible | Resampler,
) -> DatasetResample:
"""Returns a Resample object for performing resampling operations.
Expand Down Expand Up @@ -10406,6 +10407,11 @@ def resample(
restore_coord_dims : bool, optional
If True, also restore the dimension order of multi-dimensional
coordinates.
boundaries : {"exact", "trim"}, optional
How to handle boundaries when the data doesn't evenly fit the resampling
frequency. If 'exact', a ValueError will be raised if the data doesn't
evenly fit. If 'trim', incomplete periods are dropped. If None (default),
uses the current behavior (includes incomplete periods).
**indexer_kwargs : str, datetime.timedelta, pd.Timedelta, pd.DateOffset, or Resampler
The keyword arguments form of ``indexer``.
One of indexer or indexer_kwargs must be provided.
Expand Down Expand Up @@ -10438,6 +10444,7 @@ def resample(
offset=offset,
origin=origin,
restore_coord_dims=restore_coord_dims,
boundaries=boundaries,
**indexer_kwargs,
)

Expand Down
40 changes: 40 additions & 0 deletions xarray/groupers.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,13 +484,19 @@ class TimeResampler(Resampler):
- 'end_day': `origin` is the ceiling midnight of the last day
offset : pd.Timedelta, datetime.timedelta, or str, default is None
An offset timedelta added to the origin.
boundaries : {"exact", "trim"}, optional
How to handle boundaries when the data doesn't evenly fit the resampling
frequency. If 'exact', a ValueError will be raised if the data doesn't
evenly fit. If 'trim', incomplete periods are dropped. If None (default),
uses the current behavior (includes incomplete periods).
"""

freq: ResampleCompatible
closed: SideOptions | None = field(default=None)
label: SideOptions | None = field(default=None)
origin: str | DatetimeLike = field(default="start_day")
offset: pd.Timedelta | datetime.timedelta | str | None = field(default=None)
boundaries: Literal["exact", "trim"] | None = field(default=None, kw_only=True)

index_grouper: CFTimeGrouper | pd.Grouper = field(init=False, repr=False)
group_as_index: pd.Index = field(init=False, repr=False)
Expand All @@ -502,6 +508,7 @@ def reset(self) -> Self:
label=self.label,
origin=self.origin,
offset=self.offset,
boundaries=self.boundaries,
)

def _init_properties(self, group: T_Group) -> None:
Expand Down Expand Up @@ -566,6 +573,39 @@ def factorize(self, group: T_Group) -> EncodedGroups:
self._init_properties(group)
full_index, first_items, codes_ = self._get_index_and_items()
sbins = first_items.values.astype(np.int64)

# Handle boundaries parameter for exact checking and trim logic
if self.boundaries == "exact":
# Check if data evenly fits the resampling frequency
counts = np.bincount(codes_)
expected_points = len(group) // len(first_items)
incomplete_periods = counts < expected_points

if np.any(incomplete_periods):
raise ValueError(
f"Data does not evenly fit the resampling frequency. "
f"Expected {expected_points} points per period, but found periods with "
f"{counts[incomplete_periods]} points. Use boundaries='trim' "
f"to handle incomplete periods."
)
elif self.boundaries == "trim":
# Apply trim logic: set codes to -1 for incomplete periods
counts = np.bincount(codes_)

if len(counts) > 0:
# Find the most common count (expected points per period)
unique_counts, count_frequencies = np.unique(counts, return_counts=True)
most_common_count = unique_counts[np.argmax(count_frequencies)]

# Identify incomplete periods
incomplete_periods = counts < most_common_count

if np.any(incomplete_periods):
# Find which data points belong to incomplete periods
incomplete_codes = np.where(incomplete_periods)[0]
# Set codes to -1 for points in incomplete periods
codes_[np.isin(codes_, incomplete_codes)] = -1

group_indices: GroupIndices = tuple(
list(itertools.starmap(slice, pairwise(sbins))) + [slice(sbins[-1], None)]
)
Expand Down
18 changes: 18 additions & 0 deletions xarray/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2081,6 +2081,24 @@
expected = DataArray([np.nan, 1, 1], [("time", times[::4])])
assert_identical(result, expected)

def test_resample_boundaries(self) -> None:
    """Check the ``boundaries`` option of ``resample`` on unevenly fitting data.

    31 daily samples resampled at ``"7D"`` produce four complete 7-day bins
    followed by a single 3-day remainder, so the data does not evenly fit
    the resampling frequency.
    """
    times = pd.date_range("2000-01-01", periods=31, freq="D")
    array = DataArray(np.arange(31), [("time", times)])

    # Default (boundaries=None): the incomplete trailing bin is kept.
    result_default = array.resample(time="7D").mean()
    assert len(result_default.time) == 5

    # boundaries="trim": the incomplete bin must be removed from the result
    # altogether -- not kept as an extra trailing entry (e.g. NaN) -- so the
    # time labels are compared as well as the values.
    result_trim = array.resample(time="7D", boundaries="trim").mean()
    expected_trim = DataArray([3.0, 10.0, 17.0, 24.0], [("time", times[:28:7])])
    assert len(result_trim.time) == 4
    assert_identical(result_trim, expected_trim)

    # boundaries="exact": data that does not evenly fit raises.
    with pytest.raises(
        ValueError, match="Data does not evenly fit the resampling frequency"
    ):
        array.resample(time="7D", boundaries="exact").mean()

def test_upsample(self) -> None:
times = pd.date_range("2000-01-01", freq="6h", periods=5)
array = DataArray(np.arange(5), [("time", times)])
Expand Down
Loading