Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable resampling on PeriodIndex #2687

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 5 additions & 7 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -722,6 +722,7 @@ def resample(self, indexer=None, skipna=None, closed=None, label=None,
# TODO support non-string indexer after removing the old API.

from .dataarray import DataArray
from .variable import IndexVariable
from .resample import RESAMPLE_DIM
from ..coding.cftimeindex import CFTimeIndex

Expand All @@ -745,10 +746,7 @@ def resample(self, indexer=None, skipna=None, closed=None, label=None,
)
dim, freq = indexer.popitem()

dim_name = dim
dim_coord = self[dim]

if isinstance(self.indexes[dim_name], CFTimeIndex):
if isinstance(self.indexes[dim], CFTimeIndex):
raise NotImplementedError(
'Resample is currently not supported along a dimension '
'indexed by a CFTimeIndex. For certain kinds of downsampling '
Expand All @@ -761,12 +759,12 @@ def resample(self, indexer=None, skipna=None, closed=None, label=None,
'errors.'
)

group = DataArray(dim_coord, coords=dim_coord.coords,
dims=dim_coord.dims, name=RESAMPLE_DIM)
group = IndexVariable(
data=self.indexes[dim], dims=[dim], name=RESAMPLE_DIM)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we could alternatively fix this by replace dim_coord in the data argument with dim_coord.to_index(), e.g., compare:

ipdb> DataArray(dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM).to_index()
Index([2000-01-01 00:00, 2000-01-01 06:00, 2000-01-01 12:00, 2000-01-01 18:00,
       2000-01-02 00:00, 2000-01-02 06:00, 2000-01-02 12:00, 2000-01-02 18:00,
       2000-01-03 00:00, 2000-01-03 06:00],
      dtype='object', name='time')
ipdb> DataArray(dim_coord.to_index(), coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM).to_index()
PeriodIndex(['2000-01-01 00:00', '2000-01-01 06:00', '2000-01-01 12:00',
             '2000-01-01 18:00', '2000-01-02 00:00', '2000-01-02 06:00',
             '2000-01-02 12:00', '2000-01-02 18:00', '2000-01-03 00:00',
             '2000-01-03 06:00'],
            dtype='period[6H]', name='time', freq='6H')

We go to some effort to preserve pandas.Index classes when passed into DataArray/Variable but apparently don't do it consistently everywhere. So there's also probably a lower level fix somewhere (perhaps in xarray.core.variable.as_compatible_data?) to ensure that this happens more consistently.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great! I'll have a look

# TODO: to_offset() call required for pandas==0.19.2
grouper = pd.Grouper(freq=freq, closed=closed, label=label, base=base,
loffset=pd.tseries.frequencies.to_offset(loffset))
resampler = self._resample_cls(self, group=group, dim=dim_name,
resampler = self._resample_cls(self, group=group, dim=dim,
grouper=grouper,
resample_dim=RESAMPLE_DIM)

Expand Down
9 changes: 7 additions & 2 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -1789,12 +1789,17 @@ class IndexVariable(Variable):
unless another name is given.
"""

def __init__(self, dims, data, attrs=None, encoding=None, fastpath=False):
def __init__(self, dims, data, attrs=None, encoding=None, fastpath=False, name=None):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would rather not add an explicit name parameter to IndexVariable.

If anything, I would think about removing the name property. Long term, I'd like to rename IndexVariable -> ImmutableVariable and move the pandas adapter logic into class that we can pass into the data argument of Variable.

super(IndexVariable, self).__init__(dims, data, attrs, encoding,
fastpath)
if self.ndim != 1:
raise ValueError('%s objects must be 1-dimensional' %
type(self).__name__)

if name is None:
self._name = self.dims[0]
else:
self._name = name

# Unlike in Variable, always eagerly load values into memory
if not isinstance(self._data, PandasIndexAdapter):
Expand Down Expand Up @@ -1956,7 +1961,7 @@ def get_level_variable(self, level):

@property
def name(self):
return self.dims[0]
return self._name

@name.setter
def name(self, value):
Expand Down
8 changes: 8 additions & 0 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2399,6 +2399,14 @@ def test_resample_keep_attrs(self):
attrs=array.attrs)
assert_identical(result, expected)

def test_resample_period_index(self):
times = pd.period_range('2000-01-01', freq='6H', periods=10)
array = DataArray(np.arange(10), [('time', times)])

actual = array.resample(time='24H').mean()
expected = DataArray(array.to_series().resample('24H').mean())
assert_identical(expected, actual)

def test_resample_skipna(self):
times = pd.date_range('2000-01-01', freq='6H', periods=10)
array = DataArray(np.ones(10), [('time', times)])
Expand Down