From fdfc0962e1ac2284a437637c611818c1b5b6a4eb Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 12 Jun 2024 13:46:31 -0400 Subject: [PATCH 01/15] Add days_in_year and decimal_year to dt accessor --- xarray/coding/calendar_ops.py | 69 ++++++++++++++++++++------------ xarray/core/accessor_dt.py | 21 ++++++++++ xarray/tests/test_accessor_dt.py | 13 ++++++ 3 files changed, 77 insertions(+), 26 deletions(-) diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index c4fe9e1f4ae..3f55610777b 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -3,6 +3,8 @@ import numpy as np import pandas as pd +from xarray.core.common import full_like +from xarray.core.computation import where from xarray.coding.cftime_offsets import date_range_like, get_date_type from xarray.coding.cftimeindex import CFTimeIndex from xarray.coding.times import _should_cftime_be_used, convert_times @@ -22,14 +24,35 @@ ] -def _days_in_year(year, calendar, use_cftime=True): - """Return the number of days in the input year according to the input calendar.""" - date_type = get_date_type(calendar, use_cftime=use_cftime) - if year == -1 and calendar in _CALENDARS_WITHOUT_YEAR_ZERO: - difference = date_type(year + 2, 1, 1) - date_type(year, 1, 1) +def _leap_gregorian(years): + # A year is a leap year if either (i) it is divisible by 4 but not by 100 or (ii) it is divisible by 400 + return ((years % 4 == 0) & (years % 100 != 0)) | (years % 400 == 0) + + +def _leap_julian(years): + # A year is a leap year if it is divisible by 4, even if it is also divisible by 100 + return years % 4 == 0 + + +def _days_in_year(years, calendar): + """The number of days in each year for the corresponding calendar.""" + if calendar == 'standard': + return 365 + where(years < 1582, _leap_julian(years), _leap_gregorian(years)) * 1 + if calendar == 'proleptic_gregorian': + return 365 + _leap_gregorian(years) * 1 + if calendar == 'julian': + return 365 + _leap_julian(years) * 1 + if calendar in ['noleap', '365_day']: + const = 365 + elif calendar in ['all_leap', '366_day']: + const = 366 + elif calendar == '360_day': + const = 360 else: - difference = date_type(year + 1, 1, 1) - date_type(year, 1, 1) - return difference.days + raise ValueError(f'Calendar {calendar} not recognized.') + if isinstance(years, (float, int)): + return const + return full_like(years, const) def convert_calendar( @@ -188,11 +211,7 @@ def convert_calendar( # Special case for conversion involving 360_day calendar if align_on == "year": # Instead of translating dates directly, this tries to keep the position within a year similar. - new_doy = time.groupby(f"{dim}.year").map( - _interpolate_day_of_year, - target_calendar=calendar, - use_cftime=use_cftime, - ) + new_doy = _interpolate_day_of_year(time, target_calendar=calendar) elif align_on == "random": # The 5 days to remove are randomly chosen, one for each of the five 72-days periods of the year. new_doy = time.groupby(f"{dim}.year").map( @@ -232,16 +251,13 @@ def convert_calendar( return out -def _interpolate_day_of_year(time, target_calendar, use_cftime): - """Returns the nearest day in the target calendar of the corresponding - "decimal year" in the source calendar. - """ - year = int(time.dt.year[0]) - source_calendar = time.dt.calendar +def _interpolate_day_of_year(times, target_calendar): + """Returns the nearest day in the target calendar of the corresponding "decimal year" in the source calendar.""" + source_calendar = times.dt.calendar return np.round( - _days_in_year(year, target_calendar, use_cftime) - * time.dt.dayofyear - / _days_in_year(year, source_calendar, use_cftime) + _days_in_year(times.dt.year, target_calendar) + * times.dt.dayofyear + / _days_in_year(times.dt.year, source_calendar) ).astype(int) @@ -250,18 +266,18 @@ def _random_day_of_year(time, target_calendar, use_cftime): Removes Feb 29th and five other days chosen randomly within five sections of 72 days. """ - year = int(time.dt.year[0]) + year = time.dt.year[0] source_calendar = time.dt.calendar new_doy = np.arange(360) + 1 rm_idx = np.random.default_rng().integers(0, 72, 5) + 72 * np.arange(5) if source_calendar == "360_day": for idx in rm_idx: new_doy[idx + 1 :] = new_doy[idx + 1 :] + 1 - if _days_in_year(year, target_calendar, use_cftime) == 366: + if _days_in_year(year, target_calendar) == 366: new_doy[new_doy >= 60] = new_doy[new_doy >= 60] + 1 elif target_calendar == "360_day": new_doy = np.insert(new_doy, rm_idx - np.arange(5), -1) - if _days_in_year(year, source_calendar, use_cftime) == 366: + if _days_in_year(year, source_calendar) == 366: new_doy = np.insert(new_doy, 60, -1) return new_doy[time.dt.dayofyear - 1] @@ -304,10 +320,11 @@ def _datetime_to_decimal_year(times, dim="time", calendar=None): """ from xarray.core.dataarray import DataArray - calendar = calendar or times.dt.calendar + if calendar is None: + calendar = times.dt.calendar if is_np_datetime_like(times.dtype): - times = times.copy(data=convert_times(times.values, get_date_type("standard"))) + times = times.copy(data=convert_times(times.values, get_date_type("proleptic_gregorian"))) def _make_index(time): year = int(time.dt.year[0]) diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index 41b982d268b..f19af846f24 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -7,6 +7,7 @@ import pandas as pd from xarray.coding.times import infer_calendar_name +from xarray.coding.calendar_ops import _days_in_year, _datetime_to_decimal_year from xarray.core import duck_array_ops from xarray.core.common import ( _contains_datetime_like_objects, @@ -533,6 +534,26 @@ def calendar(self) -> CFCalendar: """ return infer_calendar_name(self._obj.data) + @property + def days_in_year(self) -> T_DataArray: + """The number of days in the year.""" + obj_type = type(self._obj) + result = _days_in_year(self.year, self.calendar) + return obj_type( + result, name="days_in_year", coords=self._obj.coords, + dims=self._obj.dims, attrs=self._obj.attrs + ) + + @property + def to_decimal_year(self) -> T_DataArray: + """Convert the dates as a fractional year.""" + obj_type = type(self._obj) + result = _datetime_to_decimal_year(self._obj) + return obj_type( + result, name="decimal_year", coords=self._obj.coords, + dims=self._obj.dims, attrs=self._obj.attrs + ) + class TimedeltaAccessor(TimeAccessor[T_DataArray]): """Access Timedelta fields for DataArrays with Timedelta-like dtypes. diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 686bce943fa..d4dd7c9a9d2 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -141,6 +141,19 @@ def test_strftime(self) -> None: "2000-01-01 01:00:00" == self.data.time.dt.strftime("%Y-%m-%d %H:%M:%S")[1] ) + @pytest.mark.parametrize("calendar,expected", [('standard', 366), ('noleap', 365), ('360_day', 360), ('all_leap', 366)]) + def test_days_in_year(self, calendar, expected) -> None: + assert ( + self.data.convert_calendar(calendar, align_on='year').time.dt.days_in_year == expected + ).all() + + def test_decimal_year(self) -> None: + h_per_yr = 366 * 24 + np.testing.assert_array_equal( + self.data.time.dt.to_decimal_year[0:3], + [2000, 2000 + 1 / h_per_yr, 2000 + 2 / h_per_yr] + ) + def test_not_datetime_type(self) -> None: nontime_data = self.data.copy() int_data = np.arange(len(self.data.time)).astype("int8") From b62fc19776f8195c05e61c8d5d0c2c313307db50 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 12 Jun 2024 14:53:49 -0400 Subject: [PATCH 02/15] Upd whats new - add gregorian calendar - rename to decimal_year --- doc/whats-new.rst | 3 +++ xarray/coding/calendar_ops.py | 2 +- xarray/core/accessor_dt.py | 2 +- xarray/tests/test_accessor_dt.py | 2 +- 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 96005e17f78..270d9da152d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -23,6 +23,9 @@ v2024.05.1 (unreleased) New Features ~~~~~~~~~~~~ +- Add :py:property:`~core.accessor_dt.DatetimeAccessor.days_in_year` and :py:property:`~core.accessor_dt.DatetimeAccessor.decimal_year` to the Datetime accessor on DataArrays. (:pull:` `). + By `Pascal Bourgault `_. + Performance ~~~~~~~~~~~ diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index 3f55610777b..d1da65d4768 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -36,7 +36,7 @@ def _leap_julian(years): def _days_in_year(years, calendar): """The number of days in each year for the corresponding calendar.""" - if calendar == 'standard': + if calendar in ['standard', 'gregorian']: return 365 + where(years < 1582, _leap_julian(years), _leap_gregorian(years)) * 1 if calendar == 'proleptic_gregorian': return 365 + _leap_gregorian(years) * 1 diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index f19af846f24..56089e1b230 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -545,7 +545,7 @@ def days_in_year(self) -> T_DataArray: ) @property - def to_decimal_year(self) -> T_DataArray: + def decimal_year(self) -> T_DataArray: """Convert the dates as a fractional year.""" obj_type = type(self._obj) result = _datetime_to_decimal_year(self._obj) diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index d4dd7c9a9d2..bb253f7e5f1 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -150,7 +150,7 @@ def test_days_in_year(self, calendar, expected) -> None: def test_decimal_year(self) -> None: h_per_yr = 366 * 24 np.testing.assert_array_equal( - self.data.time.dt.to_decimal_year[0:3], + self.data.time.dt.decimal_year[0:3], [2000, 2000 + 1 / h_per_yr, 2000 + 2 / h_per_yr] ) From 6175a9621fd9d9bef460ec986ba3bff4082f8fd9 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 12 Jun 2024 14:55:34 -0400 Subject: [PATCH 03/15] Add to api.rst and pr number --- doc/api.rst | 2 ++ doc/whats-new.rst | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/api.rst b/doc/api.rst index a8f8ea7dd1c..8d024d02107 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -527,9 +527,11 @@ Datetimelike properties DataArray.dt.quarter DataArray.dt.days_in_month DataArray.dt.daysinmonth + DataArray.dt.days_in_year DataArray.dt.season DataArray.dt.time DataArray.dt.date + DataArray.dt.decimal_year DataArray.dt.calendar DataArray.dt.is_month_start DataArray.dt.is_month_end diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 270d9da152d..d10c281e1b4 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -23,7 +23,7 @@ v2024.05.1 (unreleased) New Features ~~~~~~~~~~~~ -- Add :py:property:`~core.accessor_dt.DatetimeAccessor.days_in_year` and :py:property:`~core.accessor_dt.DatetimeAccessor.decimal_year` to the Datetime accessor on DataArrays. (:pull:` `). +- Add :py:property:`~core.accessor_dt.DatetimeAccessor.days_in_year` and :py:property:`~core.accessor_dt.DatetimeAccessor.decimal_year` to the Datetime accessor on DataArrays. (:pull:`9105`). By `Pascal Bourgault `_. Performance From c43d8f4210a68b0e34c3cf3c741555d58f32fcfd Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 12 Jun 2024 15:03:54 -0400 Subject: [PATCH 04/15] Add requires cftime decorators where needed --- xarray/tests/test_accessor_dt.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index bb253f7e5f1..a76548bbc3a 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -141,12 +141,14 @@ def test_strftime(self) -> None: "2000-01-01 01:00:00" == self.data.time.dt.strftime("%Y-%m-%d %H:%M:%S")[1] ) + @requires_cftime @pytest.mark.parametrize("calendar,expected", [('standard', 366), ('noleap', 365), ('360_day', 360), ('all_leap', 366)]) def test_days_in_year(self, calendar, expected) -> None: assert ( self.data.convert_calendar(calendar, align_on='year').time.dt.days_in_year == expected ).all() + @requires_cftime def test_decimal_year(self) -> None: h_per_yr = 366 * 24 np.testing.assert_array_equal( From 9c7eadf5bccdf448d9da580ef20edc8531708ec0 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 12 Aug 2024 15:00:17 -0400 Subject: [PATCH 05/15] Rewrite functions using suggestions from review --- xarray/coding/calendar_ops.py | 114 +++++++++++++++++----------------- xarray/coding/cftimeindex.py | 5 ++ xarray/core/accessor_dt.py | 24 ++++--- 3 files changed, 72 insertions(+), 71 deletions(-) diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index a1e2dd478e8..bdc38bd7cdb 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -3,15 +3,18 @@ import numpy as np import pandas as pd -from xarray.core.common import full_like -from xarray.core.computation import where from xarray.coding.cftime_offsets import date_range_like, get_date_type from xarray.coding.cftimeindex import CFTimeIndex from xarray.coding.times import ( _should_cftime_be_used, convert_times, ) -from xarray.core.common import _contains_datetime_like_objects, is_np_datetime_like +from xarray.core.common import ( + _contains_datetime_like_objects, + full_like, + is_np_datetime_like, +) +from xarray.core.computation import apply_ufunc try: import cftime @@ -27,37 +30,6 @@ ] -def _leap_gregorian(years): - # A year is a leap year if either (i) it is divisible by 4 but not by 100 or (ii) it is divisible by 400 - return ((years % 4 == 0) & (years % 100 != 0)) | (years % 400 == 0) - - -def _leap_julian(years): - # A year is a leap year if it is divisible by 4, even if it is also divisible by 100 - return years % 4 == 0 - - -def _days_in_year(years, calendar): - """The number of days in each year for the corresponding calendar.""" - if calendar in ['standard', 'gregorian']: - return 365 + where(years < 1582, _leap_julian(years), _leap_gregorian(years)) * 1 - if calendar == 'proleptic_gregorian': - return 365 + _leap_gregorian(years) * 1 - if calendar == 'julian': - return 365 + _leap_julian(years) * 1 - if calendar in ['noleap', '365_day']: - const = 365 - elif calendar in ['all_leap', '366_day']: - const = 366 - elif calendar == '360_day': - const = 360 - else: - raise ValueError(f'Calendar {calendar} not recognized.') - if isinstance(years, (float, int)): - return const - return full_like(years, const) - - def convert_calendar( obj, calendar, @@ -261,6 +233,18 @@ def convert_calendar( return out +def _is_leap_year(years, calendar): + func = np.vectorize(cftime.is_leap_year) + return func(years, calendar=calendar) + + +def _days_in_year(years, calendar): + """The number of days in the year according to given calendar.""" + if calendar == "360_day": + return full_like(years, 360) + return _is_leap_year(years, calendar).astype(int) + 365 + + def _interpolate_day_of_year(times, target_calendar): """Returns the nearest day in the target calendar of the corresponding "decimal year" in the source calendar.""" source_calendar = times.dt.calendar @@ -320,33 +304,49 @@ def _convert_to_new_calendar_with_new_day_of_year( return np.nan -def _datetime_to_decimal_year(times, dim="time", calendar=None): - """Convert a datetime DataArray to decimal years according to its calendar or the given one. +def _yearstart_cftime(year, date_class): + return date_class(year, 1, 1) + + +def _yearstart_np(year, dtype): + return np.datetime64(int(year) - 1970, "Y").astype(dtype) + + +def _yearstart(times): + if times.dtype == "O": + return apply_ufunc( + _yearstart_cftime, + times.dt.year, + kwargs={"date_class": get_date_type(times.dt.calendar, True)}, + vectorize=True, + dask="parallelized", + ) + return apply_ufunc( + _yearstart_np, + times.dt.year, + kwargs={"dtype": times.dtype}, + vectorize=True, + dask="parallelized", + ) + + +def _decimal_year(times): + """Convert a datetime DataArray to decimal years according to its calendar. The decimal year of a timestamp is its year plus its sub-year component converted to the fraction of its year. Ex: '2000-03-01 12:00' is 2000.1653 in a standard calendar, 2000.16301 in a "noleap" or 2000.16806 in a "360_day". """ - from xarray.core.dataarray import DataArray - - if calendar is None: - calendar = times.dt.calendar - - if is_np_datetime_like(times.dtype): - times = times.copy(data=convert_times(times.values, get_date_type("proleptic_gregorian"))) - - def _make_index(time): - year = int(time.dt.year[0]) - doys = cftime.date2num(time, f"days since {year:04d}-01-01", calendar=calendar) - return DataArray( - year + doys / _days_in_year(year, calendar), - dims=(dim,), - coords=time.coords, - name=dim, + years = times.dt.year + deltas = times - _yearstart(times) + # astype on the data to avoid warning about timedelta64[D] being automatically converted to ns in Variable constructor. + days_in_years = deltas.copy( + data=times.dt.days_in_year.data.astype("timedelta64[D]").astype( + "timedelta64[ns]" ) - - return times.groupby(f"{dim}.year").map(_make_index) + ) + return years + deltas / days_in_years def interp_calendar(source, target, dim="time"): @@ -389,9 +389,7 @@ def interp_calendar(source, target, dim="time"): f"Both 'source.{dim}' and 'target' must contain datetime objects." ) - source_calendar = source[dim].dt.calendar target_calendar = target.dt.calendar - if ( source[dim].time.dt.year == 0 ).any() and target_calendar in _CALENDARS_WITHOUT_YEAR_ZERO: @@ -400,8 +398,8 @@ def interp_calendar(source, target, dim="time"): ) out = source.copy() - out[dim] = _datetime_to_decimal_year(source[dim], dim=dim, calendar=source_calendar) - target_idx = _datetime_to_decimal_year(target, dim=dim, calendar=target_calendar) + out[dim] = _decimal_year(source[dim]) + target_idx = _decimal_year(target) out = out.interp(**{dim: target_idx}) out[dim] = target return out diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index ef01f4cc79a..161f27af49e 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -801,6 +801,11 @@ def round(self, freq): """ return self._round_via_method(freq, _round_to_nearest_half_even) + @property + def is_leap_year(self): + func = np.vectorize(cftime.is_leap_year) + return func(self.year, calendar=self.calendar) + def _parse_iso8601_without_reso(date_type, datetime_str): date, _ = _parse_iso8601_with_reso(date_type, datetime_str) diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index 56089e1b230..6f713c828e5 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -6,11 +6,12 @@ import numpy as np import pandas as pd +from xarray.coding.calendar_ops import _decimal_year from xarray.coding.times import infer_calendar_name -from xarray.coding.calendar_ops import _days_in_year, _datetime_to_decimal_year from xarray.core import duck_array_ops from xarray.core.common import ( _contains_datetime_like_objects, + full_like, is_np_datetime_like, is_np_timedelta_like, ) @@ -537,22 +538,19 @@ def calendar(self) -> CFCalendar: @property def days_in_year(self) -> T_DataArray: """The number of days in the year.""" - obj_type = type(self._obj) - result = _days_in_year(self.year, self.calendar) - return obj_type( - result, name="days_in_year", coords=self._obj.coords, - dims=self._obj.dims, attrs=self._obj.attrs - ) + if self.calendar == "360_day": + result = full_like(self.year, 360) + else: + result = self.is_leap_year.astype(int) + 365 + newvar = self._obj.variable.copy(data=result, deep=False) + return self._obj._replace(newvar, name="days_in_year") @property def decimal_year(self) -> T_DataArray: """Convert the dates as a fractional year.""" - obj_type = type(self._obj) - result = _datetime_to_decimal_year(self._obj) - return obj_type( - result, name="decimal_year", coords=self._obj.coords, - dims=self._obj.dims, attrs=self._obj.attrs - ) + result = _decimal_year(self._obj) + newvar = self._obj.variable.copy(data=result, deep=False) + return self._obj._replace(newvar, name="decimal_year") class TimedeltaAccessor(TimeAccessor[T_DataArray]): From c1d257e3ab4be5d8bbe4b7d714dcd73dd8ca9d20 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Tue, 3 Sep 2024 11:21:59 -0400 Subject: [PATCH 06/15] cleaner custom date field - docstrings - remove bad merge --- doc/whats-new.rst | 3 --- xarray/coding/calendar_ops.py | 11 +++++------ xarray/core/accessor_dt.py | 16 +++++++++++++--- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 10628fff0de..4bef445e132 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -29,9 +29,6 @@ New Features Performance ~~~~~~~~~~~ -- Small optimization to the netCDF4 and h5netcdf backends (:issue:`9058`, :pull:`9067`). - By `Deepak Cherian `_. - - Make chunk manager an option in ``set_options`` (:pull:`9362`). By `Tom White `_. - Support for :ref:`grouping by multiple variables `. diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index df7cfb777ba..4d223104ade 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -1,5 +1,7 @@ from __future__ import annotations +import warnings + import numpy as np import pandas as pd @@ -340,12 +342,9 @@ def _decimal_year(times): """ years = times.dt.year deltas = times - _yearstart(times) - # astype on the data to avoid warning about timedelta64[D] being automatically converted to ns in Variable constructor. - days_in_years = deltas.copy( - data=times.dt.days_in_year.data.astype("timedelta64[D]").astype( - "timedelta64[ns]" - ) - ) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message="Converting non-nanosecond") + days_in_years = times.dt.days_in_year.astype("timedelta64[D]") return years + deltas / days_in_years diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index ce249caa3ba..e73893d0f35 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -547,19 +547,29 @@ def calendar(self) -> CFCalendar: @property def days_in_year(self) -> T_DataArray: - """The number of days in the year.""" + """Each datetime as the year plus the fraction of the year elapsed.""" if self.calendar == "360_day": result = full_like(self.year, 360) else: result = self.is_leap_year.astype(int) + 365 - newvar = self._obj.variable.copy(data=result, deep=False) + newvar = Variable( + dims=self._obj.dims, + attrs=self._obj.attrs, + encoding=self._obj.encoding, + data=result, + ) return self._obj._replace(newvar, name="days_in_year") @property def decimal_year(self) -> T_DataArray: """Convert the dates as a fractional year.""" result = _decimal_year(self._obj) - newvar = self._obj.variable.copy(data=result, deep=False) + newvar = Variable( + dims=self._obj.dims, + attrs=self._obj.attrs, + encoding=self._obj.encoding, + data=result, + ) return self._obj._replace(newvar, name="decimal_year") From 8c73b025e54f7ab29163d8efafc28edc9cc587f7 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Tue, 3 Sep 2024 11:24:58 -0400 Subject: [PATCH 07/15] add new fields to dask access test --- xarray/tests/test_accessor_dt.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 315830eb8bb..8e8821267aa 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -143,10 +143,14 @@ def test_strftime(self) -> None: ) @requires_cftime - @pytest.mark.parametrize("calendar,expected", [('standard', 366), ('noleap', 365), ('360_day', 360), ('all_leap', 366)]) + @pytest.mark.parametrize( + "calendar,expected", + [("standard", 366), ("noleap", 365), ("360_day", 360), ("all_leap", 366)], + ) def test_days_in_year(self, calendar, expected) -> None: assert ( - self.data.convert_calendar(calendar, align_on='year').time.dt.days_in_year == expected + self.data.convert_calendar(calendar, align_on="year").time.dt.days_in_year + == expected ).all() @requires_cftime @@ -154,7 +158,7 @@ def test_decimal_year(self) -> None: h_per_yr = 366 * 24 np.testing.assert_array_equal( self.data.time.dt.decimal_year[0:3], - [2000, 2000 + 1 / h_per_yr, 2000 + 2 / h_per_yr] + [2000, 2000 + 1 / h_per_yr, 2000 + 2 / h_per_yr], ) def test_not_datetime_type(self) -> None: @@ -192,6 +196,8 @@ def test_not_datetime_type(self) -> None: "is_year_start", "is_year_end", "is_leap_year", + "days_in_year", + "decimal_year", ], ) def test_dask_field_access(self, field) -> None: From 3f429c9cf81ed0f78f59fdb45044ac4ad558f65f Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Tue, 3 Sep 2024 12:19:47 -0400 Subject: [PATCH 08/15] Revert to rollback method --- xarray/coding/calendar_ops.py | 35 +++++++++-------------------------- 1 file changed, 9 insertions(+), 26 deletions(-) diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index 4d223104ade..136cbc03a41 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -5,7 +5,7 @@ import numpy as np import pandas as pd -from xarray.coding.cftime_offsets import date_range_like, get_date_type +from xarray.coding.cftime_offsets import date_range_like, get_date_type, to_offset from xarray.coding.cftimeindex import CFTimeIndex from xarray.coding.times import ( _should_cftime_be_used, @@ -306,30 +306,12 @@ def _convert_to_new_calendar_with_new_day_of_year( return np.nan -def _yearstart_cftime(year, date_class): - return date_class(year, 1, 1) - - -def _yearstart_np(year, dtype): - return np.datetime64(int(year) - 1970, "Y").astype(dtype) - - -def _yearstart(times): - if times.dtype == "O": - return apply_ufunc( - _yearstart_cftime, - times.dt.year, - kwargs={"date_class": get_date_type(times.dt.calendar, True)}, - vectorize=True, - dask="parallelized", - ) - return apply_ufunc( - _yearstart_np, - times.dt.year, - kwargs={"dtype": times.dtype}, - vectorize=True, - dask="parallelized", - ) +def _rollback(times, freq): + if times.dtype.kind == "M": + offset = pd.tseries.frequencies.to_offset(freq) + else: + offset = to_offset(freq) + return apply_ufunc(offset.rollback, times, vectorize=True, dask="parallelized") def _decimal_year(times): @@ -341,7 +323,8 @@ def _decimal_year(times): 2000.16301 in a "noleap" or 2000.16806 in a "360_day". """ years = times.dt.year - deltas = times - _yearstart(times) + floored_times = times.dt.floor("D") + deltas = times - _rollback(floored_times, "YS") with warnings.catch_warnings(): warnings.filterwarnings("ignore", message="Converting non-nanosecond") days_in_years = times.dt.days_in_year.astype("timedelta64[D]") From d47147e800ea9bd6a25db612d94b05fac94a1fdb Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Tue, 3 Sep 2024 12:46:41 -0400 Subject: [PATCH 09/15] Revert "Revert to rollback method" This reverts commit 3f429c9cf81ed0f78f59fdb45044ac4ad558f65f. --- xarray/coding/calendar_ops.py | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index 136cbc03a41..4d223104ade 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -5,7 +5,7 @@ import numpy as np import pandas as pd -from xarray.coding.cftime_offsets import date_range_like, get_date_type, to_offset +from xarray.coding.cftime_offsets import date_range_like, get_date_type from xarray.coding.cftimeindex import CFTimeIndex from xarray.coding.times import ( _should_cftime_be_used, @@ -306,12 +306,30 @@ def _convert_to_new_calendar_with_new_day_of_year( return np.nan -def _rollback(times, freq): - if times.dtype.kind == "M": - offset = pd.tseries.frequencies.to_offset(freq) - else: - offset = to_offset(freq) - return apply_ufunc(offset.rollback, times, vectorize=True, dask="parallelized") +def _yearstart_cftime(year, date_class): + return date_class(year, 1, 1) + + +def _yearstart_np(year, dtype): + return np.datetime64(int(year) - 1970, "Y").astype(dtype) + + +def _yearstart(times): + if times.dtype == "O": + return apply_ufunc( + _yearstart_cftime, + times.dt.year, + kwargs={"date_class": get_date_type(times.dt.calendar, True)}, + vectorize=True, + dask="parallelized", + ) + return apply_ufunc( + _yearstart_np, + times.dt.year, + kwargs={"dtype": times.dtype}, + vectorize=True, + dask="parallelized", + ) def _decimal_year(times): @@ -323,8 +341,7 @@ def _decimal_year(times): 2000.16301 in a "noleap" or 2000.16806 in a "360_day". """ years = times.dt.year - floored_times = times.dt.floor("D") - deltas = times - _rollback(floored_times, "YS") + deltas = times - _yearstart(times) with warnings.catch_warnings(): warnings.filterwarnings("ignore", message="Converting non-nanosecond") days_in_years = times.dt.days_in_year.astype("timedelta64[D]") From 641380368f8268c2cb2141161bc7dccffcc28745 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Tue, 3 Sep 2024 14:28:46 -0400 Subject: [PATCH 10/15] explicit float cast? --- xarray/core/accessor_dt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index e73893d0f35..8bd4eee731f 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -563,7 +563,7 @@ def days_in_year(self) -> T_DataArray: @property def decimal_year(self) -> T_DataArray: """Convert the dates as a fractional year.""" - result = _decimal_year(self._obj) + result = _decimal_year(self._obj).astype(float) newvar = Variable( dims=self._obj.dims, attrs=self._obj.attrs, From 1189cbaa502b513d63984ad2e3eaa0605301c62d Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 7 Sep 2024 10:50:36 -0400 Subject: [PATCH 11/15] Revert back to rollback method --- xarray/coding/calendar_ops.py | 35 +++++++++-------------------------- xarray/core/accessor_dt.py | 2 +- 2 files changed, 10 insertions(+), 27 deletions(-) diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index 4d223104ade..136cbc03a41 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -5,7 +5,7 @@ import numpy as np import pandas as pd -from xarray.coding.cftime_offsets import date_range_like, get_date_type +from xarray.coding.cftime_offsets import date_range_like, get_date_type, to_offset from xarray.coding.cftimeindex import CFTimeIndex from xarray.coding.times import ( _should_cftime_be_used, @@ -306,30 +306,12 @@ def _convert_to_new_calendar_with_new_day_of_year( return np.nan -def _yearstart_cftime(year, date_class): - return date_class(year, 1, 1) - - -def _yearstart_np(year, dtype): - return np.datetime64(int(year) - 1970, "Y").astype(dtype) - - -def _yearstart(times): - if times.dtype == "O": - return apply_ufunc( - _yearstart_cftime, - times.dt.year, - kwargs={"date_class": get_date_type(times.dt.calendar, True)}, - vectorize=True, - dask="parallelized", - ) - return apply_ufunc( - _yearstart_np, - times.dt.year, - kwargs={"dtype": times.dtype}, - vectorize=True, - dask="parallelized", - ) +def _rollback(times, freq): + if times.dtype.kind == "M": + offset = pd.tseries.frequencies.to_offset(freq) + else: + offset = to_offset(freq) + return apply_ufunc(offset.rollback, times, vectorize=True, dask="parallelized") def _decimal_year(times): @@ -341,7 +323,8 @@ def _decimal_year(times): 2000.16301 in a "noleap" or 2000.16806 in a "360_day". """ years = times.dt.year - deltas = times - _yearstart(times) + floored_times = times.dt.floor("D") + deltas = times - _rollback(floored_times, "YS") with warnings.catch_warnings(): warnings.filterwarnings("ignore", message="Converting non-nanosecond") days_in_years = times.dt.days_in_year.astype("timedelta64[D]") diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index 8bd4eee731f..e73893d0f35 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -563,7 +563,7 @@ def days_in_year(self) -> T_DataArray: @property def decimal_year(self) -> T_DataArray: """Convert the dates as a fractional year.""" - result = _decimal_year(self._obj).astype(float) + result = _decimal_year(self._obj) newvar = Variable( dims=self._obj.dims, attrs=self._obj.attrs, From 9cf9530f6d4b53b37838444759b677fa0586c0e7 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 7 Sep 2024 10:52:36 -0400 Subject: [PATCH 12/15] Fix dask compatibility issues --- xarray/coding/calendar_ops.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index 136cbc03a41..126bb87381b 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -311,7 +311,13 @@ def _rollback(times, freq): offset = pd.tseries.frequencies.to_offset(freq) else: offset = to_offset(freq) - return apply_ufunc(offset.rollback, times, vectorize=True, dask="parallelized") + return apply_ufunc( + offset.rollback, + times, + vectorize=True, + dask="parallelized", + output_dtypes=[times.dtype], + ) def _decimal_year(times): @@ -324,7 +330,12 @@ def _decimal_year(times): """ years = times.dt.year floored_times = times.dt.floor("D") - deltas = times - _rollback(floored_times, "YS") + + # Explicitly cast to timedelta64[ns], since xarray's automatic conversion + # from datetime.timedelta to timedelta64[ns] does not occur for dask + # arrays of datetime.timedelta objects. + deltas = (times - _rollback(floored_times, "YS")).astype("timedelta64[ns]") + with warnings.catch_warnings(): warnings.filterwarnings("ignore", message="Converting non-nanosecond") days_in_years = times.dt.days_in_year.astype("timedelta64[D]") From b4a198f5ed5f171e466e8f0ad8b3fff224968208 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 7 Sep 2024 19:48:42 -0400 Subject: [PATCH 13/15] Approach that passes tests under NumPy 1.26.4 --- xarray/coding/calendar_ops.py | 57 ++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/xarray/coding/calendar_ops.py b/xarray/coding/calendar_ops.py index 126bb87381b..52a487ca46d 100644 --- a/xarray/coding/calendar_ops.py +++ b/xarray/coding/calendar_ops.py @@ -1,11 +1,9 @@ from __future__ import annotations -import warnings - import numpy as np import pandas as pd -from xarray.coding.cftime_offsets import date_range_like, get_date_type, to_offset +from xarray.coding.cftime_offsets import date_range_like, get_date_type from xarray.coding.cftimeindex import CFTimeIndex from xarray.coding.times import ( _should_cftime_be_used, @@ -306,18 +304,19 @@ def _convert_to_new_calendar_with_new_day_of_year( return np.nan -def _rollback(times, freq): - if times.dtype.kind == "M": - offset = pd.tseries.frequencies.to_offset(freq) - else: - offset = to_offset(freq) - return apply_ufunc( - offset.rollback, - times, - vectorize=True, - dask="parallelized", - output_dtypes=[times.dtype], - ) +def _decimal_year_cftime(time, year, days_in_year, *, date_class): + year_start = date_class(year, 1, 1) + delta = np.timedelta64(time - year_start, "ns") + days_in_year = np.timedelta64(days_in_year, "D") + return year + delta / days_in_year + + +def _decimal_year_numpy(time, year, days_in_year, *, dtype): + time = np.asarray(time).astype(dtype) + year_start = np.datetime64(int(year) - 1970, "Y").astype(dtype) + delta = time - year_start + days_in_year = np.timedelta64(days_in_year, "D") + return year + delta / days_in_year def _decimal_year(times): @@ -328,18 +327,22 @@ def _decimal_year(times): Ex: '2000-03-01 12:00' is 2000.1653 in a standard calendar, 2000.16301 in a "noleap" or 2000.16806 in a "360_day". """ - years = times.dt.year - floored_times = times.dt.floor("D") - - # Explicitly cast to timedelta64[ns], since xarray's automatic conversion - # from datetime.timedelta to timedelta64[ns] does not occur for dask - # arrays of datetime.timedelta objects. - deltas = (times - _rollback(floored_times, "YS")).astype("timedelta64[ns]") - - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", message="Converting non-nanosecond") - days_in_years = times.dt.days_in_year.astype("timedelta64[D]") - return years + deltas / days_in_years + if times.dtype == "O": + function = _decimal_year_cftime + kwargs = {"date_class": get_date_type(times.dt.calendar, True)} + else: + function = _decimal_year_numpy + kwargs = {"dtype": times.dtype} + return apply_ufunc( + function, + times, + times.dt.year, + times.dt.days_in_year, + kwargs=kwargs, + vectorize=True, + dask="parallelized", + output_dtypes=[np.float64], + ) def interp_calendar(source, target, dim="time"): From 398b492e503e3846b3b163eed4e9bd575a6c0593 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sun, 8 Sep 2024 13:44:09 -0400 Subject: [PATCH 14/15] Adapt decimal_year test to be more comprehensive --- xarray/tests/test_accessor_dt.py | 52 ++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 8e8821267aa..587f43a5d7f 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -153,14 +153,6 @@ def test_days_in_year(self, calendar, expected) -> None: == expected ).all() - @requires_cftime - def test_decimal_year(self) -> None: - h_per_yr = 366 * 24 - np.testing.assert_array_equal( - self.data.time.dt.decimal_year[0:3], - [2000, 2000 + 1 / h_per_yr, 2000 + 2 / h_per_yr], - ) - def test_not_datetime_type(self) -> None: nontime_data = self.data.copy() int_data = np.arange(len(self.data.time)).astype("int8") @@ -197,7 +189,6 @@ def test_not_datetime_type(self) -> None: "is_year_end", "is_leap_year", "days_in_year", - "decimal_year", ], ) def test_dask_field_access(self, field) -> None: @@ -719,3 +710,46 @@ def test_cftime_round_accessor( result = cftime_rounding_dataarray.dt.round(freq) assert_identical(result, expected) + + +@pytest.mark.parametrize( + "use_cftime", + [False, pytest.param(True, marks=requires_cftime)], + ids=lambda x: f"use_cftime={x}", +) +@pytest.mark.parametrize( + "use_dask", + [False, pytest.param(True, marks=requires_dask)], + ids=lambda x: f"use_dask={x}", +) +def test_decimal_year(use_cftime, use_dask) -> None: + year = 2000 + periods = 10 + freq = "h" + + shape = (2, 5) + dims = ["x", "y"] + hours_in_year = 24 * 366 + + times = xr.date_range(f"{year}", periods=periods, freq=freq, use_cftime=use_cftime) + + da = xr.DataArray(times.values.reshape(shape), dims=dims) + + if use_dask: + da = da.chunk({"y": 2}) + # Computing the decimal year for a cftime datetime array requires a + # number of small computes (6): + # - 4x one compute per .dt accessor call (requires inspecting one + # object-dtype array element to see if it is time-like) + # - 2x one compute per calendar inference (requires inspecting one + # array element to read off the calendar) + max_computes = 6 * use_cftime + with raise_if_dask_computes(max_computes=max_computes): + result = da.dt.decimal_year + else: + result = da.dt.decimal_year + + expected = xr.DataArray( + year + np.arange(periods).reshape(shape) / hours_in_year, dims=dims + ) + xr.testing.assert_equal(result, expected) From 31adaca0703a4b2f89b8fdee582daf2c8f9591da Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 9 Sep 2024 09:42:55 -0400 Subject: [PATCH 15/15] Use proper sphinx roles for cross-referencing. --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4bef445e132..bf9a89d8a23 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -23,7 +23,7 @@ v2024.07.1 (unreleased) New Features ~~~~~~~~~~~~ -- Add :py:property:`~core.accessor_dt.DatetimeAccessor.days_in_year` and :py:property:`~core.accessor_dt.DatetimeAccessor.decimal_year` to the Datetime accessor on DataArrays. (:pull:`9105`). +- Add :py:attr:`~core.accessor_dt.DatetimeAccessor.days_in_year` and :py:attr:`~core.accessor_dt.DatetimeAccessor.decimal_year` to the Datetime accessor on DataArrays. (:pull:`9105`). By `Pascal Bourgault `_. Performance