Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add days_in_year and decimal_year to dt accessor #9105

Merged
merged 20 commits into from
Sep 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -529,9 +529,11 @@ Datetimelike properties
DataArray.dt.quarter
DataArray.dt.days_in_month
DataArray.dt.daysinmonth
DataArray.dt.days_in_year
DataArray.dt.season
DataArray.dt.time
DataArray.dt.date
DataArray.dt.decimal_year
DataArray.dt.calendar
DataArray.dt.is_month_start
DataArray.dt.is_month_end
Expand Down
7 changes: 7 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@ v2024.07.1 (unreleased)

New Features
~~~~~~~~~~~~

- Add :py:attr:`~core.accessor_dt.DatetimeAccessor.days_in_year` and :py:attr:`~core.accessor_dt.DatetimeAccessor.decimal_year` to the Datetime accessor on DataArrays. (:pull:`9105`).
By `Pascal Bourgault <https://github.com/aulemahal>`_.

Performance
~~~~~~~~~~~

- Make chunk manager an option in ``set_options`` (:pull:`9362`).
By `Tom White <https://github.com/tomwhite>`_.
- Support for :ref:`grouping by multiple variables <groupby.multiple>`.
Expand Down
115 changes: 63 additions & 52 deletions xarray/coding/calendar_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@
_should_cftime_be_used,
convert_times,
)
from xarray.core.common import _contains_datetime_like_objects, is_np_datetime_like
from xarray.core.common import (
_contains_datetime_like_objects,
full_like,
is_np_datetime_like,
)
from xarray.core.computation import apply_ufunc

try:
import cftime
Expand All @@ -25,16 +30,6 @@
]


def _days_in_year(year, calendar, use_cftime=True):
"""Return the number of days in the input year according to the input calendar."""
date_type = get_date_type(calendar, use_cftime=use_cftime)
if year == -1 and calendar in _CALENDARS_WITHOUT_YEAR_ZERO:
difference = date_type(year + 2, 1, 1) - date_type(year, 1, 1)
else:
difference = date_type(year + 1, 1, 1) - date_type(year, 1, 1)
return difference.days


def convert_calendar(
obj,
calendar,
Expand Down Expand Up @@ -191,11 +186,7 @@ def convert_calendar(
# Special case for conversion involving 360_day calendar
if align_on == "year":
# Instead of translating dates directly, this tries to keep the position within a year similar.
new_doy = time.groupby(f"{dim}.year").map(
_interpolate_day_of_year,
target_calendar=calendar,
use_cftime=use_cftime,
)
new_doy = _interpolate_day_of_year(time, target_calendar=calendar)
elif align_on == "random":
# The 5 days to remove are randomly chosen, one for each of the five 72-days periods of the year.
new_doy = time.groupby(f"{dim}.year").map(
Expand Down Expand Up @@ -242,16 +233,25 @@ def convert_calendar(
return out


def _interpolate_day_of_year(time, target_calendar, use_cftime):
"""Returns the nearest day in the target calendar of the corresponding
"decimal year" in the source calendar.
"""
year = int(time.dt.year[0])
source_calendar = time.dt.calendar
def _is_leap_year(years, calendar):
func = np.vectorize(cftime.is_leap_year)
return func(years, calendar=calendar)


def _days_in_year(years, calendar):
"""The number of days in the year according to given calendar."""
if calendar == "360_day":
return full_like(years, 360)
return _is_leap_year(years, calendar).astype(int) + 365


def _interpolate_day_of_year(times, target_calendar):
"""Returns the nearest day in the target calendar of the corresponding "decimal year" in the source calendar."""
source_calendar = times.dt.calendar
return np.round(
_days_in_year(year, target_calendar, use_cftime)
* time.dt.dayofyear
/ _days_in_year(year, source_calendar, use_cftime)
_days_in_year(times.dt.year, target_calendar)
* times.dt.dayofyear
/ _days_in_year(times.dt.year, source_calendar)
).astype(int)


Expand All @@ -260,18 +260,18 @@ def _random_day_of_year(time, target_calendar, use_cftime):

Removes Feb 29th and five other days chosen randomly within five sections of 72 days.
"""
year = int(time.dt.year[0])
year = time.dt.year[0]
source_calendar = time.dt.calendar
new_doy = np.arange(360) + 1
rm_idx = np.random.default_rng().integers(0, 72, 5) + 72 * np.arange(5)
if source_calendar == "360_day":
for idx in rm_idx:
new_doy[idx + 1 :] = new_doy[idx + 1 :] + 1
if _days_in_year(year, target_calendar, use_cftime) == 366:
if _days_in_year(year, target_calendar) == 366:
new_doy[new_doy >= 60] = new_doy[new_doy >= 60] + 1
elif target_calendar == "360_day":
new_doy = np.insert(new_doy, rm_idx - np.arange(5), -1)
if _days_in_year(year, source_calendar, use_cftime) == 366:
if _days_in_year(year, source_calendar) == 366:
new_doy = np.insert(new_doy, 60, -1)
return new_doy[time.dt.dayofyear - 1]

Expand Down Expand Up @@ -304,32 +304,45 @@ def _convert_to_new_calendar_with_new_day_of_year(
return np.nan


def _datetime_to_decimal_year(times, dim="time", calendar=None):
"""Convert a datetime DataArray to decimal years according to its calendar or the given one.
def _decimal_year_cftime(time, year, days_in_year, *, date_class):
year_start = date_class(year, 1, 1)
delta = np.timedelta64(time - year_start, "ns")
days_in_year = np.timedelta64(days_in_year, "D")
return year + delta / days_in_year


def _decimal_year_numpy(time, year, days_in_year, *, dtype):
time = np.asarray(time).astype(dtype)
year_start = np.datetime64(int(year) - 1970, "Y").astype(dtype)
delta = time - year_start
days_in_year = np.timedelta64(days_in_year, "D")
return year + delta / days_in_year


def _decimal_year(times):
"""Convert a datetime DataArray to decimal years according to its calendar.

The decimal year of a timestamp is its year plus its sub-year component
converted to the fraction of its year.
Ex: '2000-03-01 12:00' is 2000.1653 in a standard calendar,
2000.16301 in a "noleap" or 2000.16806 in a "360_day".
"""
from xarray.core.dataarray import DataArray

calendar = calendar or times.dt.calendar

if is_np_datetime_like(times.dtype):
times = times.copy(data=convert_times(times.values, get_date_type("standard")))

def _make_index(time):
year = int(time.dt.year[0])
doys = cftime.date2num(time, f"days since {year:04d}-01-01", calendar=calendar)
return DataArray(
year + doys / _days_in_year(year, calendar),
dims=(dim,),
coords=time.coords,
name=dim,
)

return times.groupby(f"{dim}.year").map(_make_index)
if times.dtype == "O":
function = _decimal_year_cftime
kwargs = {"date_class": get_date_type(times.dt.calendar, True)}
else:
function = _decimal_year_numpy
kwargs = {"dtype": times.dtype}
return apply_ufunc(
function,
times,
times.dt.year,
times.dt.days_in_year,
kwargs=kwargs,
vectorize=True,
dask="parallelized",
output_dtypes=[np.float64],
)


def interp_calendar(source, target, dim="time"):
Expand Down Expand Up @@ -372,9 +385,7 @@ def interp_calendar(source, target, dim="time"):
f"Both 'source.{dim}' and 'target' must contain datetime objects."
)

source_calendar = source[dim].dt.calendar
target_calendar = target.dt.calendar

if (
source[dim].time.dt.year == 0
).any() and target_calendar in _CALENDARS_WITHOUT_YEAR_ZERO:
Expand All @@ -383,8 +394,8 @@ def interp_calendar(source, target, dim="time"):
)

out = source.copy()
out[dim] = _datetime_to_decimal_year(source[dim], dim=dim, calendar=source_calendar)
target_idx = _datetime_to_decimal_year(target, dim=dim, calendar=target_calendar)
out[dim] = _decimal_year(source[dim])
target_idx = _decimal_year(target)
out = out.interp(**{dim: target_idx})
out[dim] = target
return out
5 changes: 5 additions & 0 deletions xarray/coding/cftimeindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,11 @@ def round(self, freq):
"""
return self._round_via_method(freq, _round_to_nearest_half_even)

@property
def is_leap_year(self):
func = np.vectorize(cftime.is_leap_year)
return func(self.year, calendar=self.calendar)


def _parse_iso8601_without_reso(date_type, datetime_str):
date, _ = _parse_iso8601_with_reso(date_type, datetime_str)
Expand Down
29 changes: 29 additions & 0 deletions xarray/core/accessor_dt.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@
import numpy as np
import pandas as pd

from xarray.coding.calendar_ops import _decimal_year
from xarray.coding.times import infer_calendar_name
from xarray.core import duck_array_ops
from xarray.core.common import (
_contains_datetime_like_objects,
full_like,
is_np_datetime_like,
is_np_timedelta_like,
)
Expand Down Expand Up @@ -543,6 +545,33 @@ def calendar(self) -> CFCalendar:
"""
return infer_calendar_name(self._obj.data)

@property
def days_in_year(self) -> T_DataArray:
"""Each datetime as the year plus the fraction of the year elapsed."""
if self.calendar == "360_day":
result = full_like(self.year, 360)
else:
result = self.is_leap_year.astype(int) + 365
newvar = Variable(
dims=self._obj.dims,
attrs=self._obj.attrs,
encoding=self._obj.encoding,
data=result,
)
return self._obj._replace(newvar, name="days_in_year")

@property
def decimal_year(self) -> T_DataArray:
"""Convert the dates as a fractional year."""
aulemahal marked this conversation as resolved.
Show resolved Hide resolved
result = _decimal_year(self._obj)
newvar = Variable(
dims=self._obj.dims,
attrs=self._obj.attrs,
encoding=self._obj.encoding,
data=result,
)
return self._obj._replace(newvar, name="decimal_year")


class TimedeltaAccessor(TimeAccessor[T_DataArray]):
"""Access Timedelta fields for DataArrays with Timedelta-like dtypes.
Expand Down
55 changes: 55 additions & 0 deletions xarray/tests/test_accessor_dt.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,17 @@ def test_strftime(self) -> None:
"2000-01-01 01:00:00" == self.data.time.dt.strftime("%Y-%m-%d %H:%M:%S")[1]
)

@requires_cftime
@pytest.mark.parametrize(
"calendar,expected",
[("standard", 366), ("noleap", 365), ("360_day", 360), ("all_leap", 366)],
)
def test_days_in_year(self, calendar, expected) -> None:
assert (
self.data.convert_calendar(calendar, align_on="year").time.dt.days_in_year
== expected
).all()

def test_not_datetime_type(self) -> None:
nontime_data = self.data.copy()
int_data = np.arange(len(self.data.time)).astype("int8")
Expand Down Expand Up @@ -177,6 +188,7 @@ def test_not_datetime_type(self) -> None:
"is_year_start",
"is_year_end",
"is_leap_year",
"days_in_year",
],
)
def test_dask_field_access(self, field) -> None:
Expand Down Expand Up @@ -698,3 +710,46 @@ def test_cftime_round_accessor(
result = cftime_rounding_dataarray.dt.round(freq)

assert_identical(result, expected)


@pytest.mark.parametrize(
"use_cftime",
[False, pytest.param(True, marks=requires_cftime)],
ids=lambda x: f"use_cftime={x}",
)
@pytest.mark.parametrize(
"use_dask",
[False, pytest.param(True, marks=requires_dask)],
ids=lambda x: f"use_dask={x}",
)
def test_decimal_year(use_cftime, use_dask) -> None:
year = 2000
periods = 10
freq = "h"

shape = (2, 5)
dims = ["x", "y"]
hours_in_year = 24 * 366

times = xr.date_range(f"{year}", periods=periods, freq=freq, use_cftime=use_cftime)

da = xr.DataArray(times.values.reshape(shape), dims=dims)

if use_dask:
da = da.chunk({"y": 2})
# Computing the decimal year for a cftime datetime array requires a
# number of small computes (6):
# - 4x one compute per .dt accessor call (requires inspecting one
# object-dtype array element to see if it is time-like)
# - 2x one compute per calendar inference (requires inspecting one
# array element to read off the calendar)
max_computes = 6 * use_cftime
with raise_if_dask_computes(max_computes=max_computes):
result = da.dt.decimal_year
else:
result = da.dt.decimal_year

expected = xr.DataArray(
year + np.arange(periods).reshape(shape) / hours_in_year, dims=dims
)
xr.testing.assert_equal(result, expected)
Loading