Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a days_in_month accessor to CFTimeIndex #3935

Merged
merged 7 commits into from
Apr 6, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 18 additions & 104 deletions doc/examples/monthly-means.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -29,89 +29,9 @@
"import numpy as np\n",
"import pandas as pd\n",
"import xarray as xr\n",
"from netCDF4 import num2date\n",
"import matplotlib.pyplot as plt "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Some calendar information so we can support any netCDF calendar. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-28T20:51:35.991620Z",
"start_time": "2018-11-28T20:51:35.960336Z"
}
},
"outputs": [],
"source": [
"dpm = {'noleap': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n",
" '365_day': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n",
" 'standard': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n",
" 'gregorian': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n",
" 'proleptic_gregorian': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n",
" 'all_leap': [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n",
" '366_day': [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n",
" '360_day': [0, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30]} "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### A few calendar functions to determine the number of days in each month\n",
"If you were just using the standard calendar, it would be easy to use the `calendar.monthrange` function."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-28T20:51:36.015151Z",
"start_time": "2018-11-28T20:51:35.994079Z"
}
},
"outputs": [],
"source": [
"def leap_year(year, calendar='standard'):\n",
" \"\"\"Determine if year is a leap year\"\"\"\n",
" leap = False\n",
" if ((calendar in ['standard', 'gregorian',\n",
" 'proleptic_gregorian', 'julian']) and\n",
" (year % 4 == 0)):\n",
" leap = True\n",
" if ((calendar == 'proleptic_gregorian') and\n",
" (year % 100 == 0) and\n",
" (year % 400 != 0)):\n",
" leap = False\n",
" elif ((calendar in ['standard', 'gregorian']) and\n",
" (year % 100 == 0) and (year % 400 != 0) and\n",
" (year < 1583)):\n",
" leap = False\n",
" return leap\n",
"\n",
"def get_dpm(time, calendar='standard'):\n",
" \"\"\"\n",
" return a array of days per month corresponding to the months provided in `months`\n",
" \"\"\"\n",
" month_length = np.zeros(len(time), dtype=np.int)\n",
" \n",
" cal_days = dpm[calendar]\n",
" \n",
" for i, (month, year) in enumerate(zip(time.month, time.year)):\n",
" month_length[i] = cal_days[month]\n",
" if leap_year(year, calendar=calendar) and month == 2:\n",
" month_length[i] += 1\n",
" return month_length"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand All @@ -131,7 +51,7 @@
"outputs": [],
"source": [
"ds = xr.tutorial.open_dataset('rasm').load()\n",
"print(ds)"
"ds"
]
},
{
Expand All @@ -143,7 +63,17 @@
"- calculate the month lengths for each monthly data record\n",
"- calculate weights using `groupby('time.season')`\n",
"\n",
"Finally, we just need to multiply our weights by the `Dataset` and sum allong the time dimension. "
"Finally, we just need to multiply our weights by the `Dataset` and sum along the time dimension. Creating a `DataArray` for the month length is as easy as using the `days_in_month` accessor on the time coordinate. The calendar type, in this case `'noleap'`, is automatically considered in this operation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"month_length = ds.time.dt.days_in_month\n",
"month_length"
]
},
{
Expand All @@ -157,13 +87,8 @@
},
"outputs": [],
"source": [
"# Make a DataArray with the number of days in each month, size = len(time)\n",
"month_length = xr.DataArray(get_dpm(ds.time.to_index(), calendar='noleap'),\n",
" coords=[ds.time], name='month_length')\n",
"\n",
"# Calculate the weights by grouping by 'time.season'.\n",
"# Conversion to float type ('astype(float)') only necessary for Python 2.x\n",
"weights = month_length.groupby('time.season') / month_length.astype(float).groupby('time.season').sum()\n",
"weights = month_length.groupby('time.season') / month_length.groupby('time.season').sum()\n",
"\n",
"# Test that the sum of the weights for each season is 1.0\n",
"np.testing.assert_allclose(weights.groupby('time.season').sum().values, np.ones(4))\n",
Expand All @@ -183,7 +108,7 @@
},
"outputs": [],
"source": [
"print(ds_weighted)"
"ds_weighted"
dcherian marked this conversation as resolved.
Show resolved Hide resolved
]
},
{
Expand Down Expand Up @@ -262,13 +187,9 @@
"source": [
"# Wrap it into a simple function\n",
"def season_mean(ds, calendar='standard'):\n",
" # Make a DataArray of season/year groups\n",
" year_season = xr.DataArray(ds.time.to_index().to_period(freq='Q-NOV').to_timestamp(how='E'),\n",
" coords=[ds.time], name='year_season')\n",
"\n",
" # Make a DataArray with the number of days in each month, size = len(time)\n",
" month_length = xr.DataArray(get_dpm(ds.time.to_index(), calendar=calendar),\n",
" coords=[ds.time], name='month_length')\n",
" month_length = ds.time.dt.days_in_month\n",
"\n",
" # Calculate the weights by grouping by 'time.season'\n",
" weights = month_length.groupby('time.season') / month_length.groupby('time.season').sum()\n",
"\n",
Expand All @@ -278,13 +199,6 @@
" # Calculate the weighted average\n",
" return (ds * weights).groupby('time.season').sum(dim='time')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand All @@ -304,7 +218,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
"version": "3.7.3"
},
"toc": {
"base_numbering": 1,
Expand All @@ -321,5 +235,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
3 changes: 2 additions & 1 deletion doc/weather-climate.rst
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports:

- Access of basic datetime components via the ``dt`` accessor (in this case
just "year", "month", "day", "hour", "minute", "second", "microsecond",
"season", "dayofyear", and "dayofweek"):
"season", "dayofyear", "dayofweek", and "days_in_month"):

.. ipython:: python

Expand All @@ -104,6 +104,7 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports:
da.time.dt.season
da.time.dt.dayofyear
da.time.dt.dayofweek
da.time.dt.days_in_month

- Rounding of datetimes to fixed frequencies via the ``dt`` accessor:

Expand Down
12 changes: 11 additions & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ New Features
By `Todd Jennings <https://github.com/toddrjen>`_
- Allow plotting of boolean arrays. (:pull:`3766`)
By `Marek Jacob <https://github.com/MeraX>`_
- A ``days_in_month`` accessor for :py:class:`xarray.CFTimeIndex`, analogous to
the ``days_in_month`` accessor for a :py:class:`pandas.DatetimeIndex`, which
returns the days in the month of each datetime in the index. Now days in month
weights for both standard and non-standard calendars can be obtained using
the :py:class:`~core.accessor_dt.DatetimeAccessor` (:pull:`3935`). This
feature requires cftime version 1.1.0 or greater. By
`Spencer Clark <https://github.com/spencerkclark>`_.
spencerkclark marked this conversation as resolved.
Show resolved Hide resolved

Bug fixes
~~~~~~~~~
Expand All @@ -69,7 +76,10 @@ Documentation
:py:meth:`DataArray.diff` so it does document the ``dim``
parameter as required. (:issue:`1040`, :pull:`3909`)
By `Justus Magin <https://github.com/keewis>`_.

- Updated :doc:`Calculating Seasonal Averages from Timeseries of Monthly Means
<examples/monthly-means>` example notebook to take advantage of the new
``days_in_month`` accessor for :py:class:`xarray.CFTimeIndex`
(:pull:`3935`). By `Spencer Clark <https://github.com/spencerkclark>`_.

Internal Changes
~~~~~~~~~~~~~~~~
Expand Down
3 changes: 3 additions & 0 deletions xarray/coding/cftimeindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,9 @@ class CFTimeIndex(pd.Index):
"dayofyr", "The ordinal day of year of the datetime", "1.0.2.1"
)
dayofweek = _field_accessor("dayofwk", "The day of week of the datetime", "1.0.2.1")
days_in_month = _field_accessor(
"daysinmonth", "The number of days in the month of the datetime", "1.1.0.0"
)
date_type = property(get_date_type)

def __new__(cls, data, name=None):
Expand Down
1 change: 1 addition & 0 deletions xarray/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def LooseVersion(vstring):
has_pynio, requires_pynio = _importorskip("Nio")
has_pseudonetcdf, requires_pseudonetcdf = _importorskip("PseudoNetCDF")
has_cftime, requires_cftime = _importorskip("cftime")
has_cftime_1_1_0, requires_cftime_1_1_0 = _importorskip("cftime", minversion="1.1.0.0")
has_dask, requires_dask = _importorskip("dask")
has_bottleneck, requires_bottleneck = _importorskip("bottleneck")
has_nc_time_axis, requires_nc_time_axis = _importorskip("nc_time_axis")
Expand Down
9 changes: 8 additions & 1 deletion xarray/tests/test_cftimeindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
)
from xarray.tests import assert_array_equal, assert_identical

from . import raises_regex, requires_cftime
from . import raises_regex, requires_cftime, requires_cftime_1_1_0
from .test_coding_times import (
_ALL_CALENDARS,
_NON_STANDARD_CALENDARS,
Expand Down Expand Up @@ -229,6 +229,13 @@ def test_cftimeindex_dayofweek_accessor(index):
assert_array_equal(result, expected)


@requires_cftime_1_1_0
def test_cftimeindex_days_in_month_accessor(index):
    """Check the index-level ``days_in_month`` against per-element values.

    The vectorized ``days_in_month`` accessor on ``index`` must agree,
    element by element, with the ``daysinmonth`` attribute read from each
    date obtained by iterating over the index.
    """
    actual_lengths = index.days_in_month
    # Reference values come straight from the individual date objects.
    reference_lengths = [element.daysinmonth for element in index]
    assert_array_equal(actual_lengths, reference_lengths)


@requires_cftime
@pytest.mark.parametrize(
("string", "date_args", "reso"),
Expand Down