diff --git a/doc/time-series.rst b/doc/time-series.rst index afd9f087bfe..5b857789629 100644 --- a/doc/time-series.rst +++ b/doc/time-series.rst @@ -70,7 +70,11 @@ You can manual decode arrays in this form by passing a dataset to One unfortunate limitation of using ``datetime64[ns]`` is that it limits the native representation of dates to those that fall between the years 1678 and 2262. When a netCDF file contains dates outside of these bounds, dates will be -returned as arrays of ``netcdftime.datetime`` objects. +returned as arrays of ``cftime.datetime`` objects and a ``CFTimeIndex`` +can be used for indexing. The ``CFTimeIndex`` enables only a subset of +the indexing functionality of a ``pandas.DatetimeIndex`` and is only enabled +when using the standalone version of ``cftime`` (not the version packaged with +earlier versions of ``netCDF4``). See :ref:`CFTimeIndex` for more information. Datetime indexing ----------------- @@ -207,3 +211,93 @@ Dataset and DataArray objects with an arbitrary number of dimensions. For more examples of using grouped operations on a time dimension, see :ref:`toy weather data`. + + +.. _CFTimeIndex: + +Non-standard calendars and dates outside the Timestamp-valid range +------------------------------------------------------------------ + +Through the standalone ``cftime`` library and a custom subclass of +``pandas.Index``, xarray supports a subset of the indexing functionality enabled +through the standard ``pandas.DatetimeIndex`` for dates from non-standard +calendars or dates using a standard calendar, but outside the +`Timestamp-valid range`_ (approximately between years 1678 and 2262). This +behavior has not yet been turned on by default; to take advantage of this +functionality, you must have the ``enable_cftimeindex`` option set to +``True`` within your context (see :py:func:`~xarray.set_options` for more +information). It is expected that this will become the default behavior in +xarray version 0.11. 
+ +For instance, you can create a DataArray indexed by a time +coordinate with a no-leap calendar within a context manager setting the +``enable_cftimeindex`` option, and the time index will be cast to a +``CFTimeIndex``: + +.. ipython:: python + + from itertools import product + from cftime import DatetimeNoLeap + + dates = [DatetimeNoLeap(year, month, 1) for year, month in + product(range(1, 3), range(1, 13))] + with xr.set_options(enable_cftimeindex=True): + da = xr.DataArray(np.arange(24), coords=[dates], dims=['time'], + name='foo') + +.. note:: + + With the ``enable_cftimeindex`` option activated, a ``CFTimeIndex`` + will be used for time indexing if any of the following are true: + + - The dates are from a non-standard calendar + - Any dates are outside the Timestamp-valid range + + Otherwise a ``pandas.DatetimeIndex`` will be used. In addition, if any + variable (not just an index variable) is encoded using a non-standard + calendar, its times will be decoded into ``cftime.datetime`` objects, + regardless of whether or not they can be represented using + ``np.datetime64[ns]`` objects. + +For data indexed by a ``CFTimeIndex`` xarray currently supports: + +- `Partial datetime string indexing`_ using strictly `ISO 8601-format`_ partial + datetime strings: + +.. ipython:: python + + da.sel(time='0001') + da.sel(time=slice('0001-05', '0002-02')) + +- Access of basic datetime components via the ``dt`` accessor (in this case + just "year", "month", "day", "hour", "minute", "second", "microsecond", and + "season"): + +.. ipython:: python + + da.time.dt.year + da.time.dt.month + da.time.dt.season + +- Group-by operations based on datetime accessor attributes (e.g. by month of + the year): + +.. ipython:: python + + da.groupby('time.month').sum() + +- And serialization: + +.. ipython:: python + + da.to_netcdf('example.nc') + xr.open_dataset('example.nc') + +.. 
note:: + + Currently resampling along the time dimension for data indexed by a + ``CFTimeIndex`` is not supported. + +.. _Timestamp-valid range: https://pandas.pydata.org/pandas-docs/stable/timeseries.html#timestamp-limitations +.. _ISO 8601-format: https://en.wikipedia.org/wiki/ISO_8601 +.. _partial datetime string indexing: https://pandas.pydata.org/pandas-docs/stable/timeseries.html#partial-string-indexing diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d804fe7b915..7e05112243b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -34,6 +34,16 @@ v0.10.4 (unreleased) Enhancements ~~~~~~~~~~~~ +- Add an option for using a ``CFTimeIndex`` for indexing times with + non-standard calendars and/or outside the Timestamp-valid range; this index + enables a subset of the functionality of a standard + ``pandas.DatetimeIndex`` (:issue:`789`, :issue:`1084`, :issue:`1252`). + By `Spencer Clark <https://github.com/spencerkclark>`_ with help from + `Stephan Hoyer <https://github.com/shoyer>`_. +- Allow for serialization of ``cftime.datetime`` objects (:issue:`789`, + :issue:`1084`, :issue:`2008`, :issue:`1252`) using the standalone ``cftime`` + library. By `Spencer Clark + <https://github.com/spencerkclark>`_. - Support writing lists of strings as netCDF attributes (:issue:`2044`). By `Dan Nowacki <https://github.com/dnowacki-usgs>`_. diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py new file mode 100644 index 00000000000..fb51ace5d69 --- /dev/null +++ b/xarray/coding/cftimeindex.py @@ -0,0 +1,252 @@ +from __future__ import absolute_import +import re +from datetime import timedelta + +import numpy as np +import pandas as pd + +from xarray.core import pycompat +from xarray.core.utils import is_scalar + + +def named(name, pattern): + return '(?P<' + name + '>' + pattern + ')' + + +def optional(x): + return '(?:' + x + ')?' 
+ + +def trailing_optional(xs): + if not xs: + return '' + return xs[0] + optional(trailing_optional(xs[1:])) + + +def build_pattern(date_sep='\-', datetime_sep='T', time_sep='\:'): + pieces = [(None, 'year', '\d{4}'), + (date_sep, 'month', '\d{2}'), + (date_sep, 'day', '\d{2}'), + (datetime_sep, 'hour', '\d{2}'), + (time_sep, 'minute', '\d{2}'), + (time_sep, 'second', '\d{2}')] + pattern_list = [] + for sep, name, sub_pattern in pieces: + pattern_list.append((sep if sep else '') + named(name, sub_pattern)) + # TODO: allow timezone offsets? + return '^' + trailing_optional(pattern_list) + '$' + + +_BASIC_PATTERN = build_pattern(date_sep='', time_sep='') +_EXTENDED_PATTERN = build_pattern() +_PATTERNS = [_BASIC_PATTERN, _EXTENDED_PATTERN] + + +def parse_iso8601(datetime_string): + for pattern in _PATTERNS: + match = re.match(pattern, datetime_string) + if match: + return match.groupdict() + raise ValueError('no ISO-8601 match for string: %s' % datetime_string) + + +def _parse_iso8601_with_reso(date_type, timestr): + default = date_type(1, 1, 1) + result = parse_iso8601(timestr) + replace = {} + + for attr in ['year', 'month', 'day', 'hour', 'minute', 'second']: + value = result.get(attr, None) + if value is not None: + # Note ISO8601 conventions allow for fractional seconds. + # TODO: Consider adding support for sub-second resolution? + replace[attr] = int(value) + resolution = attr + + return default.replace(**replace), resolution + + +def _parsed_string_to_bounds(date_type, resolution, parsed): + """Generalization of + pandas.tseries.index.DatetimeIndex._parsed_string_to_bounds + for use with non-standard calendars and cftime.datetime + objects. 
+ """ + if resolution == 'year': + return (date_type(parsed.year, 1, 1), + date_type(parsed.year + 1, 1, 1) - timedelta(microseconds=1)) + elif resolution == 'month': + if parsed.month == 12: + end = date_type(parsed.year + 1, 1, 1) - timedelta(microseconds=1) + else: + end = (date_type(parsed.year, parsed.month + 1, 1) - + timedelta(microseconds=1)) + return date_type(parsed.year, parsed.month, 1), end + elif resolution == 'day': + start = date_type(parsed.year, parsed.month, parsed.day) + return start, start + timedelta(days=1, microseconds=-1) + elif resolution == 'hour': + start = date_type(parsed.year, parsed.month, parsed.day, parsed.hour) + return start, start + timedelta(hours=1, microseconds=-1) + elif resolution == 'minute': + start = date_type(parsed.year, parsed.month, parsed.day, parsed.hour, + parsed.minute) + return start, start + timedelta(minutes=1, microseconds=-1) + elif resolution == 'second': + start = date_type(parsed.year, parsed.month, parsed.day, parsed.hour, + parsed.minute, parsed.second) + return start, start + timedelta(seconds=1, microseconds=-1) + else: + raise KeyError + + +def get_date_field(datetimes, field): + """Adapted from pandas.tslib.get_date_field""" + return np.array([getattr(date, field) for date in datetimes]) + + +def _field_accessor(name, docstring=None): + """Adapted from pandas.tseries.index._field_accessor""" + def f(self): + return get_date_field(self._data, name) + + f.__name__ = name + f.__doc__ = docstring + return property(f) + + +def get_date_type(self): + return type(self._data[0]) + + +def assert_all_valid_date_type(data): + import cftime + + sample = data[0] + date_type = type(sample) + if not isinstance(sample, cftime.datetime): + raise TypeError( + 'CFTimeIndex requires cftime.datetime ' + 'objects. Got object of {}.'.format(date_type)) + if not all(isinstance(value, date_type) for value in data): + raise TypeError( + 'CFTimeIndex requires using datetime ' + 'objects of all the same type. 
Got\n{}.'.format(data)) + + +class CFTimeIndex(pd.Index): + year = _field_accessor('year', 'The year of the datetime') + month = _field_accessor('month', 'The month of the datetime') + day = _field_accessor('day', 'The days of the datetime') + hour = _field_accessor('hour', 'The hours of the datetime') + minute = _field_accessor('minute', 'The minutes of the datetime') + second = _field_accessor('second', 'The seconds of the datetime') + microsecond = _field_accessor('microsecond', + 'The microseconds of the datetime') + date_type = property(get_date_type) + + def __new__(cls, data): + result = object.__new__(cls) + assert_all_valid_date_type(data) + result._data = np.array(data) + return result + + def _partial_date_slice(self, resolution, parsed): + """Adapted from + pandas.tseries.index.DatetimeIndex._partial_date_slice + + Note that when using a CFTimeIndex, if a partial-date selection + returns a single element, it will never be converted to a scalar + coordinate; this is in slight contrast to the behavior when using + a DatetimeIndex, which sometimes will return a DataArray with a scalar + coordinate depending on the resolution of the datetimes used in + defining the index. 
For example: + + >>> from cftime import DatetimeNoLeap + >>> import pandas as pd + >>> import xarray as xr + >>> da = xr.DataArray([1, 2], + coords=[[DatetimeNoLeap(2001, 1, 1), + DatetimeNoLeap(2001, 2, 1)]], + dims=['time']) + >>> da.sel(time='2001-01-01') + + array([1]) + Coordinates: + * time (time) object 2001-01-01 00:00:00 + >>> da = xr.DataArray([1, 2], + coords=[[pd.Timestamp(2001, 1, 1), + pd.Timestamp(2001, 2, 1)]], + dims=['time']) + >>> da.sel(time='2001-01-01') + + array(1) + Coordinates: + time datetime64[ns] 2001-01-01 + >>> da = xr.DataArray([1, 2], + coords=[[pd.Timestamp(2001, 1, 1, 1), + pd.Timestamp(2001, 2, 1)]], + dims=['time']) + >>> da.sel(time='2001-01-01') + + array([1]) + Coordinates: + * time (time) datetime64[ns] 2001-01-01T01:00:00 + """ + start, end = _parsed_string_to_bounds(self.date_type, resolution, + parsed) + lhs_mask = (self._data >= start) + rhs_mask = (self._data <= end) + return (lhs_mask & rhs_mask).nonzero()[0] + + def _get_string_slice(self, key): + """Adapted from pandas.tseries.index.DatetimeIndex._get_string_slice""" + parsed, resolution = _parse_iso8601_with_reso(self.date_type, key) + loc = self._partial_date_slice(resolution, parsed) + return loc + + def get_loc(self, key, method=None, tolerance=None): + """Adapted from pandas.tseries.index.DatetimeIndex.get_loc""" + if isinstance(key, pycompat.basestring): + return self._get_string_slice(key) + else: + return pd.Index.get_loc(self, key, method=method, + tolerance=tolerance) + + def _maybe_cast_slice_bound(self, label, side, kind): + """Adapted from + pandas.tseries.index.DatetimeIndex._maybe_cast_slice_bound""" + if isinstance(label, pycompat.basestring): + parsed, resolution = _parse_iso8601_with_reso(self.date_type, + label) + start, end = _parsed_string_to_bounds(self.date_type, resolution, + parsed) + if self.is_monotonic_decreasing and len(self): + return end if side == 'left' else start + return start if side == 'left' else end + else: + return label + + # 
TODO: Add ability to use integer range outside of iloc? + # e.g. series[1:5]. + def get_value(self, series, key): + """Adapted from pandas.tseries.index.DatetimeIndex.get_value""" + if not isinstance(key, slice): + return series.iloc[self.get_loc(key)] + else: + return series.iloc[self.slice_indexer( + key.start, key.stop, key.step)] + + def __contains__(self, key): + """Adapted from + pandas.tseries.base.DatetimeIndexOpsMixin.__contains__""" + try: + result = self.get_loc(key) + return (is_scalar(result) or type(result) == slice or + (isinstance(result, np.ndarray) and result.size)) + except (KeyError, TypeError, ValueError): + return False + + def contains(self, key): + """Needed for .loc based partial-string indexing""" + return self.__contains__(key) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 0a48b62986e..61314d9cbe6 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -9,8 +9,10 @@ import numpy as np import pandas as pd +from ..core.common import contains_cftime_datetimes from ..core import indexing from ..core.formatting import first_n_items, format_timestamp, last_item +from ..core.options import OPTIONS from ..core.pycompat import PY3 from ..core.variable import Variable from .variables import ( @@ -24,7 +26,7 @@ from pandas.tslib import OutOfBoundsDatetime -# standard calendars recognized by netcdftime +# standard calendars recognized by cftime _STANDARD_CALENDARS = set(['standard', 'gregorian', 'proleptic_gregorian']) _NS_PER_TIME_DELTA = {'us': int(1e3), @@ -54,6 +56,15 @@ def _import_cftime(): return cftime +def _require_standalone_cftime(): + """Raises an ImportError if the standalone cftime is not found""" + try: + import cftime # noqa: F401 + except ImportError: + raise ImportError('Using a CFTimeIndex requires the standalone ' + 'version of the cftime library.') + + def _netcdf_to_numpy_timeunit(units): units = units.lower() if not units.endswith('s'): @@ -73,28 +84,41 @@ def _unpack_netcdf_time_units(units): 
return delta_units, ref_date -def _decode_datetime_with_netcdftime(num_dates, units, calendar): +def _decode_datetime_with_cftime(num_dates, units, calendar, + enable_cftimeindex): cftime = _import_cftime() + if enable_cftimeindex: + _require_standalone_cftime() + dates = np.asarray(cftime.num2date(num_dates, units, calendar, + only_use_cftime_datetimes=True)) + else: + dates = np.asarray(cftime.num2date(num_dates, units, calendar)) - dates = np.asarray(cftime.num2date(num_dates, units, calendar)) if (dates[np.nanargmin(num_dates)].year < 1678 or dates[np.nanargmax(num_dates)].year >= 2262): - warnings.warn('Unable to decode time axis into full ' - 'numpy.datetime64 objects, continuing using dummy ' - 'netcdftime.datetime objects instead, reason: dates out' - ' of range', SerializationWarning, stacklevel=3) + if not enable_cftimeindex or calendar in _STANDARD_CALENDARS: + warnings.warn( + 'Unable to decode time axis into full ' + 'numpy.datetime64 objects, continuing using dummy ' + 'cftime.datetime objects instead, reason: dates out ' + 'of range', SerializationWarning, stacklevel=3) else: - try: - dates = cftime_to_nptime(dates) - except ValueError as e: - warnings.warn('Unable to decode time axis into full ' - 'numpy.datetime64 objects, continuing using ' - 'dummy netcdftime.datetime objects instead, reason:' - '{0}'.format(e), SerializationWarning, stacklevel=3) + if enable_cftimeindex: + if calendar in _STANDARD_CALENDARS: + dates = cftime_to_nptime(dates) + else: + try: + dates = cftime_to_nptime(dates) + except ValueError as e: + warnings.warn( + 'Unable to decode time axis into full ' + 'numpy.datetime64 objects, continuing using ' + 'dummy cftime.datetime objects instead, reason:' + '{0}'.format(e), SerializationWarning, stacklevel=3) return dates -def _decode_cf_datetime_dtype(data, units, calendar): +def _decode_cf_datetime_dtype(data, units, calendar, enable_cftimeindex): # Verify that at least the first and last date can be decoded # successfully. 
Otherwise, tracebacks end up swallowed by # Dataset.__repr__ when users try to view their lazily decoded array. @@ -104,7 +128,8 @@ def _decode_cf_datetime_dtype(data, units, calendar): last_item(values) or [0]]) try: - result = decode_cf_datetime(example_value, units, calendar) + result = decode_cf_datetime(example_value, units, calendar, + enable_cftimeindex) except Exception: calendar_msg = ('the default calendar' if calendar is None else 'calendar %r' % calendar) @@ -120,12 +145,13 @@ def _decode_cf_datetime_dtype(data, units, calendar): return dtype -def decode_cf_datetime(num_dates, units, calendar=None): +def decode_cf_datetime(num_dates, units, calendar=None, + enable_cftimeindex=False): """Given an array of numeric dates in netCDF format, convert it into a numpy array of date time objects. For standard (Gregorian) calendars, this function uses vectorized - operations, which makes it much faster than netcdftime.num2date. In such a + operations, which makes it much faster than cftime.num2date. In such a case, the returned array will be of type np.datetime64. 
Note that time unit in `units` must not be smaller than microseconds and @@ -133,7 +159,7 @@ def decode_cf_datetime(num_dates, units, calendar=None): See also -------- - netcdftime.num2date + cftime.num2date """ num_dates = np.asarray(num_dates) flat_num_dates = num_dates.ravel() @@ -151,7 +177,7 @@ def decode_cf_datetime(num_dates, units, calendar=None): ref_date = pd.Timestamp(ref_date) except ValueError: # ValueError is raised by pd.Timestamp for non-ISO timestamp - # strings, in which case we fall back to using netcdftime + # strings, in which case we fall back to using cftime raise OutOfBoundsDatetime # fixes: https://github.com/pydata/pandas/issues/14068 @@ -170,8 +196,9 @@ def decode_cf_datetime(num_dates, units, calendar=None): ref_date).values except (OutOfBoundsDatetime, OverflowError): - dates = _decode_datetime_with_netcdftime( - flat_num_dates.astype(np.float), units, calendar) + dates = _decode_datetime_with_cftime( + flat_num_dates.astype(np.float), units, calendar, + enable_cftimeindex) return dates.reshape(num_dates.shape) @@ -203,18 +230,41 @@ def _infer_time_units_from_diff(unique_timedeltas): return 'seconds' +def infer_calendar_name(dates): + """Given an array of datetimes, infer the CF calendar name""" + if np.asarray(dates).dtype == 'datetime64[ns]': + return 'proleptic_gregorian' + else: + return np.asarray(dates).ravel()[0].calendar + + def infer_datetime_units(dates): """Given an array of datetimes, returns a CF compatible time-unit string of the form "{time_unit} since {date[0]}", where `time_unit` is 'days', 'hours', 'minutes' or 'seconds' (the first one that can evenly divide all unique time deltas in `dates`) """ - dates = pd.to_datetime(np.asarray(dates).ravel(), box=False) - dates = dates[pd.notnull(dates)] - unique_timedeltas = np.unique(np.diff(dates)) + dates = np.asarray(dates).ravel() + if np.asarray(dates).dtype == 'datetime64[ns]': + dates = pd.to_datetime(dates, box=False) + dates = dates[pd.notnull(dates)] + reference_date = 
dates[0] if len(dates) > 0 else '1970-01-01' + reference_date = pd.Timestamp(reference_date) + else: + reference_date = dates[0] if len(dates) > 0 else '1970-01-01' + reference_date = format_cftime_datetime(reference_date) + unique_timedeltas = np.unique(np.diff(dates)).astype('timedelta64[ns]') units = _infer_time_units_from_diff(unique_timedeltas) - reference_date = dates[0] if len(dates) > 0 else '1970-01-01' - return '%s since %s' % (units, pd.Timestamp(reference_date)) + return '%s since %s' % (units, reference_date) + + +def format_cftime_datetime(date): + """Converts a cftime.datetime object to a string with the format: + YYYY-MM-DD HH:MM:SS.UUUUUU + """ + return '{:04d}-{:02d}-{:02d} {:02d}:{:02d}:{:02d}.{:06d}'.format( + date.year, date.month, date.day, date.hour, date.minute, date.second, + date.microsecond) def infer_timedelta_units(deltas): @@ -249,8 +299,8 @@ def _cleanup_netcdf_time_units(units): return units -def _encode_datetime_with_netcdftime(dates, units, calendar): - """Fallback method for encoding dates using netcdftime. +def _encode_datetime_with_cftime(dates, units, calendar): + """Fallback method for encoding dates using cftime. This method is more flexible than xarray's parsing using datetime64[ns] arrays but also slower because it loops over each element. 
@@ -282,7 +332,7 @@ def encode_cf_datetime(dates, units=None, calendar=None): See also -------- - netcdftime.date2num + cftime.date2num """ dates = np.asarray(dates) @@ -292,12 +342,12 @@ def encode_cf_datetime(dates, units=None, calendar=None): units = _cleanup_netcdf_time_units(units) if calendar is None: - calendar = 'proleptic_gregorian' + calendar = infer_calendar_name(dates) delta, ref_date = _unpack_netcdf_time_units(units) try: if calendar not in _STANDARD_CALENDARS or dates.dtype.kind == 'O': - # parse with netcdftime instead + # parse with cftime instead raise OutOfBoundsDatetime assert dates.dtype == 'datetime64[ns]' @@ -307,7 +357,7 @@ def encode_cf_datetime(dates, units=None, calendar=None): num = (dates - ref_date) / time_delta except (OutOfBoundsDatetime, OverflowError): - num = _encode_datetime_with_netcdftime(dates, units, calendar) + num = _encode_datetime_with_cftime(dates, units, calendar) num = cast_to_int_if_safe(num) return (num, units, calendar) @@ -328,8 +378,8 @@ class CFDatetimeCoder(VariableCoder): def encode(self, variable, name=None): dims, data, attrs, encoding = unpack_for_encoding(variable) - - if np.issubdtype(data.dtype, np.datetime64): + if (np.issubdtype(data.dtype, np.datetime64) or + contains_cftime_datetimes(variable)): (data, units, calendar) = encode_cf_datetime( data, encoding.pop('units', None), @@ -342,12 +392,15 @@ def encode(self, variable, name=None): def decode(self, variable, name=None): dims, data, attrs, encoding = unpack_for_decoding(variable) + enable_cftimeindex = OPTIONS['enable_cftimeindex'] if 'units' in attrs and 'since' in attrs['units']: units = pop_to(attrs, encoding, 'units') calendar = pop_to(attrs, encoding, 'calendar') - dtype = _decode_cf_datetime_dtype(data, units, calendar) + dtype = _decode_cf_datetime_dtype( + data, units, calendar, enable_cftimeindex) transform = partial( - decode_cf_datetime, units=units, calendar=calendar) + decode_cf_datetime, units=units, calendar=calendar, + 
enable_cftimeindex=enable_cftimeindex) data = lazy_elemwise_func(data, transform, dtype) return Variable(dims, data, attrs, encoding) diff --git a/xarray/core/accessors.py b/xarray/core/accessors.py index 52d9e6db408..81af0532d93 100644 --- a/xarray/core/accessors.py +++ b/xarray/core/accessors.py @@ -3,7 +3,7 @@ import numpy as np import pandas as pd -from .dtypes import is_datetime_like +from .common import is_np_datetime_like, _contains_datetime_like_objects from .pycompat import dask_array_type @@ -16,6 +16,20 @@ def _season_from_months(months): return seasons[(months // 3) % 4] +def _access_through_cftimeindex(values, name): + """Coerce an array of datetime-like values to a CFTimeIndex + and access requested datetime component + """ + from ..coding.cftimeindex import CFTimeIndex + values_as_cftimeindex = CFTimeIndex(values.ravel()) + if name == 'season': + months = values_as_cftimeindex.month + field_values = _season_from_months(months) + else: + field_values = getattr(values_as_cftimeindex, name) + return field_values.reshape(values.shape) + + def _access_through_series(values, name): """Coerce an array of datetime-like values to a pandas Series and access requested datetime component @@ -48,12 +62,17 @@ def _get_date_field(values, name, dtype): Array-like of datetime fields accessed for each element in values """ + if is_np_datetime_like(values.dtype): + access_method = _access_through_series + else: + access_method = _access_through_cftimeindex + if isinstance(values, dask_array_type): from dask.array import map_blocks - return map_blocks(_access_through_series, + return map_blocks(access_method, values, name, dtype=dtype) else: - return _access_through_series(values, name) + return access_method(values, name) def _round_series(values, name, freq): @@ -111,15 +130,17 @@ class DatetimeAccessor(object): All of the pandas fields are accessible here. 
Note that these fields are not calendar-aware; if your datetimes are encoded with a non-Gregorian - calendar (e.g. a 360-day calendar) using netcdftime, then some fields like + calendar (e.g. a 360-day calendar) using cftime, then some fields like `dayofyear` may not be accurate. """ def __init__(self, xarray_obj): - if not is_datetime_like(xarray_obj.dtype): + if not _contains_datetime_like_objects(xarray_obj): raise TypeError("'dt' accessor only available for " - "DataArray with datetime64 or timedelta64 dtype") + "DataArray with datetime64 timedelta64 dtype or " + "for arrays containing cftime datetime " + "objects.") self._obj = xarray_obj def _tslib_field_accessor(name, docstring=None, dtype=None): diff --git a/xarray/core/common.py b/xarray/core/common.py index 5beb5234d4c..fa194554166 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -868,3 +868,34 @@ def ones_like(other, dtype=None): """Shorthand for full_like(other, 1, dtype) """ return full_like(other, 1, dtype) + + +def is_np_datetime_like(dtype): + """Check if a dtype is a subclass of the numpy datetime types + """ + return (np.issubdtype(dtype, np.datetime64) or + np.issubdtype(dtype, np.timedelta64)) + + +def contains_cftime_datetimes(var): + """Check if a variable contains cftime datetime objects""" + try: + from cftime import datetime as cftime_datetime + except ImportError: + return False + else: + if var.dtype == np.dtype('O') and var.data.size > 0: + sample = var.data.ravel()[0] + if isinstance(sample, dask_array_type): + sample = sample.compute() + if isinstance(sample, np.ndarray): + sample = sample.item() + return isinstance(sample, cftime_datetime) + else: + return False + + +def _contains_datetime_like_objects(var): + """Check if a variable contains datetime like objects (either + np.datetime64, np.timedelta64, or cftime.datetime)""" + return is_np_datetime_like(var.dtype) or contains_cftime_datetimes(var) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 
f28e7980b34..d7a69a8e1a8 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -17,7 +17,8 @@ rolling, utils) from .. import conventions from .alignment import align -from .common import DataWithCoords, ImplementsDatasetReduce +from .common import (DataWithCoords, ImplementsDatasetReduce, + _contains_datetime_like_objects) from .coordinates import ( DatasetCoordinates, Indexes, LevelCoordinatesSource, assert_coordinate_consistent, remap_label_indexers) @@ -75,7 +76,7 @@ def _get_virtual_variable(variables, key, level_vars=None, dim_sizes=None): virtual_var = ref_var var_name = key else: - if is_datetime_like(ref_var.dtype): + if _contains_datetime_like_objects(ref_var): ref_var = xr.DataArray(ref_var) data = getattr(ref_var.dt, var_name).data else: diff --git a/xarray/core/options.py b/xarray/core/options.py index b2968a2a02f..48d4567fc99 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -3,6 +3,7 @@ OPTIONS = { 'display_width': 80, 'arithmetic_join': 'inner', + 'enable_cftimeindex': False } @@ -15,6 +16,9 @@ class set_options(object): Default: ``80``. - ``arithmetic_join``: DataArray/Dataset alignment in binary operations. Default: ``'inner'``. + - ``enable_cftimeindex``: flag to enable using a ``CFTimeIndex`` + for time indexes with non-standard calendars or dates outside the + Timestamp-valid range. Default: ``False``. 
You can use ``set_options`` either as a context manager: diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 25a60b87266..06bb3ede393 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -12,6 +12,7 @@ import numpy as np import pandas as pd +from .options import OPTIONS from .pycompat import ( OrderedDict, basestring, bytes_type, dask_array_type, iteritems) @@ -36,6 +37,21 @@ def wrapper(*args, **kwargs): return wrapper +def _maybe_cast_to_cftimeindex(index): + from ..coding.cftimeindex import CFTimeIndex + + if not OPTIONS['enable_cftimeindex']: + return index + else: + if index.dtype == 'O': + try: + return CFTimeIndex(index) + except (ImportError, TypeError): + return index + else: + return index + + def safe_cast_to_index(array): """Given an array, safely cast it to a pandas.Index. @@ -54,7 +70,7 @@ def safe_cast_to_index(array): if hasattr(array, 'dtype') and array.dtype.kind == 'O': kwargs['dtype'] = object index = pd.Index(np.asarray(array), **kwargs) - return index + return _maybe_cast_to_cftimeindex(index) def multiindex_from_product_levels(levels, names=None): diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index 94ddc8c0535..1266dda33af 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -14,6 +14,7 @@ import numpy as np import pandas as pd +from xarray.core.common import contains_cftime_datetimes from xarray.core.pycompat import basestring from .facetgrid import FacetGrid @@ -53,7 +54,8 @@ def _ensure_plottable(*args): if not (_valid_numpy_subdtype(np.array(x), numpy_types) or _valid_other_type(np.array(x), other_types)): raise TypeError('Plotting requires coordinates to be numeric ' - 'or dates.') + 'or dates of type np.datetime64 or ' + 'datetime.datetime.') def _easy_facetgrid(darray, plotfunc, x, y, row=None, col=None, @@ -120,6 +122,10 @@ def plot(darray, row=None, col=None, col_wrap=None, ax=None, rtol=0.01, """ darray = darray.squeeze() + if contains_cftime_datetimes(darray): + raise 
NotImplementedError('Plotting arrays of cftime.datetime objects ' + 'is currently not possible.') + plot_dims = set(darray.dims) plot_dims.discard(row) plot_dims.discard(col) diff --git a/xarray/tests/test_accessors.py b/xarray/tests/test_accessors.py index ad521546d2e..e1b3a95b942 100644 --- a/xarray/tests/test_accessors.py +++ b/xarray/tests/test_accessors.py @@ -2,11 +2,13 @@ import numpy as np import pandas as pd +import pytest import xarray as xr from . import ( - TestCase, assert_array_equal, assert_equal, raises_regex, requires_dask) + TestCase, assert_array_equal, assert_equal, raises_regex, requires_dask, + has_cftime, has_dask, has_cftime_or_netCDF4) class TestDatetimeAccessor(TestCase): @@ -114,3 +116,117 @@ def test_rounders(self): xdates.time.dt.ceil('D').values) assert_array_equal(dates.round('D').values, xdates.time.dt.round('D').values) + + +_CFTIME_CALENDARS = ['365_day', '360_day', 'julian', 'all_leap', + '366_day', 'gregorian', 'proleptic_gregorian'] +_NT = 100 + + +@pytest.fixture(params=_CFTIME_CALENDARS) +def calendar(request): + return request.param + + +@pytest.fixture() +def times(calendar): + import cftime + + return cftime.num2date( + np.arange(_NT), units='hours since 2000-01-01', calendar=calendar, + only_use_cftime_datetimes=True) + + +@pytest.fixture() +def data(times): + data = np.random.rand(10, 10, _NT) + lons = np.linspace(0, 11, 10) + lats = np.linspace(0, 20, 10) + return xr.DataArray(data, coords=[lons, lats, times], + dims=['lon', 'lat', 'time'], name='data') + + +@pytest.fixture() +def times_3d(times): + lons = np.linspace(0, 11, 10) + lats = np.linspace(0, 20, 10) + times_arr = np.random.choice(times, size=(10, 10, _NT)) + return xr.DataArray(times_arr, coords=[lons, lats, times], + dims=['lon', 'lat', 'time'], + name='data') + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@pytest.mark.parametrize('field', ['year', 'month', 'day', 'hour']) +def test_field_access(data, field): + result = 
getattr(data.time.dt, field) + expected = xr.DataArray( + getattr(xr.coding.cftimeindex.CFTimeIndex(data.time.values), field), + name=field, coords=data.time.coords, dims=data.time.dims) + + assert_equal(result, expected) + + +@pytest.mark.skipif(not has_dask, reason='dask not installed') +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@pytest.mark.parametrize('field', ['year', 'month', 'day', 'hour']) +def test_dask_field_access_1d(data, field): + import dask.array as da + + expected = xr.DataArray( + getattr(xr.coding.cftimeindex.CFTimeIndex(data.time.values), field), + name=field, dims=['time']) + times = xr.DataArray(data.time.values, dims=['time']).chunk({'time': 50}) + result = getattr(times.dt, field) + assert isinstance(result.data, da.Array) + assert result.chunks == times.chunks + assert_equal(result.compute(), expected) + + +@pytest.mark.skipif(not has_dask, reason='dask not installed') +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@pytest.mark.parametrize('field', ['year', 'month', 'day', 'hour']) +def test_dask_field_access(times_3d, data, field): + import dask.array as da + + expected = xr.DataArray( + getattr(xr.coding.cftimeindex.CFTimeIndex(times_3d.values.ravel()), + field).reshape(times_3d.shape), + name=field, coords=times_3d.coords, dims=times_3d.dims) + times_3d = times_3d.chunk({'lon': 5, 'lat': 5, 'time': 50}) + result = getattr(times_3d.dt, field) + assert isinstance(result.data, da.Array) + assert result.chunks == times_3d.chunks + assert_equal(result.compute(), expected) + + +@pytest.fixture() +def cftime_date_type(calendar): + from .test_coding_times import _all_cftime_date_types + + return _all_cftime_date_types()[calendar] + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +def test_seasons(cftime_date_type): + dates = np.array([cftime_date_type(2000, month, 15) + for month in range(1, 13)]) + dates = xr.DataArray(dates) + seasons = ['DJF', 'DJF', 'MAM', 'MAM', 'MAM', 
'JJA', + 'JJA', 'JJA', 'SON', 'SON', 'SON', 'DJF'] + seasons = xr.DataArray(seasons) + + assert_array_equal(seasons.values, dates.dt.season.values) + + +@pytest.mark.skipif(not has_cftime_or_netCDF4, + reason='cftime or netCDF4 not installed') +def test_dt_accessor_error_netCDF4(cftime_date_type): + da = xr.DataArray( + [cftime_date_type(1, 1, 1), cftime_date_type(2, 1, 1)], + dims=['time']) + if not has_cftime: + with pytest.raises(TypeError): + da.dt.month + else: + da.dt.month diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 7f8a440ba5d..7a21502f479 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -32,7 +32,8 @@ assert_identical, has_dask, has_netCDF4, has_scipy, network, raises_regex, requires_dask, requires_h5netcdf, requires_netCDF4, requires_pathlib, requires_pydap, requires_pynio, requires_rasterio, requires_scipy, - requires_scipy_or_netCDF4, requires_zarr) + requires_scipy_or_netCDF4, requires_zarr, + requires_cftime) from .test_dataset import create_test_data try: @@ -341,7 +342,7 @@ def test_roundtrip_string_encoded_characters(self): assert_identical(expected, actual) self.assertEqual(actual['x'].encoding['_Encoding'], 'ascii') - def test_roundtrip_datetime_data(self): + def test_roundtrip_numpy_datetime_data(self): times = pd.to_datetime(['2000-01-01', '2000-01-02', 'NaT']) expected = Dataset({'t': ('t', times), 't0': times[0]}) kwds = {'encoding': {'t0': {'units': 'days since 1950-01-01'}}} @@ -349,6 +350,35 @@ def test_roundtrip_datetime_data(self): assert_identical(expected, actual) assert actual.t0.encoding['units'] == 'days since 1950-01-01' + @requires_cftime + def test_roundtrip_cftime_datetime_data_enable_cftimeindex(self): + from .test_coding_times import _all_cftime_date_types + + date_types = _all_cftime_date_types() + for date_type in date_types.values(): + times = [date_type(1, 1, 1), date_type(1, 1, 2)] + expected = Dataset({'t': ('t', times), 't0': times[0]}) + kwds = 
{'encoding': {'t0': {'units': 'days since 0001-01-01'}}} + expected_decoded_t = np.array(times) + expected_decoded_t0 = np.array([date_type(1, 1, 1)]) + expected_calendar = times[0].calendar + + with xr.set_options(enable_cftimeindex=True): + with self.roundtrip(expected, save_kwargs=kwds) as actual: + abs_diff = abs(actual.t.values - expected_decoded_t) + assert (abs_diff <= np.timedelta64(1, 's')).all() + assert (actual.t.encoding['units'] == + 'days since 0001-01-01 00:00:00.000000') + assert (actual.t.encoding['calendar'] == + expected_calendar) + + abs_diff = abs(actual.t0.values - expected_decoded_t0) + assert (abs_diff <= np.timedelta64(1, 's')).all() + assert (actual.t0.encoding['units'] == + 'days since 0001-01-01') + assert (actual.t.encoding['calendar'] == + expected_calendar) + def test_roundtrip_timedelta_data(self): time_deltas = pd.to_timedelta(['1h', '2h', 'NaT']) expected = Dataset({'td': ('td', time_deltas), 'td0': time_deltas[0]}) @@ -1871,7 +1901,7 @@ def test_roundtrip_string_encoded_characters(self): def test_roundtrip_coordinates_with_space(self): pass - def test_roundtrip_datetime_data(self): + def test_roundtrip_numpy_datetime_data(self): # Override method in DatasetIOTestCases - remove not applicable # save_kwds times = pd.to_datetime(['2000-01-01', '2000-01-02', 'NaT']) @@ -1879,6 +1909,46 @@ def test_roundtrip_datetime_data(self): with self.roundtrip(expected) as actual: assert_identical(expected, actual) + def test_roundtrip_cftime_datetime_data_enable_cftimeindex(self): + # Override method in DatasetIOTestCases - remove not applicable + # save_kwds + from .test_coding_times import _all_cftime_date_types + + date_types = _all_cftime_date_types() + for date_type in date_types.values(): + times = [date_type(1, 1, 1), date_type(1, 1, 2)] + expected = Dataset({'t': ('t', times), 't0': times[0]}) + expected_decoded_t = np.array(times) + expected_decoded_t0 = np.array([date_type(1, 1, 1)]) + + with xr.set_options(enable_cftimeindex=True): + 
with self.roundtrip(expected) as actual: + abs_diff = abs(actual.t.values - expected_decoded_t) + self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) + + abs_diff = abs(actual.t0.values - expected_decoded_t0) + self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) + + def test_roundtrip_cftime_datetime_data_disable_cftimeindex(self): + # Override method in DatasetIOTestCases - remove not applicable + # save_kwds + from .test_coding_times import _all_cftime_date_types + + date_types = _all_cftime_date_types() + for date_type in date_types.values(): + times = [date_type(1, 1, 1), date_type(1, 1, 2)] + expected = Dataset({'t': ('t', times), 't0': times[0]}) + expected_decoded_t = np.array(times) + expected_decoded_t0 = np.array([date_type(1, 1, 1)]) + + with xr.set_options(enable_cftimeindex=False): + with self.roundtrip(expected) as actual: + abs_diff = abs(actual.t.values - expected_decoded_t) + self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) + + abs_diff = abs(actual.t0.values - expected_decoded_t0) + self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) + def test_write_store(self): # Override method in DatasetIOTestCases - not applicable to dask pass diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py new file mode 100644 index 00000000000..c78ac038bd5 --- /dev/null +++ b/xarray/tests/test_cftimeindex.py @@ -0,0 +1,555 @@ +from __future__ import absolute_import + +import pytest + +import pandas as pd +import xarray as xr + +from datetime import timedelta +from xarray.coding.cftimeindex import ( + parse_iso8601, CFTimeIndex, assert_all_valid_date_type, + _parsed_string_to_bounds, _parse_iso8601_with_reso) +from xarray.tests import assert_array_equal, assert_identical + +from . 
import has_cftime, has_cftime_or_netCDF4 +from .test_coding_times import _all_cftime_date_types + + +def date_dict(year=None, month=None, day=None, + hour=None, minute=None, second=None): + return dict(year=year, month=month, day=day, hour=hour, + minute=minute, second=second) + + +ISO8601_STRING_TESTS = { + 'year': ('1999', date_dict(year='1999')), + 'month': ('199901', date_dict(year='1999', month='01')), + 'month-dash': ('1999-01', date_dict(year='1999', month='01')), + 'day': ('19990101', date_dict(year='1999', month='01', day='01')), + 'day-dash': ('1999-01-01', date_dict(year='1999', month='01', day='01')), + 'hour': ('19990101T12', date_dict( + year='1999', month='01', day='01', hour='12')), + 'hour-dash': ('1999-01-01T12', date_dict( + year='1999', month='01', day='01', hour='12')), + 'minute': ('19990101T1234', date_dict( + year='1999', month='01', day='01', hour='12', minute='34')), + 'minute-dash': ('1999-01-01T12:34', date_dict( + year='1999', month='01', day='01', hour='12', minute='34')), + 'second': ('19990101T123456', date_dict( + year='1999', month='01', day='01', hour='12', minute='34', + second='56')), + 'second-dash': ('1999-01-01T12:34:56', date_dict( + year='1999', month='01', day='01', hour='12', minute='34', + second='56')) +} + + +@pytest.mark.parametrize(('string', 'expected'), + list(ISO8601_STRING_TESTS.values()), + ids=list(ISO8601_STRING_TESTS.keys())) +def test_parse_iso8601(string, expected): + result = parse_iso8601(string) + assert result == expected + + with pytest.raises(ValueError): + parse_iso8601(string + '3') + parse_iso8601(string + '.3') + + +_CFTIME_CALENDARS = ['365_day', '360_day', 'julian', 'all_leap', + '366_day', 'gregorian', 'proleptic_gregorian'] + + +@pytest.fixture(params=_CFTIME_CALENDARS) +def date_type(request): + return _all_cftime_date_types()[request.param] + + +@pytest.fixture +def index(date_type): + dates = [date_type(1, 1, 1), date_type(1, 2, 1), + date_type(2, 1, 1), date_type(2, 2, 1)] + return 
CFTimeIndex(dates) + + +@pytest.fixture +def monotonic_decreasing_index(date_type): + dates = [date_type(2, 2, 1), date_type(2, 1, 1), + date_type(1, 2, 1), date_type(1, 1, 1)] + return CFTimeIndex(dates) + + +@pytest.fixture +def da(index): + return xr.DataArray([1, 2, 3, 4], coords=[index], + dims=['time']) + + +@pytest.fixture +def series(index): + return pd.Series([1, 2, 3, 4], index=index) + + +@pytest.fixture +def df(index): + return pd.DataFrame([1, 2, 3, 4], index=index) + + +@pytest.fixture +def feb_days(date_type): + import cftime + if date_type is cftime.DatetimeAllLeap: + return 29 + elif date_type is cftime.Datetime360Day: + return 30 + else: + return 28 + + +@pytest.fixture +def dec_days(date_type): + import cftime + if date_type is cftime.Datetime360Day: + return 30 + else: + return 31 + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +def test_assert_all_valid_date_type(date_type, index): + import cftime + if date_type is cftime.DatetimeNoLeap: + mixed_date_types = [date_type(1, 1, 1), + cftime.DatetimeAllLeap(1, 2, 1)] + else: + mixed_date_types = [date_type(1, 1, 1), + cftime.DatetimeNoLeap(1, 2, 1)] + with pytest.raises(TypeError): + assert_all_valid_date_type(mixed_date_types) + + with pytest.raises(TypeError): + assert_all_valid_date_type([1, date_type(1, 1, 1)]) + + assert_all_valid_date_type([date_type(1, 1, 1), date_type(1, 2, 1)]) + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@pytest.mark.parametrize(('field', 'expected'), [ + ('year', [1, 1, 2, 2]), + ('month', [1, 2, 1, 2]), + ('day', [1, 1, 1, 1]), + ('hour', [0, 0, 0, 0]), + ('minute', [0, 0, 0, 0]), + ('second', [0, 0, 0, 0]), + ('microsecond', [0, 0, 0, 0])]) +def test_cftimeindex_field_accessors(index, field, expected): + result = getattr(index, field) + assert_array_equal(result, expected) + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@pytest.mark.parametrize(('string', 'date_args', 'reso'), [ + ('1999', 
(1999, 1, 1), 'year'), + ('199902', (1999, 2, 1), 'month'), + ('19990202', (1999, 2, 2), 'day'), + ('19990202T01', (1999, 2, 2, 1), 'hour'), + ('19990202T0101', (1999, 2, 2, 1, 1), 'minute'), + ('19990202T010156', (1999, 2, 2, 1, 1, 56), 'second')]) +def test_parse_iso8601_with_reso(date_type, string, date_args, reso): + expected_date = date_type(*date_args) + expected_reso = reso + result_date, result_reso = _parse_iso8601_with_reso(date_type, string) + assert result_date == expected_date + assert result_reso == expected_reso + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +def test_parse_string_to_bounds_year(date_type, dec_days): + parsed = date_type(2, 2, 10, 6, 2, 8, 1) + expected_start = date_type(2, 1, 1) + expected_end = date_type(2, 12, dec_days, 23, 59, 59, 999999) + result_start, result_end = _parsed_string_to_bounds( + date_type, 'year', parsed) + assert result_start == expected_start + assert result_end == expected_end + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +def test_parse_string_to_bounds_month_feb(date_type, feb_days): + parsed = date_type(2, 2, 10, 6, 2, 8, 1) + expected_start = date_type(2, 2, 1) + expected_end = date_type(2, 2, feb_days, 23, 59, 59, 999999) + result_start, result_end = _parsed_string_to_bounds( + date_type, 'month', parsed) + assert result_start == expected_start + assert result_end == expected_end + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +def test_parse_string_to_bounds_month_dec(date_type, dec_days): + parsed = date_type(2, 12, 1) + expected_start = date_type(2, 12, 1) + expected_end = date_type(2, 12, dec_days, 23, 59, 59, 999999) + result_start, result_end = _parsed_string_to_bounds( + date_type, 'month', parsed) + assert result_start == expected_start + assert result_end == expected_end + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@pytest.mark.parametrize(('reso', 'ex_start_args', 'ex_end_args'), [ + ('day', (2, 
2, 10), (2, 2, 10, 23, 59, 59, 999999)), + ('hour', (2, 2, 10, 6), (2, 2, 10, 6, 59, 59, 999999)), + ('minute', (2, 2, 10, 6, 2), (2, 2, 10, 6, 2, 59, 999999)), + ('second', (2, 2, 10, 6, 2, 8), (2, 2, 10, 6, 2, 8, 999999))]) +def test_parsed_string_to_bounds_sub_monthly(date_type, reso, + ex_start_args, ex_end_args): + parsed = date_type(2, 2, 10, 6, 2, 8, 123456) + expected_start = date_type(*ex_start_args) + expected_end = date_type(*ex_end_args) + + result_start, result_end = _parsed_string_to_bounds( + date_type, reso, parsed) + assert result_start == expected_start + assert result_end == expected_end + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +def test_parsed_string_to_bounds_raises(date_type): + with pytest.raises(KeyError): + _parsed_string_to_bounds(date_type, 'a', date_type(1, 1, 1)) + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +def test_get_loc(date_type, index): + result = index.get_loc('0001') + expected = [0, 1] + assert_array_equal(result, expected) + + result = index.get_loc(date_type(1, 2, 1)) + expected = 1 + assert result == expected + + result = index.get_loc('0001-02-01') + expected = 1 + assert result == expected + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@pytest.mark.parametrize('kind', ['loc', 'getitem']) +def test_get_slice_bound(date_type, index, kind): + result = index.get_slice_bound('0001', 'left', kind) + expected = 0 + assert result == expected + + result = index.get_slice_bound('0001', 'right', kind) + expected = 2 + assert result == expected + + result = index.get_slice_bound( + date_type(1, 3, 1), 'left', kind) + expected = 2 + assert result == expected + + result = index.get_slice_bound( + date_type(1, 3, 1), 'right', kind) + expected = 2 + assert result == expected + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@pytest.mark.parametrize('kind', ['loc', 'getitem']) +def test_get_slice_bound_decreasing_index( + date_type, 
monotonic_decreasing_index, kind): + result = monotonic_decreasing_index.get_slice_bound('0001', 'left', kind) + expected = 2 + assert result == expected + + result = monotonic_decreasing_index.get_slice_bound('0001', 'right', kind) + expected = 4 + assert result == expected + + result = monotonic_decreasing_index.get_slice_bound( + date_type(1, 3, 1), 'left', kind) + expected = 2 + assert result == expected + + result = monotonic_decreasing_index.get_slice_bound( + date_type(1, 3, 1), 'right', kind) + expected = 2 + assert result == expected + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +def test_date_type_property(date_type, index): + assert index.date_type is date_type + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +def test_contains(date_type, index): + assert '0001-01-01' in index + assert '0001' in index + assert '0003' not in index + assert date_type(1, 1, 1) in index + assert date_type(3, 1, 1) not in index + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +def test_groupby(da): + result = da.groupby('time.month').sum('time') + expected = xr.DataArray([4, 6], coords=[[1, 2]], dims=['month']) + assert_identical(result, expected) + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +def test_resample_error(da): + with pytest.raises(TypeError): + da.resample(time='Y') + + +SEL_STRING_OR_LIST_TESTS = { + 'string': '0001', + 'string-slice': slice('0001-01-01', '0001-12-30'), + 'bool-list': [True, True, False, False] +} + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@pytest.mark.parametrize('sel_arg', list(SEL_STRING_OR_LIST_TESTS.values()), + ids=list(SEL_STRING_OR_LIST_TESTS.keys())) +def test_sel_string_or_list(da, index, sel_arg): + expected = xr.DataArray([1, 2], coords=[index[:2]], dims=['time']) + result = da.sel(time=sel_arg) + assert_identical(result, expected) + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +def 
test_sel_date_slice_or_list(da, index, date_type): + expected = xr.DataArray([1, 2], coords=[index[:2]], dims=['time']) + result = da.sel(time=slice(date_type(1, 1, 1), date_type(1, 12, 30))) + assert_identical(result, expected) + + result = da.sel(time=[date_type(1, 1, 1), date_type(1, 2, 1)]) + assert_identical(result, expected) + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +def test_sel_date_scalar(da, date_type, index): + expected = xr.DataArray(1).assign_coords(time=index[0]) + result = da.sel(time=date_type(1, 1, 1)) + assert_identical(result, expected) + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@pytest.mark.parametrize('sel_kwargs', [ + {'method': 'nearest'}, + {'method': 'nearest', 'tolerance': timedelta(days=70)} +]) +def test_sel_date_scalar_nearest(da, date_type, index, sel_kwargs): + expected = xr.DataArray(2).assign_coords(time=index[1]) + result = da.sel(time=date_type(1, 4, 1), **sel_kwargs) + assert_identical(result, expected) + + expected = xr.DataArray(3).assign_coords(time=index[2]) + result = da.sel(time=date_type(1, 11, 1), **sel_kwargs) + assert_identical(result, expected) + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@pytest.mark.parametrize('sel_kwargs', [ + {'method': 'pad'}, + {'method': 'pad', 'tolerance': timedelta(days=365)} +]) +def test_sel_date_scalar_pad(da, date_type, index, sel_kwargs): + expected = xr.DataArray(2).assign_coords(time=index[1]) + result = da.sel(time=date_type(1, 4, 1), **sel_kwargs) + assert_identical(result, expected) + + expected = xr.DataArray(2).assign_coords(time=index[1]) + result = da.sel(time=date_type(1, 11, 1), **sel_kwargs) + assert_identical(result, expected) + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@pytest.mark.parametrize('sel_kwargs', [ + {'method': 'backfill'}, + {'method': 'backfill', 'tolerance': timedelta(days=365)} +]) +def test_sel_date_scalar_backfill(da, date_type, index, 
sel_kwargs): + expected = xr.DataArray(3).assign_coords(time=index[2]) + result = da.sel(time=date_type(1, 4, 1), **sel_kwargs) + assert_identical(result, expected) + + expected = xr.DataArray(3).assign_coords(time=index[2]) + result = da.sel(time=date_type(1, 11, 1), **sel_kwargs) + assert_identical(result, expected) + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@pytest.mark.parametrize('sel_kwargs', [ + {'method': 'pad', 'tolerance': timedelta(days=20)}, + {'method': 'backfill', 'tolerance': timedelta(days=20)}, + {'method': 'nearest', 'tolerance': timedelta(days=20)}, +]) +def test_sel_date_scalar_tolerance_raises(da, date_type, sel_kwargs): + with pytest.raises(KeyError): + da.sel(time=date_type(1, 5, 1), **sel_kwargs) + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@pytest.mark.parametrize('sel_kwargs', [ + {'method': 'nearest'}, + {'method': 'nearest', 'tolerance': timedelta(days=70)} +]) +def test_sel_date_list_nearest(da, date_type, index, sel_kwargs): + expected = xr.DataArray( + [2, 2], coords=[[index[1], index[1]]], dims=['time']) + result = da.sel( + time=[date_type(1, 3, 1), date_type(1, 4, 1)], **sel_kwargs) + assert_identical(result, expected) + + expected = xr.DataArray( + [2, 3], coords=[[index[1], index[2]]], dims=['time']) + result = da.sel( + time=[date_type(1, 3, 1), date_type(1, 12, 1)], **sel_kwargs) + assert_identical(result, expected) + + expected = xr.DataArray( + [3, 3], coords=[[index[2], index[2]]], dims=['time']) + result = da.sel( + time=[date_type(1, 11, 1), date_type(1, 12, 1)], **sel_kwargs) + assert_identical(result, expected) + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@pytest.mark.parametrize('sel_kwargs', [ + {'method': 'pad'}, + {'method': 'pad', 'tolerance': timedelta(days=365)} +]) +def test_sel_date_list_pad(da, date_type, index, sel_kwargs): + expected = xr.DataArray( + [2, 2], coords=[[index[1], index[1]]], dims=['time']) + result = da.sel( + 
time=[date_type(1, 3, 1), date_type(1, 4, 1)], **sel_kwargs) + assert_identical(result, expected) + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@pytest.mark.parametrize('sel_kwargs', [ + {'method': 'backfill'}, + {'method': 'backfill', 'tolerance': timedelta(days=365)} +]) +def test_sel_date_list_backfill(da, date_type, index, sel_kwargs): + expected = xr.DataArray( + [3, 3], coords=[[index[2], index[2]]], dims=['time']) + result = da.sel( + time=[date_type(1, 3, 1), date_type(1, 4, 1)], **sel_kwargs) + assert_identical(result, expected) + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@pytest.mark.parametrize('sel_kwargs', [ + {'method': 'pad', 'tolerance': timedelta(days=20)}, + {'method': 'backfill', 'tolerance': timedelta(days=20)}, + {'method': 'nearest', 'tolerance': timedelta(days=20)}, +]) +def test_sel_date_list_tolerance_raises(da, date_type, sel_kwargs): + with pytest.raises(KeyError): + da.sel(time=[date_type(1, 2, 1), date_type(1, 5, 1)], **sel_kwargs) + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +def test_isel(da, index): + expected = xr.DataArray(1).assign_coords(time=index[0]) + result = da.isel(time=0) + assert_identical(result, expected) + + expected = xr.DataArray([1, 2], coords=[index[:2]], dims=['time']) + result = da.isel(time=[0, 1]) + assert_identical(result, expected) + + +@pytest.fixture +def scalar_args(date_type): + return [date_type(1, 1, 1)] + + +@pytest.fixture +def range_args(date_type): + return ['0001', slice('0001-01-01', '0001-12-30'), + slice(None, '0001-12-30'), + slice(date_type(1, 1, 1), date_type(1, 12, 30)), + slice(None, date_type(1, 12, 30))] + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +def test_indexing_in_series_getitem(series, index, scalar_args, range_args): + for arg in scalar_args: + assert series[arg] == 1 + + expected = pd.Series([1, 2], index=index[:2]) + for arg in range_args: + assert 
series[arg].equals(expected) + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +def test_indexing_in_series_loc(series, index, scalar_args, range_args): + for arg in scalar_args: + assert series.loc[arg] == 1 + + expected = pd.Series([1, 2], index=index[:2]) + for arg in range_args: + assert series.loc[arg].equals(expected) + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +def test_indexing_in_series_iloc(series, index): + expected = 1 + assert series.iloc[0] == expected + + expected = pd.Series([1, 2], index=index[:2]) + assert series.iloc[:2].equals(expected) + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +def test_indexing_in_dataframe_loc(df, index, scalar_args, range_args): + expected = pd.Series([1], name=index[0]) + for arg in scalar_args: + result = df.loc[arg] + assert result.equals(expected) + + expected = pd.DataFrame([1, 2], index=index[:2]) + for arg in range_args: + result = df.loc[arg] + assert result.equals(expected) + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +def test_indexing_in_dataframe_iloc(df, index): + expected = pd.Series([1], name=index[0]) + result = df.iloc[0] + assert result.equals(expected) + assert result.equals(expected) + + expected = pd.DataFrame([1, 2], index=index[:2]) + result = df.iloc[:2] + assert result.equals(expected) + + +@pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed') +@pytest.mark.parametrize('enable_cftimeindex', [False, True]) +def test_concat_cftimeindex(date_type, enable_cftimeindex): + with xr.set_options(enable_cftimeindex=enable_cftimeindex): + da1 = xr.DataArray( + [1., 2.], coords=[[date_type(1, 1, 1), date_type(1, 2, 1)]], + dims=['time']) + da2 = xr.DataArray( + [3., 4.], coords=[[date_type(1, 3, 1), date_type(1, 4, 1)]], + dims=['time']) + da = xr.concat([da1, da2], dim='time') + + if enable_cftimeindex and has_cftime: + assert isinstance(da.indexes['time'], CFTimeIndex) + else: + 
assert isinstance(da.indexes['time'], pd.Index) + assert not isinstance(da.indexes['time'], CFTimeIndex) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 7e69d4b3ff2..7c1e869f772 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1,15 +1,55 @@ from __future__ import absolute_import, division, print_function +from itertools import product import warnings import numpy as np import pandas as pd import pytest -from xarray import Variable, coding +from xarray import Variable, coding, set_options, DataArray, decode_cf from xarray.coding.times import _import_cftime - -from . import TestCase, assert_array_equal, requires_cftime_or_netCDF4 +from xarray.coding.variables import SerializationWarning +from xarray.core.common import contains_cftime_datetimes + +from . import (assert_array_equal, has_cftime_or_netCDF4, + requires_cftime_or_netCDF4, has_cftime, has_dask) + + +_NON_STANDARD_CALENDARS = {'noleap', '365_day', '360_day', + 'julian', 'all_leap', '366_day'} +_ALL_CALENDARS = _NON_STANDARD_CALENDARS.union( + coding.times._STANDARD_CALENDARS) +_CF_DATETIME_NUM_DATES_UNITS = [ + (np.arange(10), 'days since 2000-01-01'), + (np.arange(10).astype('float64'), 'days since 2000-01-01'), + (np.arange(10).astype('float32'), 'days since 2000-01-01'), + (np.arange(10).reshape(2, 5), 'days since 2000-01-01'), + (12300 + np.arange(5), 'hours since 1680-01-01 00:00:00'), + # here we add a couple minor formatting errors to test + # the robustness of the parsing algorithm. 
+ (12300 + np.arange(5), 'hour since 1680-01-01 00:00:00'), + (12300 + np.arange(5), u'Hour since 1680-01-01 00:00:00'), + (12300 + np.arange(5), ' Hour since 1680-01-01 00:00:00 '), + (10, 'days since 2000-01-01'), + ([10], 'daYs since 2000-01-01'), + ([[10]], 'days since 2000-01-01'), + ([10, 10], 'days since 2000-01-01'), + (np.array(10), 'days since 2000-01-01'), + (0, 'days since 1000-01-01'), + ([0], 'days since 1000-01-01'), + ([[0]], 'days since 1000-01-01'), + (np.arange(2), 'days since 1000-01-01'), + (np.arange(0, 100000, 20000), 'days since 1900-01-01'), + (17093352.0, 'hours since 1-1-1 00:00:0.0'), + ([0.5, 1.5], 'hours since 1900-01-01T00:00:00'), + (0, 'milliseconds since 2000-01-01T00:00:00'), + (0, 'microseconds since 2000-01-01T00:00:00'), + (np.int32(788961600), 'seconds since 1981-01-01') # GH2002 +] +_CF_DATETIME_TESTS = [num_dates_units + (calendar,) for num_dates_units, + calendar in product(_CF_DATETIME_NUM_DATES_UNITS, + coding.times._STANDARD_CALENDARS)] @np.vectorize @@ -20,309 +60,698 @@ def _ensure_naive_tz(dt): return dt -class TestDatetime(TestCase): - @requires_cftime_or_netCDF4 - def test_cf_datetime(self): - cftime = _import_cftime() - for num_dates, units in [ - (np.arange(10), 'days since 2000-01-01'), - (np.arange(10).astype('float64'), 'days since 2000-01-01'), - (np.arange(10).astype('float32'), 'days since 2000-01-01'), - (np.arange(10).reshape(2, 5), 'days since 2000-01-01'), - (12300 + np.arange(5), 'hours since 1680-01-01 00:00:00'), - # here we add a couple minor formatting errors to test - # the robustness of the parsing algorithm. 
- (12300 + np.arange(5), 'hour since 1680-01-01 00:00:00'), - (12300 + np.arange(5), u'Hour since 1680-01-01 00:00:00'), - (12300 + np.arange(5), ' Hour since 1680-01-01 00:00:00 '), - (10, 'days since 2000-01-01'), - ([10], 'daYs since 2000-01-01'), - ([[10]], 'days since 2000-01-01'), - ([10, 10], 'days since 2000-01-01'), - (np.array(10), 'days since 2000-01-01'), - (0, 'days since 1000-01-01'), - ([0], 'days since 1000-01-01'), - ([[0]], 'days since 1000-01-01'), - (np.arange(2), 'days since 1000-01-01'), - (np.arange(0, 100000, 20000), 'days since 1900-01-01'), - (17093352.0, 'hours since 1-1-1 00:00:0.0'), - ([0.5, 1.5], 'hours since 1900-01-01T00:00:00'), - (0, 'milliseconds since 2000-01-01T00:00:00'), - (0, 'microseconds since 2000-01-01T00:00:00'), - (np.int32(788961600), 'seconds since 1981-01-01'), # GH2002 - ]: - for calendar in ['standard', 'gregorian', 'proleptic_gregorian']: - expected = _ensure_naive_tz( - cftime.num2date(num_dates, units, calendar)) - print(num_dates, units, calendar) - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', - 'Unable to decode time axis') - actual = coding.times.decode_cf_datetime(num_dates, units, - calendar) - if (isinstance(actual, np.ndarray) and - np.issubdtype(actual.dtype, np.datetime64)): - # self.assertEqual(actual.dtype.kind, 'M') - # For some reason, numpy 1.8 does not compare ns precision - # datetime64 arrays as equal to arrays of datetime objects, - # but it works for us precision. Thus, convert to us - # precision for the actual array equal comparison... 
- actual_cmp = actual.astype('M8[us]') - else: - actual_cmp = actual - assert_array_equal(expected, actual_cmp) - encoded, _, _ = coding.times.encode_cf_datetime(actual, units, - calendar) - if '1-1-1' not in units: - # pandas parses this date very strangely, so the original - # units/encoding cannot be preserved in this case: - # (Pdb) pd.to_datetime('1-1-1 00:00:0.0') - # Timestamp('2001-01-01 00:00:00') - assert_array_equal(num_dates, np.around(encoded, 1)) - if (hasattr(num_dates, 'ndim') and num_dates.ndim == 1 and - '1000' not in units): - # verify that wrapping with a pandas.Index works - # note that it *does not* currently work to even put - # non-datetime64 compatible dates into a pandas.Index - encoded, _, _ = coding.times.encode_cf_datetime( - pd.Index(actual), units, calendar) - assert_array_equal(num_dates, np.around(encoded, 1)) - - @requires_cftime_or_netCDF4 - def test_decode_cf_datetime_overflow(self): - # checks for - # https://github.com/pydata/pandas/issues/14068 - # https://github.com/pydata/xarray/issues/975 - - from datetime import datetime - units = 'days since 2000-01-01 00:00:00' - - # date after 2262 and before 1678 - days = (-117608, 95795) - expected = (datetime(1677, 12, 31), datetime(2262, 4, 12)) - - for i, day in enumerate(days): - result = coding.times.decode_cf_datetime(day, units) - assert result == expected[i] - - def test_decode_cf_datetime_non_standard_units(self): - expected = pd.date_range(periods=100, start='1970-01-01', freq='h') - # netCDFs from madis.noaa.gov use this format for their time units - # they cannot be parsed by netcdftime, but pd.Timestamp works - units = 'hours since 1-1-1970' - actual = coding.times.decode_cf_datetime(np.arange(100), units) +def _all_cftime_date_types(): + try: + import cftime + except ImportError: + import netcdftime as cftime + return {'noleap': cftime.DatetimeNoLeap, + '365_day': cftime.DatetimeNoLeap, + '360_day': cftime.Datetime360Day, + 'julian': cftime.DatetimeJulian, + 'all_leap': 
cftime.DatetimeAllLeap, + '366_day': cftime.DatetimeAllLeap, + 'gregorian': cftime.DatetimeGregorian, + 'proleptic_gregorian': cftime.DatetimeProlepticGregorian} + + +@pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed') +@pytest.mark.parametrize(['num_dates', 'units', 'calendar'], + _CF_DATETIME_TESTS) +def test_cf_datetime(num_dates, units, calendar): + cftime = _import_cftime() + expected = _ensure_naive_tz( + cftime.num2date(num_dates, units, calendar)) + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', + 'Unable to decode time axis') + actual = coding.times.decode_cf_datetime(num_dates, units, + calendar) + if (isinstance(actual, np.ndarray) and + np.issubdtype(actual.dtype, np.datetime64)): + # self.assertEqual(actual.dtype.kind, 'M') + # For some reason, numpy 1.8 does not compare ns precision + # datetime64 arrays as equal to arrays of datetime objects, + # but it works for us precision. Thus, convert to us + # precision for the actual array equal comparison... 
+ actual_cmp = actual.astype('M8[us]') + else: + actual_cmp = actual + assert_array_equal(expected, actual_cmp) + encoded, _, _ = coding.times.encode_cf_datetime(actual, units, + calendar) + if '1-1-1' not in units: + # pandas parses this date very strangely, so the original + # units/encoding cannot be preserved in this case: + # (Pdb) pd.to_datetime('1-1-1 00:00:0.0') + # Timestamp('2001-01-01 00:00:00') + assert_array_equal(num_dates, np.around(encoded, 1)) + if (hasattr(num_dates, 'ndim') and num_dates.ndim == 1 and + '1000' not in units): + # verify that wrapping with a pandas.Index works + # note that it *does not* currently work to even put + # non-datetime64 compatible dates into a pandas.Index + encoded, _, _ = coding.times.encode_cf_datetime( + pd.Index(actual), units, calendar) + assert_array_equal(num_dates, np.around(encoded, 1)) + + +@requires_cftime_or_netCDF4 +def test_decode_cf_datetime_overflow(): + # checks for + # https://github.com/pydata/pandas/issues/14068 + # https://github.com/pydata/xarray/issues/975 + + from datetime import datetime + units = 'days since 2000-01-01 00:00:00' + + # date after 2262 and before 1678 + days = (-117608, 95795) + expected = (datetime(1677, 12, 31), datetime(2262, 4, 12)) + + for i, day in enumerate(days): + result = coding.times.decode_cf_datetime(day, units) + assert result == expected[i] + + +def test_decode_cf_datetime_non_standard_units(): + expected = pd.date_range(periods=100, start='1970-01-01', freq='h') + # netCDFs from madis.noaa.gov use this format for their time units + # they cannot be parsed by cftime, but pd.Timestamp works + units = 'hours since 1-1-1970' + actual = coding.times.decode_cf_datetime(np.arange(100), units) + assert_array_equal(actual, expected) + + +@requires_cftime_or_netCDF4 +def test_decode_cf_datetime_non_iso_strings(): + # datetime strings that are _almost_ ISO compliant but not quite, + # but which netCDF4.num2date can still parse correctly + expected = 
pd.date_range(periods=100, start='2000-01-01', freq='h') + cases = [(np.arange(100), 'hours since 2000-01-01 0'), + (np.arange(100), 'hours since 2000-1-1 0'), + (np.arange(100), 'hours since 2000-01-01 0:00')] + for num_dates, units in cases: + actual = coding.times.decode_cf_datetime(num_dates, units) assert_array_equal(actual, expected) - @requires_cftime_or_netCDF4 - def test_decode_cf_datetime_non_iso_strings(self): - # datetime strings that are _almost_ ISO compliant but not quite, - # but which netCDF4.num2date can still parse correctly - expected = pd.date_range(periods=100, start='2000-01-01', freq='h') - cases = [(np.arange(100), 'hours since 2000-01-01 0'), - (np.arange(100), 'hours since 2000-1-1 0'), - (np.arange(100), 'hours since 2000-01-01 0:00')] - for num_dates, units in cases: - actual = coding.times.decode_cf_datetime(num_dates, units) - assert_array_equal(actual, expected) - - @requires_cftime_or_netCDF4 - def test_decode_non_standard_calendar(self): - cftime = _import_cftime() - - for calendar in ['noleap', '365_day', '360_day', 'julian', 'all_leap', - '366_day']: - units = 'days since 0001-01-01' - times = pd.date_range('2001-04-01-00', end='2001-04-30-23', - freq='H') - noleap_time = cftime.date2num(times.to_pydatetime(), units, - calendar=calendar) - expected = times.values - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', 'Unable to decode time axis') - actual = coding.times.decode_cf_datetime(noleap_time, units, - calendar=calendar) - assert actual.dtype == np.dtype('M8[ns]') - abs_diff = abs(actual - expected) - # once we no longer support versions of netCDF4 older than 1.1.5, - # we could do this check with near microsecond accuracy: - # https://github.com/Unidata/netcdf4-python/issues/355 - assert (abs_diff <= np.timedelta64(1, 's')).all() - - @requires_cftime_or_netCDF4 - def test_decode_non_standard_calendar_single_element(self): - units = 'days since 0001-01-01' - for calendar in ['noleap', '365_day', '360_day', 
'julian', 'all_leap', - '366_day']: - for num_time in [735368, [735368], [[735368]]]: - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', - 'Unable to decode time axis') - actual = coding.times.decode_cf_datetime(num_time, units, - calendar=calendar) - assert actual.dtype == np.dtype('M8[ns]') - - @requires_cftime_or_netCDF4 - def test_decode_non_standard_calendar_single_element_fallback(self): - cftime = _import_cftime() - - units = 'days since 0001-01-01' - try: - dt = cftime.netcdftime.datetime(2001, 2, 29) - except AttributeError: - # Must be using standalone netcdftime library - dt = cftime.datetime(2001, 2, 29) - for calendar in ['360_day', 'all_leap', '366_day']: - num_time = cftime.date2num(dt, units, calendar) - with pytest.warns(Warning, match='Unable to decode time axis'): - actual = coding.times.decode_cf_datetime(num_time, units, - calendar=calendar) - expected = np.asarray(cftime.num2date(num_time, units, calendar)) - assert actual.dtype == np.dtype('O') - assert expected == actual - - @requires_cftime_or_netCDF4 - def test_decode_non_standard_calendar_multidim_time(self): - cftime = _import_cftime() - - calendar = 'noleap' - units = 'days since 0001-01-01' - times1 = pd.date_range('2001-04-01', end='2001-04-05', freq='D') - times2 = pd.date_range('2001-05-01', end='2001-05-05', freq='D') - noleap_time1 = cftime.date2num(times1.to_pydatetime(), units, - calendar=calendar) - noleap_time2 = cftime.date2num(times2.to_pydatetime(), units, - calendar=calendar) - mdim_time = np.empty((len(noleap_time1), 2), ) - mdim_time[:, 0] = noleap_time1 - mdim_time[:, 1] = noleap_time2 - expected1 = times1.values - expected2 = times2.values +@pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed') +@pytest.mark.parametrize( + ['calendar', 'enable_cftimeindex'], + product(coding.times._STANDARD_CALENDARS, [False, True])) +def test_decode_standard_calendar_inside_timestamp_range( + calendar, enable_cftimeindex): + if 
enable_cftimeindex: + pytest.importorskip('cftime') + + cftime = _import_cftime() + units = 'days since 0001-01-01' + times = pd.date_range('2001-04-01-00', end='2001-04-30-23', + freq='H') + noleap_time = cftime.date2num(times.to_pydatetime(), units, + calendar=calendar) + expected = times.values + expected_dtype = np.dtype('M8[ns]') + + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'Unable to decode time axis') + actual = coding.times.decode_cf_datetime( + noleap_time, units, calendar=calendar, + enable_cftimeindex=enable_cftimeindex) + assert actual.dtype == expected_dtype + abs_diff = abs(actual - expected) + # once we no longer support versions of netCDF4 older than 1.1.5, + # we could do this check with near microsecond accuracy: + # https://github.com/Unidata/netcdf4-python/issues/355 + assert (abs_diff <= np.timedelta64(1, 's')).all() + + +@pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed') +@pytest.mark.parametrize( + ['calendar', 'enable_cftimeindex'], + product(_NON_STANDARD_CALENDARS, [False, True])) +def test_decode_non_standard_calendar_inside_timestamp_range( + calendar, enable_cftimeindex): + if enable_cftimeindex: + pytest.importorskip('cftime') + + cftime = _import_cftime() + units = 'days since 0001-01-01' + times = pd.date_range('2001-04-01-00', end='2001-04-30-23', + freq='H') + noleap_time = cftime.date2num(times.to_pydatetime(), units, + calendar=calendar) + if enable_cftimeindex: + expected = cftime.num2date(noleap_time, units, calendar=calendar) + expected_dtype = np.dtype('O') + else: + expected = times.values + expected_dtype = np.dtype('M8[ns]') + + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'Unable to decode time axis') + actual = coding.times.decode_cf_datetime( + noleap_time, units, calendar=calendar, + enable_cftimeindex=enable_cftimeindex) + assert actual.dtype == expected_dtype + abs_diff = abs(actual - expected) + # once we no longer support versions of 
netCDF4 older than 1.1.5, + # we could do this check with near microsecond accuracy: + # https://github.com/Unidata/netcdf4-python/issues/355 + assert (abs_diff <= np.timedelta64(1, 's')).all() + + +@pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed') +@pytest.mark.parametrize( + ['calendar', 'enable_cftimeindex'], + product(_ALL_CALENDARS, [False, True])) +def test_decode_dates_outside_timestamp_range( + calendar, enable_cftimeindex): + from datetime import datetime + + if enable_cftimeindex: + pytest.importorskip('cftime') + + cftime = _import_cftime() + + units = 'days since 0001-01-01' + times = [datetime(1, 4, 1, h) for h in range(1, 5)] + noleap_time = cftime.date2num(times, units, calendar=calendar) + if enable_cftimeindex: + expected = cftime.num2date(noleap_time, units, calendar=calendar, + only_use_cftime_datetimes=True) + else: + expected = cftime.num2date(noleap_time, units, calendar=calendar) + expected_date_type = type(expected[0]) + + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'Unable to decode time axis') + actual = coding.times.decode_cf_datetime( + noleap_time, units, calendar=calendar, + enable_cftimeindex=enable_cftimeindex) + assert all(isinstance(value, expected_date_type) for value in actual) + abs_diff = abs(actual - expected) + # once we no longer support versions of netCDF4 older than 1.1.5, + # we could do this check with near microsecond accuracy: + # https://github.com/Unidata/netcdf4-python/issues/355 + assert (abs_diff <= np.timedelta64(1, 's')).all() + + +@pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed') +@pytest.mark.parametrize( + ['calendar', 'enable_cftimeindex'], + product(coding.times._STANDARD_CALENDARS, [False, True])) +def test_decode_standard_calendar_single_element_inside_timestamp_range( + calendar, enable_cftimeindex): + if enable_cftimeindex: + pytest.importorskip('cftime') + + units = 'days since 0001-01-01' + for num_time in [735368, 
[735368], [[735368]]]: with warnings.catch_warnings(): - warnings.filterwarnings('ignore', 'Unable to decode time axis') - actual = coding.times.decode_cf_datetime(mdim_time, units, - calendar=calendar) + warnings.filterwarnings('ignore', + 'Unable to decode time axis') + actual = coding.times.decode_cf_datetime( + num_time, units, calendar=calendar, + enable_cftimeindex=enable_cftimeindex) assert actual.dtype == np.dtype('M8[ns]') - assert_array_equal(actual[:, 0], expected1) - assert_array_equal(actual[:, 1], expected2) - - @requires_cftime_or_netCDF4 - def test_decode_non_standard_calendar_fallback(self): - cftime = _import_cftime() - # ensure leap year doesn't matter - for year in [2010, 2011, 2012, 2013, 2014]: - for calendar in ['360_day', '366_day', 'all_leap']: - calendar = '360_day' - units = 'days since {0}-01-01'.format(year) - num_times = np.arange(100) - expected = cftime.num2date(num_times, units, calendar) - - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('always') - actual = coding.times.decode_cf_datetime(num_times, units, - calendar=calendar) - assert len(w) == 1 - assert 'Unable to decode time axis' in \ - str(w[0].message) - - assert actual.dtype == np.dtype('O') - assert_array_equal(actual, expected) - - @requires_cftime_or_netCDF4 - def test_cf_datetime_nan(self): - for num_dates, units, expected_list in [ - ([np.nan], 'days since 2000-01-01', ['NaT']), - ([np.nan, 0], 'days since 2000-01-01', - ['NaT', '2000-01-01T00:00:00Z']), - ([np.nan, 0, 1], 'days since 2000-01-01', - ['NaT', '2000-01-01T00:00:00Z', '2000-01-02T00:00:00Z']), - ]: + + +@pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed') +@pytest.mark.parametrize( + ['calendar', 'enable_cftimeindex'], + product(_NON_STANDARD_CALENDARS, [False, True])) +def test_decode_non_standard_calendar_single_element_inside_timestamp_range( + calendar, enable_cftimeindex): + if enable_cftimeindex: + pytest.importorskip('cftime') + + units = 'days 
since 0001-01-01' + for num_time in [735368, [735368], [[735368]]]: + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', + 'Unable to decode time axis') + actual = coding.times.decode_cf_datetime( + num_time, units, calendar=calendar, + enable_cftimeindex=enable_cftimeindex) + if enable_cftimeindex: + assert actual.dtype == np.dtype('O') + else: + assert actual.dtype == np.dtype('M8[ns]') + + +@pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed') +@pytest.mark.parametrize( + ['calendar', 'enable_cftimeindex'], + product(_NON_STANDARD_CALENDARS, [False, True])) +def test_decode_single_element_outside_timestamp_range( + calendar, enable_cftimeindex): + if enable_cftimeindex: + pytest.importorskip('cftime') + + cftime = _import_cftime() + units = 'days since 0001-01-01' + for days in [1, 1470376]: + for num_time in [days, [days], [[days]]]: with warnings.catch_warnings(): - warnings.filterwarnings('ignore', 'All-NaN') - actual = coding.times.decode_cf_datetime(num_dates, units) - expected = np.array(expected_list, dtype='datetime64[ns]') - assert_array_equal(expected, actual) - - @requires_cftime_or_netCDF4 - def test_decoded_cf_datetime_array_2d(self): - # regression test for GH1229 - variable = Variable(('x', 'y'), np.array([[0, 1], [2, 3]]), - {'units': 'days since 2000-01-01'}) - result = coding.times.CFDatetimeCoder().decode(variable) - assert result.dtype == 'datetime64[ns]' - expected = pd.date_range('2000-01-01', periods=4).values.reshape(2, 2) - assert_array_equal(np.asarray(result), expected) - - def test_infer_datetime_units(self): - for dates, expected in [(pd.date_range('1900-01-01', periods=5), - 'days since 1900-01-01 00:00:00'), - (pd.date_range('1900-01-01 12:00:00', freq='H', - periods=2), - 'hours since 1900-01-01 12:00:00'), - (['1900-01-01', '1900-01-02', - '1900-01-02 00:00:01'], - 'seconds since 1900-01-01 00:00:00'), - (pd.to_datetime( - ['1900-01-01', '1900-01-02', 'NaT']), - 'days since 1900-01-01 
00:00:00'), - (pd.to_datetime(['1900-01-01', - '1900-01-02T00:00:00.005']), - 'seconds since 1900-01-01 00:00:00'), - (pd.to_datetime(['NaT', '1900-01-01']), - 'days since 1900-01-01 00:00:00'), - (pd.to_datetime(['NaT']), - 'days since 1970-01-01 00:00:00'), - ]: - assert expected == coding.times.infer_datetime_units(dates) + warnings.filterwarnings('ignore', + 'Unable to decode time axis') + actual = coding.times.decode_cf_datetime( + num_time, units, calendar=calendar, + enable_cftimeindex=enable_cftimeindex) + expected = cftime.num2date(days, units, calendar) + assert isinstance(actual.item(), type(expected)) + + +@pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed') +@pytest.mark.parametrize( + ['calendar', 'enable_cftimeindex'], + product(coding.times._STANDARD_CALENDARS, [False, True])) +def test_decode_standard_calendar_multidim_time_inside_timestamp_range( + calendar, enable_cftimeindex): + if enable_cftimeindex: + pytest.importorskip('cftime') + + cftime = _import_cftime() + + units = 'days since 0001-01-01' + times1 = pd.date_range('2001-04-01', end='2001-04-05', freq='D') + times2 = pd.date_range('2001-05-01', end='2001-05-05', freq='D') + noleap_time1 = cftime.date2num(times1.to_pydatetime(), + units, calendar=calendar) + noleap_time2 = cftime.date2num(times2.to_pydatetime(), + units, calendar=calendar) + mdim_time = np.empty((len(noleap_time1), 2), ) + mdim_time[:, 0] = noleap_time1 + mdim_time[:, 1] = noleap_time2 + + expected1 = times1.values + expected2 = times2.values + + actual = coding.times.decode_cf_datetime( + mdim_time, units, calendar=calendar, + enable_cftimeindex=enable_cftimeindex) + assert actual.dtype == np.dtype('M8[ns]') + + abs_diff1 = abs(actual[:, 0] - expected1) + abs_diff2 = abs(actual[:, 1] - expected2) + # once we no longer support versions of netCDF4 older than 1.1.5, + # we could do this check with near microsecond accuracy: + # https://github.com/Unidata/netcdf4-python/issues/355 + assert (abs_diff1 
<= np.timedelta64(1, 's')).all() + assert (abs_diff2 <= np.timedelta64(1, 's')).all() + + +@pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed') +@pytest.mark.parametrize( + ['calendar', 'enable_cftimeindex'], + product(_NON_STANDARD_CALENDARS, [False, True])) +def test_decode_nonstandard_calendar_multidim_time_inside_timestamp_range( + calendar, enable_cftimeindex): + if enable_cftimeindex: + pytest.importorskip('cftime') + + cftime = _import_cftime() + + units = 'days since 0001-01-01' + times1 = pd.date_range('2001-04-01', end='2001-04-05', freq='D') + times2 = pd.date_range('2001-05-01', end='2001-05-05', freq='D') + noleap_time1 = cftime.date2num(times1.to_pydatetime(), + units, calendar=calendar) + noleap_time2 = cftime.date2num(times2.to_pydatetime(), + units, calendar=calendar) + mdim_time = np.empty((len(noleap_time1), 2), ) + mdim_time[:, 0] = noleap_time1 + mdim_time[:, 1] = noleap_time2 + + if enable_cftimeindex: + expected1 = cftime.num2date(noleap_time1, units, calendar) + expected2 = cftime.num2date(noleap_time2, units, calendar) + expected_dtype = np.dtype('O') + else: + expected1 = times1.values + expected2 = times2.values + expected_dtype = np.dtype('M8[ns]') + + actual = coding.times.decode_cf_datetime( + mdim_time, units, calendar=calendar, + enable_cftimeindex=enable_cftimeindex) + + assert actual.dtype == expected_dtype + abs_diff1 = abs(actual[:, 0] - expected1) + abs_diff2 = abs(actual[:, 1] - expected2) + # once we no longer support versions of netCDF4 older than 1.1.5, + # we could do this check with near microsecond accuracy: + # https://github.com/Unidata/netcdf4-python/issues/355 + assert (abs_diff1 <= np.timedelta64(1, 's')).all() + assert (abs_diff2 <= np.timedelta64(1, 's')).all() + + +@pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed') +@pytest.mark.parametrize( + ['calendar', 'enable_cftimeindex'], + product(_ALL_CALENDARS, [False, True])) +def 
test_decode_multidim_time_outside_timestamp_range( + calendar, enable_cftimeindex): + from datetime import datetime + + if enable_cftimeindex: + pytest.importorskip('cftime') + + cftime = _import_cftime() + + units = 'days since 0001-01-01' + times1 = [datetime(1, 4, day) for day in range(1, 6)] + times2 = [datetime(1, 5, day) for day in range(1, 6)] + noleap_time1 = cftime.date2num(times1, units, calendar=calendar) + noleap_time2 = cftime.date2num(times2, units, calendar=calendar) + mdim_time = np.empty((len(noleap_time1), 2), ) + mdim_time[:, 0] = noleap_time1 + mdim_time[:, 1] = noleap_time2 + + if enable_cftimeindex: + expected1 = cftime.num2date(noleap_time1, units, calendar, + only_use_cftime_datetimes=True) + expected2 = cftime.num2date(noleap_time2, units, calendar, + only_use_cftime_datetimes=True) + else: + expected1 = cftime.num2date(noleap_time1, units, calendar) + expected2 = cftime.num2date(noleap_time2, units, calendar) + + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'Unable to decode time axis') + actual = coding.times.decode_cf_datetime( + mdim_time, units, calendar=calendar, + enable_cftimeindex=enable_cftimeindex) + + assert actual.dtype == np.dtype('O') + + abs_diff1 = abs(actual[:, 0] - expected1) + abs_diff2 = abs(actual[:, 1] - expected2) + # once we no longer support versions of netCDF4 older than 1.1.5, + # we could do this check with near microsecond accuracy: + # https://github.com/Unidata/netcdf4-python/issues/355 + assert (abs_diff1 <= np.timedelta64(1, 's')).all() + assert (abs_diff2 <= np.timedelta64(1, 's')).all() + + +@pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed') +@pytest.mark.parametrize( + ['calendar', 'enable_cftimeindex'], + product(['360_day', 'all_leap', '366_day'], [False, True])) +def test_decode_non_standard_calendar_single_element_fallback( + calendar, enable_cftimeindex): + if enable_cftimeindex: + pytest.importorskip('cftime') + + cftime = _import_cftime() + + units 
= 'days since 0001-01-01' + try: + dt = cftime.netcdftime.datetime(2001, 2, 29) + except AttributeError: + # Must be using standalone netcdftime library + dt = cftime.datetime(2001, 2, 29) + + num_time = cftime.date2num(dt, units, calendar) + if enable_cftimeindex: + actual = coding.times.decode_cf_datetime( + num_time, units, calendar=calendar, + enable_cftimeindex=enable_cftimeindex) + else: + with pytest.warns(SerializationWarning, + match='Unable to decode time axis'): + actual = coding.times.decode_cf_datetime( + num_time, units, calendar=calendar, + enable_cftimeindex=enable_cftimeindex) + + expected = np.asarray(cftime.num2date(num_time, units, calendar)) + assert actual.dtype == np.dtype('O') + assert expected == actual + + +@pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed') +@pytest.mark.parametrize( + ['calendar', 'enable_cftimeindex'], + product(['360_day'], [False, True])) +def test_decode_non_standard_calendar_fallback( + calendar, enable_cftimeindex): + if enable_cftimeindex: + pytest.importorskip('cftime') + + cftime = _import_cftime() + # ensure leap year doesn't matter + for year in [2010, 2011, 2012, 2013, 2014]: + units = 'days since {0}-01-01'.format(year) + num_times = np.arange(100) + expected = cftime.num2date(num_times, units, calendar) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + actual = coding.times.decode_cf_datetime( + num_times, units, calendar=calendar, + enable_cftimeindex=enable_cftimeindex) + if enable_cftimeindex: + assert len(w) == 0 + else: + assert len(w) == 1 + assert 'Unable to decode time axis' in str(w[0].message) + + assert actual.dtype == np.dtype('O') + assert_array_equal(actual, expected) - def test_cf_timedelta(self): - examples = [ - ('1D', 'days', np.int64(1)), - (['1D', '2D', '3D'], 'days', np.array([1, 2, 3], 'int64')), - ('1h', 'hours', np.int64(1)), - ('1ms', 'milliseconds', np.int64(1)), - ('1us', 'microseconds', np.int64(1)), - (['NaT', '0s', 
'1s'], None, [np.nan, 0, 1]), - (['30m', '60m'], 'hours', [0.5, 1.0]), - (np.timedelta64('NaT', 'ns'), 'days', np.nan), - (['NaT', 'NaT'], 'days', [np.nan, np.nan]), - ] - - for timedeltas, units, numbers in examples: - timedeltas = pd.to_timedelta(timedeltas, box=False) - numbers = np.array(numbers) - - expected = numbers - actual, _ = coding.times.encode_cf_timedelta(timedeltas, units) - assert_array_equal(expected, actual) - assert expected.dtype == actual.dtype - - if units is not None: - expected = timedeltas - actual = coding.times.decode_cf_timedelta(numbers, units) - assert_array_equal(expected, actual) - assert expected.dtype == actual.dtype - - expected = np.timedelta64('NaT', 'ns') - actual = coding.times.decode_cf_timedelta(np.array(np.nan), 'days') - assert_array_equal(expected, actual) - def test_cf_timedelta_2d(self): - timedeltas = ['1D', '2D', '3D'] - units = 'days' - numbers = np.atleast_2d([1, 2, 3]) +@pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed') +@pytest.mark.parametrize( + ['num_dates', 'units', 'expected_list'], + [([np.nan], 'days since 2000-01-01', ['NaT']), + ([np.nan, 0], 'days since 2000-01-01', + ['NaT', '2000-01-01T00:00:00Z']), + ([np.nan, 0, 1], 'days since 2000-01-01', + ['NaT', '2000-01-01T00:00:00Z', '2000-01-02T00:00:00Z'])]) +def test_cf_datetime_nan(num_dates, units, expected_list): + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'All-NaN') + actual = coding.times.decode_cf_datetime(num_dates, units) + expected = np.array(expected_list, dtype='datetime64[ns]') + assert_array_equal(expected, actual) + + +@requires_cftime_or_netCDF4 +def test_decoded_cf_datetime_array_2d(): + # regression test for GH1229 + variable = Variable(('x', 'y'), np.array([[0, 1], [2, 3]]), + {'units': 'days since 2000-01-01'}) + result = coding.times.CFDatetimeCoder().decode(variable) + assert result.dtype == 'datetime64[ns]' + expected = pd.date_range('2000-01-01', periods=4).values.reshape(2, 2) + 
assert_array_equal(np.asarray(result), expected) + + +@pytest.mark.parametrize( + ['dates', 'expected'], + [(pd.date_range('1900-01-01', periods=5), + 'days since 1900-01-01 00:00:00'), + (pd.date_range('1900-01-01 12:00:00', freq='H', + periods=2), + 'hours since 1900-01-01 12:00:00'), + (pd.to_datetime( + ['1900-01-01', '1900-01-02', 'NaT']), + 'days since 1900-01-01 00:00:00'), + (pd.to_datetime(['1900-01-01', + '1900-01-02T00:00:00.005']), + 'seconds since 1900-01-01 00:00:00'), + (pd.to_datetime(['NaT', '1900-01-01']), + 'days since 1900-01-01 00:00:00'), + (pd.to_datetime(['NaT']), + 'days since 1970-01-01 00:00:00')]) +def test_infer_datetime_units(dates, expected): + assert expected == coding.times.infer_datetime_units(dates) + + +@pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed') +def test_infer_cftime_datetime_units(): + date_types = _all_cftime_date_types() + for date_type in date_types.values(): + for dates, expected in [ + ([date_type(1900, 1, 1), + date_type(1900, 1, 2)], + 'days since 1900-01-01 00:00:00.000000'), + ([date_type(1900, 1, 1, 12), + date_type(1900, 1, 1, 13)], + 'seconds since 1900-01-01 12:00:00.000000'), + ([date_type(1900, 1, 1), + date_type(1900, 1, 2), + date_type(1900, 1, 2, 0, 0, 1)], + 'seconds since 1900-01-01 00:00:00.000000'), + ([date_type(1900, 1, 1), + date_type(1900, 1, 2, 0, 0, 0, 5)], + 'days since 1900-01-01 00:00:00.000000')]: + assert expected == coding.times.infer_datetime_units(dates) - timedeltas = np.atleast_2d(pd.to_timedelta(timedeltas, box=False)) - expected = timedeltas +@pytest.mark.parametrize( + ['timedeltas', 'units', 'numbers'], + [('1D', 'days', np.int64(1)), + (['1D', '2D', '3D'], 'days', np.array([1, 2, 3], 'int64')), + ('1h', 'hours', np.int64(1)), + ('1ms', 'milliseconds', np.int64(1)), + ('1us', 'microseconds', np.int64(1)), + (['NaT', '0s', '1s'], None, [np.nan, 0, 1]), + (['30m', '60m'], 'hours', [0.5, 1.0]), + (np.timedelta64('NaT', 'ns'), 'days', np.nan), + (['NaT', 
'NaT'], 'days', [np.nan, np.nan])]) +def test_cf_timedelta(timedeltas, units, numbers): + timedeltas = pd.to_timedelta(timedeltas, box=False) + numbers = np.array(numbers) + + expected = numbers + actual, _ = coding.times.encode_cf_timedelta(timedeltas, units) + assert_array_equal(expected, actual) + assert expected.dtype == actual.dtype + + if units is not None: + expected = timedeltas actual = coding.times.decode_cf_timedelta(numbers, units) assert_array_equal(expected, actual) assert expected.dtype == actual.dtype - def test_infer_timedelta_units(self): - for deltas, expected in [ - (pd.to_timedelta(['1 day', '2 days']), 'days'), - (pd.to_timedelta(['1h', '1 day 1 hour']), 'hours'), - (pd.to_timedelta(['1m', '2m', np.nan]), 'minutes'), - (pd.to_timedelta(['1m3s', '1m4s']), 'seconds')]: - assert expected == coding.times.infer_timedelta_units(deltas) + expected = np.timedelta64('NaT', 'ns') + actual = coding.times.decode_cf_timedelta(np.array(np.nan), 'days') + assert_array_equal(expected, actual) + + +def test_cf_timedelta_2d(): + timedeltas = ['1D', '2D', '3D'] + units = 'days' + numbers = np.atleast_2d([1, 2, 3]) + + timedeltas = np.atleast_2d(pd.to_timedelta(timedeltas, box=False)) + expected = timedeltas + + actual = coding.times.decode_cf_timedelta(numbers, units) + assert_array_equal(expected, actual) + assert expected.dtype == actual.dtype + + +@pytest.mark.parametrize( + ['deltas', 'expected'], + [(pd.to_timedelta(['1 day', '2 days']), 'days'), + (pd.to_timedelta(['1h', '1 day 1 hour']), 'hours'), + (pd.to_timedelta(['1m', '2m', np.nan]), 'minutes'), + (pd.to_timedelta(['1m3s', '1m4s']), 'seconds')]) +def test_infer_timedelta_units(deltas, expected): + assert expected == coding.times.infer_timedelta_units(deltas) + + +@pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed') +@pytest.mark.parametrize(['date_args', 'expected'], + [((1, 2, 3, 4, 5, 6), + '0001-02-03 04:05:06.000000'), + ((10, 2, 3, 4, 5, 6), + '0010-02-03 
04:05:06.000000'), + ((100, 2, 3, 4, 5, 6), + '0100-02-03 04:05:06.000000'), + ((1000, 2, 3, 4, 5, 6), + '1000-02-03 04:05:06.000000')]) +def test_format_cftime_datetime(date_args, expected): + date_types = _all_cftime_date_types() + for date_type in date_types.values(): + result = coding.times.format_cftime_datetime(date_type(*date_args)) + assert result == expected + + +@pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed') +@pytest.mark.parametrize( + ['calendar', 'enable_cftimeindex'], + product(_ALL_CALENDARS, [False, True])) +def test_decode_cf_enable_cftimeindex(calendar, enable_cftimeindex): + days = [1., 2., 3.] + da = DataArray(days, coords=[days], dims=['time'], name='test') + ds = da.to_dataset() + + for v in ['test', 'time']: + ds[v].attrs['units'] = 'days since 2001-01-01' + ds[v].attrs['calendar'] = calendar + + if (not has_cftime and enable_cftimeindex and + calendar not in coding.times._STANDARD_CALENDARS): + with pytest.raises(ValueError): + with set_options(enable_cftimeindex=enable_cftimeindex): + ds = decode_cf(ds) + else: + with set_options(enable_cftimeindex=enable_cftimeindex): + ds = decode_cf(ds) + + if (enable_cftimeindex and + calendar not in coding.times._STANDARD_CALENDARS): + assert ds.test.dtype == np.dtype('O') + else: + assert ds.test.dtype == np.dtype('M8[ns]') + + +@pytest.fixture(params=_ALL_CALENDARS) +def calendar(request): + return request.param + + +@pytest.fixture() +def times(calendar): + cftime = _import_cftime() + + return cftime.num2date( + np.arange(4), units='hours since 2000-01-01', calendar=calendar, + only_use_cftime_datetimes=True) + + +@pytest.fixture() +def data(times): + data = np.random.rand(2, 2, 4) + lons = np.linspace(0, 11, 2) + lats = np.linspace(0, 20, 2) + return DataArray(data, coords=[lons, lats, times], + dims=['lon', 'lat', 'time'], name='data') + + +@pytest.fixture() +def times_3d(times): + lons = np.linspace(0, 11, 2) + lats = np.linspace(0, 20, 2) + times_arr = 
np.random.choice(times, size=(2, 2, 4)) + return DataArray(times_arr, coords=[lons, lats, times], + dims=['lon', 'lat', 'time'], + name='data') + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +def test_contains_cftime_datetimes_1d(data): + assert contains_cftime_datetimes(data.time) + + +@pytest.mark.skipif(not has_dask, reason='dask not installed') +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +def test_contains_cftime_datetimes_dask_1d(data): + assert contains_cftime_datetimes(data.time.chunk()) + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +def test_contains_cftime_datetimes_3d(times_3d): + assert contains_cftime_datetimes(times_3d) + + +@pytest.mark.skipif(not has_dask, reason='dask not installed') +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +def test_contains_cftime_datetimes_dask_3d(times_3d): + assert contains_cftime_datetimes(times_3d.chunk()) + + +@pytest.mark.parametrize('non_cftime_data', [DataArray([]), DataArray([1, 2])]) +def test_contains_cftime_datetimes_non_cftimes(non_cftime_data): + assert not contains_cftime_datetimes(non_cftime_data) + + +@pytest.mark.skipif(not has_dask, reason='dask not installed') +@pytest.mark.parametrize('non_cftime_data', [DataArray([]), DataArray([1, 2])]) +def test_contains_cftime_datetimes_non_cftimes_dask(non_cftime_data): + assert not contains_cftime_datetimes(non_cftime_data.chunk()) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 32ab3a634cb..3d2f8cbbf4f 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -11,14 +11,14 @@ import xarray as xr from xarray import ( - DataArray, Dataset, IndexVariable, Variable, align, broadcast) -from xarray.coding.times import CFDatetimeCoder + DataArray, Dataset, IndexVariable, Variable, align, broadcast, set_options) +from xarray.coding.times import CFDatetimeCoder, _import_cftime from xarray.core.common import full_like 
from xarray.core.pycompat import OrderedDict, iteritems from xarray.tests import ( ReturnItem, TestCase, assert_allclose, assert_array_equal, assert_equal, assert_identical, raises_regex, requires_bottleneck, requires_dask, - requires_scipy, source_ndarray, unittest) + requires_scipy, source_ndarray, unittest, requires_cftime) class TestDataArray(TestCase): @@ -2208,6 +2208,19 @@ def test_resample(self): with raises_regex(ValueError, 'index must be monotonic'): array[[2, 0, 1]].resample(time='1D') + @requires_cftime + def test_resample_cftimeindex(self): + cftime = _import_cftime() + times = cftime.num2date(np.arange(12), units='hours since 0001-01-01', + calendar='noleap') + with set_options(enable_cftimeindex=True): + array = DataArray(np.arange(12), [('time', times)]) + + with raises_regex(TypeError, + 'Only valid with DatetimeIndex, ' + 'TimedeltaIndex or PeriodIndex'): + array.resample(time='6H').mean() + def test_resample_first(self): times = pd.date_range('2000-01-01', freq='6H', periods=10) array = DataArray(np.arange(10), [('time', times)]) diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 2a5eeb86bdd..4d32ea429ca 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -9,6 +9,7 @@ import xarray.plot as xplt from xarray import DataArray +from xarray.coding.times import _import_cftime from xarray.plot.plot import _infer_interval_breaks from xarray.plot.utils import ( _build_discrete_cmap, _color_palette, _determine_cmap_params, @@ -16,7 +17,7 @@ from . 
import ( TestCase, assert_array_equal, assert_equal, raises_regex, - requires_matplotlib, requires_seaborn) + requires_matplotlib, requires_seaborn, requires_cftime) # import mpl and change the backend before other mpl imports try: @@ -1488,3 +1489,24 @@ def test_plot_seaborn_no_import_warning(): with pytest.warns(None) as record: _color_palette('Blues', 4) assert len(record) == 0 + + +@requires_cftime +def test_plot_cftime_coordinate_error(): + cftime = _import_cftime() + time = cftime.num2date(np.arange(5), units='days since 0001-01-01', + calendar='noleap') + data = DataArray(np.arange(5), coords=[time], dims=['time']) + with raises_regex(TypeError, + 'requires coordinates to be numeric or dates'): + data.plot() + + +@requires_cftime +def test_plot_cftime_data_error(): + cftime = _import_cftime() + data = cftime.num2date(np.arange(5), units='days since 0001-01-01', + calendar='noleap') + data = DataArray(data, coords=[np.arange(5)], dims=['x']) + with raises_regex(NotImplementedError, 'cftime.datetime'): + data.plot() diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index 3a76b6e8c92..0b3b0ee7dd6 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -4,10 +4,14 @@ import pandas as pd import pytest +from datetime import datetime +from xarray.coding.cftimeindex import CFTimeIndex from xarray.core import duck_array_ops, utils +from xarray.core.options import set_options from xarray.core.pycompat import OrderedDict - -from . import TestCase, assert_array_equal, requires_dask +from .test_coding_times import _all_cftime_date_types +from . 
import (TestCase, requires_dask, assert_array_equal, + has_cftime_or_netCDF4, has_cftime) class TestAlias(TestCase): @@ -20,20 +24,51 @@ def new_method(): old_method() -class TestSafeCastToIndex(TestCase): - def test(self): - dates = pd.date_range('2000-01-01', periods=10) - x = np.arange(5) - td = x * np.timedelta64(1, 'D') - for expected, array in [ - (dates, dates.values), - (pd.Index(x, dtype=object), x.astype(object)), - (pd.Index(td), td), - (pd.Index(td, dtype=object), td.astype(object)), - ]: - actual = utils.safe_cast_to_index(array) - assert_array_equal(expected, actual) - assert expected.dtype == actual.dtype +def test_safe_cast_to_index(): + dates = pd.date_range('2000-01-01', periods=10) + x = np.arange(5) + td = x * np.timedelta64(1, 'D') + for expected, array in [ + (dates, dates.values), + (pd.Index(x, dtype=object), x.astype(object)), + (pd.Index(td), td), + (pd.Index(td, dtype=object), td.astype(object)), + ]: + actual = utils.safe_cast_to_index(array) + assert_array_equal(expected, actual) + assert expected.dtype == actual.dtype + + +@pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed') +@pytest.mark.parametrize('enable_cftimeindex', [False, True]) +def test_safe_cast_to_index_cftimeindex(enable_cftimeindex): + date_types = _all_cftime_date_types() + for date_type in date_types.values(): + dates = [date_type(1, 1, day) for day in range(1, 20)] + + if enable_cftimeindex and has_cftime: + expected = CFTimeIndex(dates) + else: + expected = pd.Index(dates) + + with set_options(enable_cftimeindex=enable_cftimeindex): + actual = utils.safe_cast_to_index(np.array(dates)) + assert_array_equal(expected, actual) + assert expected.dtype == actual.dtype + assert isinstance(actual, type(expected)) + + +# Test that datetime.datetime objects are never used in a CFTimeIndex +@pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed') +@pytest.mark.parametrize('enable_cftimeindex', [False, True]) +def 
test_safe_cast_to_index_datetime_datetime(enable_cftimeindex): + dates = [datetime(1, 1, day) for day in range(1, 20)] + + expected = pd.Index(dates) + with set_options(enable_cftimeindex=enable_cftimeindex): + actual = utils.safe_cast_to_index(np.array(dates)) + assert_array_equal(expected, actual) + assert isinstance(actual, pd.Index) def test_multiindex_from_product_levels():