From e1e8223adf066c5d18a2198d373763d021360aab Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sun, 5 Feb 2017 14:25:40 -0500 Subject: [PATCH 01/58] Start on implementing and testing NetCDFTimeIndex --- xarray/core/netcdftimeindex.py | 180 ++++++++++++++++ xarray/tests/test_netcdftimeindex.py | 301 +++++++++++++++++++++++++++ 2 files changed, 481 insertions(+) create mode 100644 xarray/core/netcdftimeindex.py create mode 100644 xarray/tests/test_netcdftimeindex.py diff --git a/xarray/core/netcdftimeindex.py b/xarray/core/netcdftimeindex.py new file mode 100644 index 00000000000..43824e82d0e --- /dev/null +++ b/xarray/core/netcdftimeindex.py @@ -0,0 +1,180 @@ +import re +from datetime import timedelta + +import numpy as np +import pandas as pd + +from pandas.lib import isscalar + + +def named(name, pattern): + return '(?P<' + name + '>' + pattern + ')' + + +def optional(x): + return '(?:' + x + ')?' + + +def trailing_optional(xs): + if not xs: + return '' + return xs[0] + optional(trailing_optional(xs[1:])) + + +def build_pattern(date_sep='\-', datetime_sep='T', time_sep='\:'): + pieces = [(None, 'year', '\d{4}'), + (date_sep, 'month', '\d{2}'), + (date_sep, 'day', '\d{2}'), + (datetime_sep, 'hour', '\d{2}'), + (time_sep, 'minute', '\d{2}'), + (time_sep, 'second', '\d{2}' + optional('\.\d+'))] + pattern_list = [] + for sep, name, sub_pattern in pieces: + pattern_list.append((sep if sep else '') + named(name, sub_pattern)) + # TODO: allow timezone offsets? + return '^' + trailing_optional(pattern_list) + '$' + + +def parse_iso8601(datetime_string): + basic_pattern = build_pattern(date_sep='', time_sep='') + extended_pattern = build_pattern() + patterns = [basic_pattern, extended_pattern] + for pattern in patterns: + match = re.match(pattern, datetime_string) + if match: + return match.groupdict() + raise ValueError('no ISO-8601 match for string: %s' % datetime_string) + + +def _parse_iso8601_with_reso(date_type, timestr): + default = date_type(1, 1, 1) + result = parse_iso8601(timestr) + replace = {} + + for attr in ['year', 'month', 'day', 'hour', 'minute', 'second']: + value = result.get(attr, None) + if value is not None: + replace[attr] = int(value) + resolution = attr + + return default.replace(**replace), resolution + + +def _parsed_string_to_bounds(date_type, resolution, parsed): + if resolution == 'year': + return (date_type(parsed.year, 1, 1), + date_type(parsed.year + 1, 1, 1) - timedelta(microseconds=1)) + if resolution == 'month': + if parsed.month == 12: + end = date_type(parsed.year + 1, 1, 1) - timedelta(microseconds=1) + else: + end = (date_type(parsed.year, parsed.month + 1, 1) - + timedelta(microseconds=1)) + return date_type(parsed.year, parsed.month, 1), end + if resolution == 'day': + start = date_type(parsed.year, parsed.month, parsed.day) + return start, start + timedelta(days=1, microseconds=-1) + if resolution == 'hour': + start = date_type(parsed.year, parsed.month, parsed.day, parsed.hour) + return start, start + timedelta(hours=1, microseconds=-1) + if resolution == 'minute': + start = date_type(parsed.year, parsed.month, parsed.day, parsed.hour, + parsed.minute) + return start, start + timedelta(minutes=1, microseconds=-1) + if resolution == 'second': + start = date_type(parsed.year, parsed.month, parsed.day, parsed.hour, + parsed.minute, parsed.second) + return start, start + timedelta(seconds=1, microseconds=-1) + else: + raise KeyError + + +def get_date_field(datetimes, field): + return [getattr(date, field) for date in datetimes] + + +def 
_field_accessor(name, docstring=None): + def f(self): + return get_date_field(self._data, name) + + f.__name__ = name + f.__doc__ = docstring + return property(f) + + +def get_date_type(self): + return type(self._data[0]) + + +class NetCDFTimeIndex(pd.Index): + def __new__(cls, data): + result = object.__new__(cls) + result._data = np.array(data) + return result + + year = _field_accessor('year', 'The year of the datetime') + month = _field_accessor('month', 'The month of the datetime') + day = _field_accessor('day', 'The days of the datetime') + hour = _field_accessor('hour', 'The hours of the datetime') + minute = _field_accessor('minute', 'The minutes of the datetime') + second = _field_accessor('second', 'The seconds of the datetime') + microsecond = _field_accessor('microsecond', + 'The microseconds of the datetime') + date_type = property(get_date_type) + + def _partial_date_slice(self, resolution, parsed, + use_lhs=True, use_rhs=True): + start, end = _parsed_string_to_bounds(self.date_type, resolution, + parsed) + lhs_mask = (self._data >= start) if use_lhs else True + rhs_mask = (self._data <= end) if use_rhs else True + return (lhs_mask & rhs_mask).nonzero()[0] + + def _get_string_slice(self, key, use_lhs=True, use_rhs=True): + parsed, resolution = _parse_iso8601_with_reso(self.date_type, key) + loc = self._partial_date_slice(resolution, parsed, use_lhs, use_rhs) + return loc + + def get_loc(self, key, method=None, tolerance=None): + if isinstance(key, pd.compat.string_types): + result = self._get_string_slice(key) + # Prevents problem with __contains__ if key corresponds to only + # the first element in index (if we leave things as a list, + # np.any([0]) is False). + # Also coerces things to scalar coords in xarray if possible, + # which is consistent with the behavior with a DatetimeIndex. + if len(result) == 1: + return result[0] + else: + return result + else: + return pd.Index.get_loc(self, key, method=method, + tolerance=tolerance) + + def _maybe_cast_slice_bound(self, label, side, kind): + if isinstance(label, pd.compat.string_types): + parsed, resolution = _parse_iso8601_with_reso(self.date_type, + label) + start, end = _parsed_string_to_bounds(self.date_type, resolution, + parsed) + if self.is_monotonic_decreasing and len(self): + return end if side == 'left' else start + return start if side == 'left' else end + else: + return label + + # TODO: Add ability to use integer range outside of iloc? + # e.g. series[1:5]. + def get_value(self, series, key): + if not isinstance(key, slice): + return series.iloc[self.get_loc(key)] + else: + return series.iloc[self.slice_indexer( + key.start, key.stop, key.step)] + + def __contains__(self, key): + try: + result = self.get_loc(key) + return isscalar(result) or type(result) == slice or np.any(result) + except (KeyError, TypeError, ValueError): + return False diff --git a/xarray/tests/test_netcdftimeindex.py b/xarray/tests/test_netcdftimeindex.py new file mode 100644 index 00000000000..b2a0f85a731 --- /dev/null +++ b/xarray/tests/test_netcdftimeindex.py @@ -0,0 +1,301 @@ +import pandas as pd +import xarray as xr + +from . 
import TestCase, requires_netCDF4 + + +class TestISODateParser(TestCase): + def test_parse_iso8601(self): + from xarray.core.netcdftimeindex import parse_iso8601 + + def date_dict(year=None, month=None, day=None, + hour=None, minute=None, second=None): + return dict(year=year, month=month, day=day, hour=hour, + minute=minute, second=second) + + for string, expected in [ + ('1999', date_dict(year='1999')), + ('199901', date_dict(year='1999', month='01')), + ('1999-01', date_dict(year='1999', month='01')), + ('19990101', date_dict(year='1999', month='01', day='01')), + ('1999-01-01', date_dict(year='1999', month='01', day='01')), + ('19990101T12', date_dict(year='1999', month='01', day='01', + hour='12')), + ('1999-01-01T12', date_dict(year='1999', month='01', day='01', + hour='12')), + ('19990101T1234', date_dict(year='1999', month='01', day='01', + hour='12', minute='34')), + ('1999-01-01T12:34', date_dict(year='1999', month='01', day='01', + hour='12', minute='34')), + ('19990101T123456', date_dict(year='1999', month='01', day='01', + hour='12', minute='34', + second='56')), + ('1999-01-01T12:34:56', date_dict(year='1999', month='01', + day='01', hour='12', minute='34', + second='56')), + ('19990101T123456.78', date_dict(year='1999', month='01', + day='01', hour='12', minute='34', + second='56.78')), + ('1999-01-01T12:34:56.78', date_dict(year='1999', month='01', + day='01', hour='12', + minute='34', second='56.78')) + ]: + result = parse_iso8601(string) + self.assertEqual(result, expected) + + if '.' not in string: + with self.assertRaises(ValueError): + parse_iso8601(string + '3') + + +@requires_netCDF4 +class NetCDFTimeIndexTests(object): + feb_days = 28 + dec_days = 31 + + def set_date_type(self): + self.date_type = None + + def setUp(self): + from xarray.core.netcdftimeindex import NetCDFTimeIndex + self.set_date_type() + dates = [self.date_type(1, 1, 1), self.date_type(1, 2, 1), + self.date_type(2, 1, 1), self.date_type(2, 2, 1)] + self.index = NetCDFTimeIndex(dates) + self.da = xr.DataArray([1, 2, 3, 4], coords=[self.index], + dims=['time']) + self.series = pd.Series([1, 2, 3, 4], index=self.index) + self.df = pd.DataFrame([1, 2, 3, 4], index=self.index) + + def tearDown(self): + pass + + def test_netcdftimeindex_field_accessors(self): + for field, expected in [ + ('year', [1, 1, 2, 2]), + ('month', [1, 2, 1, 2]), + ('day', [1, 1, 1, 1]), + ('hour', [0, 0, 0, 0]), + ('minute', [0, 0, 0, 0]), + ('second', [0, 0, 0, 0]), + ('microsecond', [0, 0, 0, 0]) + ]: + result = getattr(self.index, field) + self.assertArrayEqual(result, expected) + + def test_parse_iso8601_with_reso(self): + from xarray.core.netcdftimeindex import _parse_iso8601_with_reso + + for string, (expected_date, expected_reso) in [ + ('1999', (self.date_type(1999, 1, 1), 'year')), + ('199902', (self.date_type(1999, 2, 1), 'month')), + ('19990202', (self.date_type(1999, 2, 2), 'day')), + ('19990202T01', (self.date_type(1999, 2, 2, 1), 'hour')), + ('19990202T0101', (self.date_type(1999, 2, 2, 1, 1), + 'minute')), + ('19990202T010156', (self.date_type(1999, 2, 2, 1, 1, 56), + 'second')) + ]: + result_date, result_reso = _parse_iso8601_with_reso( + self.date_type, string) + self.assertEqual(result_date, expected_date) + self.assertEqual(result_reso, expected_reso) + + def test_parsed_string_to_bounds(self): + from xarray.core.netcdftimeindex import _parsed_string_to_bounds + parsed = self.date_type(2, 2, 10, 6, 2, 8, 1) + + for resolution, (expected_start, expected_end) in [ + ('year', (self.date_type(2, 1, 1), + 
self.date_type(2, 12, self.dec_days, 23, 59, 59, + 999999))), + ('month', (self.date_type(2, 2, 1), + self.date_type(2, 2, self.feb_days, 23, 59, 59, + 999999))), + ('day', (self.date_type(2, 2, 10), + self.date_type(2, 2, 10, 23, 59, 59, 999999))), + ('hour', (self.date_type(2, 2, 10, 6), + self.date_type(2, 2, 10, 6, 59, 59, 999999))), + ('minute', (self.date_type(2, 2, 10, 6, 2), + self.date_type(2, 2, 10, 6, 2, 59, 999999))), + ('second', (self.date_type(2, 2, 10, 6, 2, 8), + self.date_type(2, 2, 10, 6, 2, 8, 999999))) + ]: + result_start, result_end = _parsed_string_to_bounds( + self.date_type, resolution, parsed) + self.assertEqual(result_start, expected_start) + self.assertEqual(result_end, expected_end) + + # Test special case for monthly resolution and parsed date in December + parsed = self.date_type(2, 12, 1) + expected_start = self.date_type(2, 12, 1) + expected_end = self.date_type(2, 12, self.dec_days, 23, 59, 59, 999999) + result_start, result_end = _parsed_string_to_bounds( + self.date_type, 'month', parsed) + self.assertEqual(result_start, expected_start) + self.assertEqual(result_end, expected_end) + + def test_get_loc(self): + result = self.index.get_loc('0001') + expected = [0, 1] + self.assertArrayEqual(result, expected) + + result = self.index.get_loc(self.date_type(1, 2, 1)) + expected = 1 + self.assertEqual(result, expected) + + result = self.index.get_loc('0001-02-01') + expected = 1 + self.assertEqual(result, expected) + + def test_get_slice_bound(self): + for kind in ['loc', 'getitem']: + result = self.index.get_slice_bound('0001', 'left', kind) + expected = 0 + self.assertEqual(result, expected) + + result = self.index.get_slice_bound('0001', 'right', kind) + expected = 2 + self.assertEqual(result, expected) + + result = self.index.get_slice_bound( + self.date_type(1, 3, 1), 'left', kind) + expected = 2 + self.assertEqual(result, expected) + + result = self.index.get_slice_bound( + self.date_type(1, 3, 1), 'right', kind) + expected = 2 + self.assertEqual(result, expected) + + def test_date_type_property(self): + self.assertEqual(self.index.date_type, self.date_type) + + def test_contains(self): + assert '0001' in self.index + assert '0003' not in self.index + assert self.date_type(1, 1, 1) in self.index + assert self.date_type(3, 1, 1) not in self.index + + def test_groupby(self): + result = self.da.groupby('time.month').sum('time') + expected = xr.DataArray([4, 6], coords=[[1, 2]], dims=['month']) + self.assertDataArrayIdentical(result, expected) + + def test_sel(self): + expected = xr.DataArray([1, 2], coords=[self.index[:2]], dims=['time']) + for result in [ + self.da.sel(time='0001'), + self.da.sel(time=slice('0001-01-01', '0001-12-30')), + self.da.sel(time=slice(self.date_type(1, 1, 1), + self.date_type(1, 12, 30))), + self.da.sel(time=[self.date_type(1, 1, 1), + self.date_type(1, 2, 1)]), + self.da.sel(time=[True, True, False, False]) + ]: + self.assertDataArrayIdentical(result, expected) + + expected = xr.DataArray(1).assign_coords(time=self.index[0]) + for result in [ + self.da.sel(time=self.date_type(1, 1, 1)), + self.da.sel(time='0001-01-01') + ]: + self.assertDataArrayIdentical(result, expected) + + def test_isel(self): + expected = xr.DataArray(1).assign_coords(time=self.index[0]) + result = self.da.isel(time=0) + self.assertDataArrayIdentical(result, expected) + + expected = xr.DataArray([1, 2], coords=[self.index[:2]], dims=['time']) + result = self.da.isel(time=[0, 1]) + self.assertDataArrayIdentical(result, expected) + + def 
test_indexing_in_series(self): + # Note that integer-based indexing outside of iloc does not work + # using the simplified get_value method (for now). + expected = 1 + for result in [ + # self.series[0], + self.series[self.date_type(1, 1, 1)], + self.series['0001-01-01'], + self.series.loc['0001-01-01'], + self.series.loc[self.date_type(1, 1, 1)], + self.series.iloc[0] + ]: + self.assertEqual(result, expected) + self.assertEqual(result, expected) + + expected = pd.Series([1, 2], index=self.index[:2]) + for result in [ + # self.series[:2], + self.series['0001'], + self.series['0001-01-01':'0001-12-30'], + self.series[self.date_type(1, 1, 1):self.date_type(1, 12, 30)], + self.series.loc[self.date_type(1, 1, 1):self.date_type(1, 12, 30)], + self.series.loc['0001'], + self.series.loc['0001-01-01':'0001-12-30'], + self.series.loc[:'0001-12-30'], + self.series.iloc[:2] + ]: + pd.util.testing.assert_series_equal(result, expected) + + def test_indexing_in_dataframe(self): + expected = pd.Series([1], name=self.index[0]) + for result in [ + self.df.loc['0001-01-01'], + self.df.loc[self.date_type(1, 1, 1)], + self.df.iloc[0] + ]: + pd.util.testing.assert_series_equal(result, expected) + + expected = pd.DataFrame([1, 2], index=self.index[:2]) + for result in [ + self.df.loc['0001'], + self.df.loc['0001-01-01':'0001-12-30'], + self.df.loc[:'0001-12-30'], + self.df.loc[self.date_type(1, 1, 1):self.date_type(1, 12, 30)], + self.df.iloc[:2] + ]: + pd.util.testing.assert_frame_equal(result, expected) + + +class DatetimeJulianTestCase(NetCDFTimeIndexTests, TestCase): + def set_date_type(self): + from netcdftime import DatetimeJulian + self.date_type = DatetimeJulian + + +class DatetimeGregorianTestCase(NetCDFTimeIndexTests, TestCase): + def set_date_type(self): + from netcdftime import DatetimeGregorian + self.date_type = DatetimeGregorian + + +class DatetimeProlepticGregorianTestCase(NetCDFTimeIndexTests, TestCase): + def set_date_type(self): + from netcdftime import DatetimeProlepticGregorian + self.date_type = DatetimeProlepticGregorian + + +class DatetimeNoLeapTestCase(NetCDFTimeIndexTests, TestCase): + def set_date_type(self): + from netcdftime import DatetimeNoLeap + self.date_type = DatetimeNoLeap + + +class DatetimeAllLeapTestCase(NetCDFTimeIndexTests, TestCase): + feb_days = 29 + + def set_date_type(self): + from netcdftime import DatetimeAllLeap + self.date_type = DatetimeAllLeap + + +class Datetime360DayTestCase(NetCDFTimeIndexTests, TestCase): + feb_days = 30 + dec_days = 30 + + def set_date_type(self): + from netcdftime import Datetime360Day + self.date_type = Datetime360Day From 6496458fcd268091bc357ded73c2452d31aa7acb Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Mon, 6 Feb 2017 13:49:33 -0500 Subject: [PATCH 02/58] TST Move to using pytest fixtures to structure tests --- xarray/tests/test_netcdftimeindex.py | 644 ++++++++++++++------------- 1 file changed, 346 insertions(+), 298 deletions(-) diff --git a/xarray/tests/test_netcdftimeindex.py b/xarray/tests/test_netcdftimeindex.py index b2a0f85a731..469e09b259a 100644 --- a/xarray/tests/test_netcdftimeindex.py +++ b/xarray/tests/test_netcdftimeindex.py @@ -1,301 +1,349 @@ +import pytest + import pandas as pd import xarray as xr -from . 
import TestCase, requires_netCDF4 - - -class TestISODateParser(TestCase): - def test_parse_iso8601(self): - from xarray.core.netcdftimeindex import parse_iso8601 - - def date_dict(year=None, month=None, day=None, - hour=None, minute=None, second=None): - return dict(year=year, month=month, day=day, hour=hour, - minute=minute, second=second) - - for string, expected in [ - ('1999', date_dict(year='1999')), - ('199901', date_dict(year='1999', month='01')), - ('1999-01', date_dict(year='1999', month='01')), - ('19990101', date_dict(year='1999', month='01', day='01')), - ('1999-01-01', date_dict(year='1999', month='01', day='01')), - ('19990101T12', date_dict(year='1999', month='01', day='01', - hour='12')), - ('1999-01-01T12', date_dict(year='1999', month='01', day='01', - hour='12')), - ('19990101T1234', date_dict(year='1999', month='01', day='01', - hour='12', minute='34')), - ('1999-01-01T12:34', date_dict(year='1999', month='01', day='01', - hour='12', minute='34')), - ('19990101T123456', date_dict(year='1999', month='01', day='01', - hour='12', minute='34', - second='56')), - ('1999-01-01T12:34:56', date_dict(year='1999', month='01', - day='01', hour='12', minute='34', - second='56')), - ('19990101T123456.78', date_dict(year='1999', month='01', - day='01', hour='12', minute='34', - second='56.78')), - ('1999-01-01T12:34:56.78', date_dict(year='1999', month='01', - day='01', hour='12', - minute='34', second='56.78')) - ]: - result = parse_iso8601(string) - self.assertEqual(result, expected) - - if '.' not in string: - with self.assertRaises(ValueError): - parse_iso8601(string + '3') - - -@requires_netCDF4 -class NetCDFTimeIndexTests(object): - feb_days = 28 - dec_days = 31 - - def set_date_type(self): - self.date_type = None - - def setUp(self): - from xarray.core.netcdftimeindex import NetCDFTimeIndex - self.set_date_type() - dates = [self.date_type(1, 1, 1), self.date_type(1, 2, 1), - self.date_type(2, 1, 1), self.date_type(2, 2, 1)] - self.index = NetCDFTimeIndex(dates) - self.da = xr.DataArray([1, 2, 3, 4], coords=[self.index], - dims=['time']) - self.series = pd.Series([1, 2, 3, 4], index=self.index) - self.df = pd.DataFrame([1, 2, 3, 4], index=self.index) - - def tearDown(self): - pass - - def test_netcdftimeindex_field_accessors(self): - for field, expected in [ - ('year', [1, 1, 2, 2]), - ('month', [1, 2, 1, 2]), - ('day', [1, 1, 1, 1]), - ('hour', [0, 0, 0, 0]), - ('minute', [0, 0, 0, 0]), - ('second', [0, 0, 0, 0]), - ('microsecond', [0, 0, 0, 0]) - ]: - result = getattr(self.index, field) - self.assertArrayEqual(result, expected) - - def test_parse_iso8601_with_reso(self): - from xarray.core.netcdftimeindex import _parse_iso8601_with_reso - - for string, (expected_date, expected_reso) in [ - ('1999', (self.date_type(1999, 1, 1), 'year')), - ('199902', (self.date_type(1999, 2, 1), 'month')), - ('19990202', (self.date_type(1999, 2, 2), 'day')), - ('19990202T01', (self.date_type(1999, 2, 2, 1), 'hour')), - ('19990202T0101', (self.date_type(1999, 2, 2, 1, 1), - 'minute')), - ('19990202T010156', (self.date_type(1999, 2, 2, 1, 1, 56), - 'second')) - ]: - result_date, result_reso = _parse_iso8601_with_reso( - self.date_type, string) - self.assertEqual(result_date, expected_date) - self.assertEqual(result_reso, expected_reso) - - def test_parsed_string_to_bounds(self): - from xarray.core.netcdftimeindex import _parsed_string_to_bounds - parsed = self.date_type(2, 2, 10, 6, 2, 8, 1) - - for resolution, (expected_start, expected_end) in [ - ('year', (self.date_type(2, 1, 1), - 
self.date_type(2, 12, self.dec_days, 23, 59, 59, - 999999))), - ('month', (self.date_type(2, 2, 1), - self.date_type(2, 2, self.feb_days, 23, 59, 59, - 999999))), - ('day', (self.date_type(2, 2, 10), - self.date_type(2, 2, 10, 23, 59, 59, 999999))), - ('hour', (self.date_type(2, 2, 10, 6), - self.date_type(2, 2, 10, 6, 59, 59, 999999))), - ('minute', (self.date_type(2, 2, 10, 6, 2), - self.date_type(2, 2, 10, 6, 2, 59, 999999))), - ('second', (self.date_type(2, 2, 10, 6, 2, 8), - self.date_type(2, 2, 10, 6, 2, 8, 999999))) - ]: - result_start, result_end = _parsed_string_to_bounds( - self.date_type, resolution, parsed) - self.assertEqual(result_start, expected_start) - self.assertEqual(result_end, expected_end) - - # Test special case for monthly resolution and parsed date in December - parsed = self.date_type(2, 12, 1) - expected_start = self.date_type(2, 12, 1) - expected_end = self.date_type(2, 12, self.dec_days, 23, 59, 59, 999999) - result_start, result_end = _parsed_string_to_bounds( - self.date_type, 'month', parsed) - self.assertEqual(result_start, expected_start) - self.assertEqual(result_end, expected_end) - - def test_get_loc(self): - result = self.index.get_loc('0001') - expected = [0, 1] - self.assertArrayEqual(result, expected) - - result = self.index.get_loc(self.date_type(1, 2, 1)) - expected = 1 - self.assertEqual(result, expected) - - result = self.index.get_loc('0001-02-01') - expected = 1 - self.assertEqual(result, expected) - - def test_get_slice_bound(self): - for kind in ['loc', 'getitem']: - result = self.index.get_slice_bound('0001', 'left', kind) - expected = 0 - self.assertEqual(result, expected) - - result = self.index.get_slice_bound('0001', 'right', kind) - expected = 2 - self.assertEqual(result, expected) - - result = self.index.get_slice_bound( - self.date_type(1, 3, 1), 'left', kind) - expected = 2 - self.assertEqual(result, expected) - - result = self.index.get_slice_bound( - self.date_type(1, 3, 1), 'right', kind) - expected = 2 - self.assertEqual(result, expected) - - def test_date_type_property(self): - self.assertEqual(self.index.date_type, self.date_type) - - def test_contains(self): - assert '0001' in self.index - assert '0003' not in self.index - assert self.date_type(1, 1, 1) in self.index - assert self.date_type(3, 1, 1) not in self.index - - def test_groupby(self): - result = self.da.groupby('time.month').sum('time') - expected = xr.DataArray([4, 6], coords=[[1, 2]], dims=['month']) - self.assertDataArrayIdentical(result, expected) - - def test_sel(self): - expected = xr.DataArray([1, 2], coords=[self.index[:2]], dims=['time']) - for result in [ - self.da.sel(time='0001'), - self.da.sel(time=slice('0001-01-01', '0001-12-30')), - self.da.sel(time=slice(self.date_type(1, 1, 1), - self.date_type(1, 12, 30))), - self.da.sel(time=[self.date_type(1, 1, 1), - self.date_type(1, 2, 1)]), - self.da.sel(time=[True, True, False, False]) - ]: - self.assertDataArrayIdentical(result, expected) - - expected = xr.DataArray(1).assign_coords(time=self.index[0]) - for result in [ - self.da.sel(time=self.date_type(1, 1, 1)), - self.da.sel(time='0001-01-01') - ]: - self.assertDataArrayIdentical(result, expected) - - def test_isel(self): - expected = xr.DataArray(1).assign_coords(time=self.index[0]) - result = self.da.isel(time=0) - self.assertDataArrayIdentical(result, expected) - - expected = xr.DataArray([1, 2], coords=[self.index[:2]], dims=['time']) - result = self.da.isel(time=[0, 1]) - self.assertDataArrayIdentical(result, expected) - - def 
test_indexing_in_series(self): - # Note that integer-based indexing outside of iloc does not work - # using the simplified get_value method (for now). - expected = 1 - for result in [ - # self.series[0], - self.series[self.date_type(1, 1, 1)], - self.series['0001-01-01'], - self.series.loc['0001-01-01'], - self.series.loc[self.date_type(1, 1, 1)], - self.series.iloc[0] - ]: - self.assertEqual(result, expected) - self.assertEqual(result, expected) - - expected = pd.Series([1, 2], index=self.index[:2]) - for result in [ - # self.series[:2], - self.series['0001'], - self.series['0001-01-01':'0001-12-30'], - self.series[self.date_type(1, 1, 1):self.date_type(1, 12, 30)], - self.series.loc[self.date_type(1, 1, 1):self.date_type(1, 12, 30)], - self.series.loc['0001'], - self.series.loc['0001-01-01':'0001-12-30'], - self.series.loc[:'0001-12-30'], - self.series.iloc[:2] - ]: - pd.util.testing.assert_series_equal(result, expected) - - def test_indexing_in_dataframe(self): - expected = pd.Series([1], name=self.index[0]) - for result in [ - self.df.loc['0001-01-01'], - self.df.loc[self.date_type(1, 1, 1)], - self.df.iloc[0] - ]: - pd.util.testing.assert_series_equal(result, expected) - - expected = pd.DataFrame([1, 2], index=self.index[:2]) - for result in [ - self.df.loc['0001'], - self.df.loc['0001-01-01':'0001-12-30'], - self.df.loc[:'0001-12-30'], - self.df.loc[self.date_type(1, 1, 1):self.date_type(1, 12, 30)], - self.df.iloc[:2] - ]: - pd.util.testing.assert_frame_equal(result, expected) - - -class DatetimeJulianTestCase(NetCDFTimeIndexTests, TestCase): - def set_date_type(self): - from netcdftime import DatetimeJulian - self.date_type = DatetimeJulian - - -class DatetimeGregorianTestCase(NetCDFTimeIndexTests, TestCase): - def set_date_type(self): - from netcdftime import DatetimeGregorian - self.date_type = DatetimeGregorian - - -class DatetimeProlepticGregorianTestCase(NetCDFTimeIndexTests, TestCase): - def set_date_type(self): - from netcdftime import DatetimeProlepticGregorian - self.date_type = DatetimeProlepticGregorian - - -class DatetimeNoLeapTestCase(NetCDFTimeIndexTests, TestCase): - def set_date_type(self): - from netcdftime import DatetimeNoLeap - self.date_type = DatetimeNoLeap - - -class DatetimeAllLeapTestCase(NetCDFTimeIndexTests, TestCase): - feb_days = 29 - - def set_date_type(self): - from netcdftime import DatetimeAllLeap - self.date_type = DatetimeAllLeap - - -class Datetime360DayTestCase(NetCDFTimeIndexTests, TestCase): - feb_days = 30 - dec_days = 30 - - def set_date_type(self): - from netcdftime import Datetime360Day - self.date_type = Datetime360Day +from xarray.tests import assert_array_equal, assert_identical + +# Putting this at the module level for now, though technically we +# don't need netCDF4 to test the string parser. 
+pytest.importorskip('netCDF4') + + +def date_dict(year=None, month=None, day=None, + hour=None, minute=None, second=None): + return dict(year=year, month=month, day=day, hour=hour, + minute=minute, second=second) + + +@pytest.mark.parametrize(('string', 'expected'), [ + ('1999', date_dict(year='1999')), + ('199901', date_dict(year='1999', month='01')), + ('1999-01', date_dict(year='1999', month='01')), + ('19990101', date_dict(year='1999', month='01', day='01')), + ('1999-01-01', date_dict(year='1999', month='01', day='01')), + ('19990101T12', date_dict(year='1999', month='01', day='01', hour='12')), + ('1999-01-01T12', date_dict(year='1999', month='01', day='01', hour='12')), + ('19990101T1234', date_dict( + year='1999', month='01', day='01', hour='12', minute='34')), + ('1999-01-01T12:34', date_dict( + year='1999', month='01', day='01', hour='12', minute='34')), + ('19990101T123456', date_dict( + year='1999', month='01', day='01', hour='12', minute='34', + second='56')), + ('1999-01-01T12:34:56', date_dict( + year='1999', month='01', day='01', hour='12', minute='34', + second='56')), + ('19990101T123456.78', date_dict( + year='1999', month='01', day='01', hour='12', minute='34', + second='56.78')), + ('1999-01-01T12:34:56.78', date_dict( + year='1999', month='01', day='01', hour='12', minute='34', + second='56.78')) +], ids=['year', 'month', 'month-dash', 'day', 'day-dash', 'hour', 'hour-dash', + 'minute', 'minute-dash', 'second', 'second-dash', 'second-dec', + 'second-dec-dash']) +def test_parse_iso8601(string, expected): + from xarray.core.netcdftimeindex import parse_iso8601 + + result = parse_iso8601(string) + assert result == expected + + if '.' not in string: + with pytest.raises(ValueError): + parse_iso8601(string + '3') + + +def netcdftime_date_types(): + from netcdftime import ( + DatetimeNoLeap, DatetimeJulian, DatetimeAllLeap, + DatetimeGregorian, DatetimeProlepticGregorian, Datetime360Day) + return [DatetimeNoLeap, DatetimeJulian, DatetimeAllLeap, + DatetimeGregorian, DatetimeProlepticGregorian, Datetime360Day] + + +@pytest.fixture(params=netcdftime_date_types()) +def date_type(request): + return request.param + + +@pytest.fixture +def index(date_type): + from xarray.core.netcdftimeindex import NetCDFTimeIndex + + dates = [date_type(1, 1, 1), date_type(1, 2, 1), + date_type(2, 1, 1), date_type(2, 2, 1)] + return NetCDFTimeIndex(dates) + + +@pytest.fixture +def da(index): + return xr.DataArray([1, 2, 3, 4], coords=[index], + dims=['time']) + + +@pytest.fixture +def series(index): + return pd.Series([1, 2, 3, 4], index=index) + + +@pytest.fixture +def df(index): + return pd.DataFrame([1, 2, 3, 4], index=index) + + +@pytest.fixture +def feb_days(date_type): + from netcdftime import DatetimeAllLeap, Datetime360Day + if date_type == DatetimeAllLeap: + return 29 + elif date_type == Datetime360Day: + return 30 + else: + return 28 + + +@pytest.fixture +def dec_days(date_type): + from netcdftime import Datetime360Day + if date_type == Datetime360Day: + return 30 + else: + return 31 + + +@pytest.mark.parametrize(('field', 'expected'), [ + ('year', [1, 1, 2, 2]), + ('month', [1, 2, 1, 2]), + ('day', [1, 1, 1, 1]), + ('hour', [0, 0, 0, 0]), + ('minute', [0, 0, 0, 0]), + ('second', [0, 0, 0, 0]), + ('microsecond', [0, 0, 0, 0]) +], ids=['year', 'month', 'day', 'hour', 'minute', 'second', 'microsecond']) +def test_netcdftimeindex_field_accessors(index, field, expected): + result = getattr(index, field) + assert_array_equal(result, expected) + + +@pytest.mark.parametrize(('string', 
'date_args', 'reso'), [ + ('1999', (1999, 1, 1), 'year'), + ('199902', (1999, 2, 1), 'month'), + ('19990202', (1999, 2, 2), 'day'), + ('19990202T01', (1999, 2, 2, 1), 'hour'), + ('19990202T0101', (1999, 2, 2, 1, 1), 'minute'), + ('19990202T010156', (1999, 2, 2, 1, 1, 56), 'second')], + ids=['year', 'month', 'day', 'hour', 'minute', 'second'] +) +def test_parse_iso8601_with_reso(date_type, string, date_args, reso): + from xarray.core.netcdftimeindex import _parse_iso8601_with_reso + expected_date = date_type(*date_args) + expected_reso = reso + result_date, result_reso = _parse_iso8601_with_reso(date_type, string) + assert result_date == expected_date + assert result_reso == expected_reso + + +def test_parse_string_to_bounds_year(date_type, dec_days): + from xarray.core.netcdftimeindex import _parsed_string_to_bounds + parsed = date_type(2, 2, 10, 6, 2, 8, 1) + expected_start = date_type(2, 1, 1) + expected_end = date_type(2, 12, dec_days, 23, 59, 59, 999999) + result_start, result_end = _parsed_string_to_bounds( + date_type, 'year', parsed) + assert result_start == expected_start + assert result_end == expected_end + + +def test_parse_string_to_bounds_month_feb(date_type, feb_days): + from xarray.core.netcdftimeindex import _parsed_string_to_bounds + parsed = date_type(2, 2, 10, 6, 2, 8, 1) + expected_start = date_type(2, 2, 1) + expected_end = date_type(2, 2, feb_days, 23, 59, 59, 999999) + result_start, result_end = _parsed_string_to_bounds( + date_type, 'month', parsed) + assert result_start == expected_start + assert result_end == expected_end + + +def test_parse_string_to_bounds_month_dec(date_type, dec_days): + from xarray.core.netcdftimeindex import _parsed_string_to_bounds + parsed = date_type(2, 12, 1) + expected_start = date_type(2, 12, 1) + expected_end = date_type(2, 12, dec_days, 23, 59, 59, 999999) + result_start, result_end = _parsed_string_to_bounds( + date_type, 'month', parsed) + assert result_start == expected_start + assert result_end == expected_end + + +@pytest.mark.parametrize(('reso', 'ex_start_args', 'ex_end_args'), [ + ('day', (2, 2, 10), (2, 2, 10, 23, 59, 59, 999999)), + ('hour', (2, 2, 10, 6), (2, 2, 10, 6, 59, 59, 999999)), + ('minute', (2, 2, 10, 6, 2), (2, 2, 10, 6, 2, 59, 999999)), + ('second', (2, 2, 10, 6, 2, 8), (2, 2, 10, 6, 2, 8, 999999)) +], ids=['day', 'hour', 'minute', 'second']) +def test_parsed_string_to_bounds_sub_monthly(date_type, reso, + ex_start_args, ex_end_args): + from xarray.core.netcdftimeindex import _parsed_string_to_bounds + parsed = date_type(2, 2, 10, 6, 2, 8, 1) + expected_start = date_type(*ex_start_args) + expected_end = date_type(*ex_end_args) + + result_start, result_end = _parsed_string_to_bounds( + date_type, reso, parsed) + assert result_start == expected_start + assert result_end == expected_end + + +def test_get_loc(date_type, index): + result = index.get_loc('0001') + expected = [0, 1] + assert_array_equal(result, expected) + + result = index.get_loc(date_type(1, 2, 1)) + expected = 1 + assert result == expected + + result = index.get_loc('0001-02-01') + expected = 1 + assert result == expected + + +@pytest.mark.parametrize('kind', ['loc', 'getitem']) +def test_get_slice_bound(date_type, index, kind): + result = index.get_slice_bound('0001', 'left', kind) + expected = 0 + assert result == expected + + result = index.get_slice_bound('0001', 'right', kind) + expected = 2 + assert result == expected + + result = index.get_slice_bound( + date_type(1, 3, 1), 'left', kind) + expected = 2 + assert result == expected + + result = 
index.get_slice_bound( + date_type(1, 3, 1), 'right', kind) + expected = 2 + assert result == expected + + +def test_date_type_property(date_type, index): + assert index.date_type == date_type + + +def test_contains(date_type, index): + assert '0001-01-01' in index + assert '0001' in index + assert '0003' not in index + assert date_type(1, 1, 1) in index + assert date_type(3, 1, 1) not in index + + +def test_groupby(da): + result = da.groupby('time.month').sum('time') + expected = xr.DataArray([4, 6], coords=[[1, 2]], dims=['month']) + assert_identical(result, expected) + + +@pytest.mark.parametrize('sel_arg', [ + '0001', + slice('0001-01-01', '0001-12-30'), + [True, True, False, False] +], ids=['string', 'string-slice', 'bool-list']) +def test_sel_string_or_list(da, index, sel_arg): + expected = xr.DataArray([1, 2], coords=[index[:2]], dims=['time']) + result = da.sel(time=sel_arg) + assert_identical(result, expected) + + +def test_sel_date_slice_or_list(da, index, date_type): + expected = xr.DataArray([1, 2], coords=[index[:2]], dims=['time']) + result = da.sel(time=slice(date_type(1, 1, 1), date_type(1, 12, 30))) + assert_identical(result, expected) + + result = da.sel(time=[date_type(1, 1, 1), date_type(1, 2, 1)]) + assert_identical(result, expected) + + +def test_sel_date_scalar(da, date_type, index): + expected = xr.DataArray(1).assign_coords(time=index[0]) + result = da.sel(time=date_type(1, 1, 1)) + assert_identical(result, expected) + + result = da.sel(time='0001-01-01') + assert_identical(result, expected) + + +def test_isel(da, index): + expected = xr.DataArray(1).assign_coords(time=index[0]) + result = da.isel(time=0) + assert_identical(result, expected) + + expected = xr.DataArray([1, 2], coords=[index[:2]], dims=['time']) + result = da.isel(time=[0, 1]) + assert_identical(result, expected) + + +@pytest.fixture +def scalar_args(date_type): + return ['0001-01-01', date_type(1, 1, 1)] + + +@pytest.fixture +def range_args(date_type): + return ['0001', slice('0001-01-01', '0001-12-30'), + slice(None, '0001-12-30'), + slice(date_type(1, 1, 1), date_type(1, 12, 30)), + slice(None, date_type(1, 12, 30))] + + +def test_indexing_in_series_getitem(series, index, scalar_args, range_args): + for arg in scalar_args: + assert series[arg] == 1 + + expected = pd.Series([1, 2], index=index[:2]) + for arg in range_args: + pd.util.testing.assert_series_equal(series[arg], expected) + + +def test_indexing_in_series_loc(series, index, scalar_args, range_args): + for arg in scalar_args: + assert series.loc[arg] == 1 + + expected = pd.Series([1, 2], index=index[:2]) + for arg in range_args: + pd.util.testing.assert_series_equal(series.loc[arg], expected) + + +def test_indexing_in_series_iloc(series, index): + expected = 1 + assert series.iloc[0] == expected + + expected = pd.Series([1, 2], index=index[:2]) + pd.util.testing.assert_series_equal(series.iloc[:2], expected) + + +def test_indexing_in_dataframe_loc(df, index, scalar_args, range_args): + expected = pd.Series([1], name=index[0]) + for arg in scalar_args: + result = df.loc[arg] + pd.util.testing.assert_series_equal(result, expected) + + expected = pd.DataFrame([1, 2], index=index[:2]) + for arg in range_args: + result = df.loc[arg] + pd.util.testing.assert_frame_equal(result, expected) + + +def test_indexing_in_dataframe_iloc(df, index): + expected = pd.Series([1], name=index[0]) + result = df.iloc[0] + pd.util.testing.assert_series_equal(result, expected) + + expected = pd.DataFrame([1, 2], index=index[:2]) + result = df.iloc[:2] + 
pd.util.testing.assert_frame_equal(result, expected) From 675b2f7cddd0a21f68cbc6ae7e4069462f798496 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Fri, 10 Feb 2017 10:17:52 -0500 Subject: [PATCH 03/58] Address initial review comments --- xarray/__init__.py | 2 +- xarray/backends/api.py | 5 +- xarray/backends/common.py | 2 +- xarray/backends/netCDF4_.py | 2 +- xarray/backends/netcdf3.py | 5 +- xarray/conventions/__init__.py | 0 .../{conventions.py => conventions/coding.py} | 12 +- .../{core => conventions}/netcdftimeindex.py | 74 ++++++++---- xarray/convert.py | 2 +- xarray/core/dataset.py | 4 +- .../{test_conventions.py => test_coding.py} | 111 +++++++++--------- xarray/tests/test_netcdftimeindex.py | 68 ++++++----- 12 files changed, 166 insertions(+), 121 deletions(-) create mode 100644 xarray/conventions/__init__.py rename xarray/{conventions.py => conventions/coding.py} (99%) rename xarray/{core => conventions}/netcdftimeindex.py (70%) rename xarray/tests/{test_conventions.py => test_coding.py} (88%) diff --git a/xarray/__init__.py b/xarray/__init__.py index c12ffe35a1f..c8a5a443ce9 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -14,7 +14,7 @@ from .backends.api import (open_dataset, open_dataarray, open_mfdataset, save_mfdataset) -from .conventions import decode_cf +from .conventions.coding import decode_cf try: from .version import version as __version__ diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 770aed952f0..4b0d0014599 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -10,8 +10,9 @@ import numpy as np -from .. import backends, conventions +from .. import backends from .common import ArrayWriter, GLOBAL_LOCK +from ..conventions import coding from ..core import indexing from ..core.combine import auto_combine from ..core.utils import close_on_error, is_remote_uri @@ -217,7 +218,7 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True, cache = chunks is None def maybe_decode_store(store, lock=False): - ds = conventions.decode_cf( + ds = coding.decode_cf( store, mask_and_scale=mask_and_scale, decode_times=decode_times, concat_characters=concat_characters, decode_coords=decode_coords, drop_variables=drop_variables) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index e7cbd0bd9ae..b8149061e46 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -8,7 +8,7 @@ from collections import Mapping from distutils.version import StrictVersion -from ..conventions import cf_encoder +from ..conventions.coding import cf_encoder from ..core.utils import FrozenOrderedDict from ..core.pycompat import iteritems, dask_array_type diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index f72a26affaf..1b37db687b7 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -7,7 +7,7 @@ import numpy as np from .. import Variable -from ..conventions import pop_to +from ..conventions.coding import pop_to from ..core import indexing from ..core.utils import (FrozenOrderedDict, NDArrayMixin, close_on_error, is_remote_uri) diff --git a/xarray/backends/netcdf3.py b/xarray/backends/netcdf3.py index 033cd81e1d4..872d8baf635 100644 --- a/xarray/backends/netcdf3.py +++ b/xarray/backends/netcdf3.py @@ -5,7 +5,8 @@ import numpy as np -from .. import conventions, Variable +from .. 
import Variable +from ..conventions import coding from ..core import ops from ..core.pycompat import basestring, unicode_type, OrderedDict @@ -56,7 +57,7 @@ def coerce_nc3_dtype(arr): def maybe_convert_to_char_array(data, dims): if data.dtype.kind == 'S' and data.dtype.itemsize > 1: - data = conventions.string_to_char(data) + data = coding.string_to_char(data) dims = dims + ('string%s' % data.shape[-1],) return data, dims diff --git a/xarray/conventions/__init__.py b/xarray/conventions/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/xarray/conventions.py b/xarray/conventions/coding.py similarity index 99% rename from xarray/conventions.py rename to xarray/conventions/coding.py index 178ee8442db..514e9de71b4 100644 --- a/xarray/conventions.py +++ b/xarray/conventions/coding.py @@ -11,10 +11,10 @@ from collections import defaultdict from pandas.tslib import OutOfBoundsDatetime -from .core import indexing, ops, utils -from .core.formatting import format_timestamp, first_n_items, last_item -from .core.variable import as_variable, Variable -from .core.pycompat import iteritems, OrderedDict, PY3, basestring +from ..core import indexing, ops, utils +from ..core.formatting import format_timestamp, first_n_items, last_item +from ..core.variable import as_variable, Variable +from ..core.pycompat import iteritems, OrderedDict, PY3, basestring # standard calendars recognized by netcdftime @@ -929,8 +929,8 @@ def decode_cf(obj, concat_characters=True, mask_and_scale=True, ------- decoded : Dataset """ - from .core.dataset import Dataset - from .backends.common import AbstractDataStore + from ..core.dataset import Dataset + from ..backends.common import AbstractDataStore if isinstance(obj, Dataset): vars = obj._variables diff --git a/xarray/core/netcdftimeindex.py b/xarray/conventions/netcdftimeindex.py similarity index 70% rename from xarray/core/netcdftimeindex.py rename to xarray/conventions/netcdftimeindex.py index 43824e82d0e..aee6c3a3b35 100644 --- a/xarray/core/netcdftimeindex.py +++ b/xarray/conventions/netcdftimeindex.py @@ -4,7 +4,8 @@ import numpy as np import pandas as pd -from pandas.lib import isscalar +from xarray.core import pycompat +from xarray.core.utils import is_scalar def named(name, pattern): @@ -35,10 +36,12 @@ def build_pattern(date_sep='\-', datetime_sep='T', time_sep='\:'): return '^' + trailing_optional(pattern_list) + '$' +basic_pattern = build_pattern(date_sep='', time_sep='') +extended_pattern = build_pattern() +patterns = [basic_pattern, extended_pattern] + + def parse_iso8601(datetime_string): - basic_pattern = build_pattern(date_sep='', time_sep='') - extended_pattern = build_pattern() - patterns = [basic_pattern, extended_pattern] for pattern in patterns: match = re.match(pattern, datetime_string) if match: @@ -54,6 +57,10 @@ def _parse_iso8601_with_reso(date_type, timestr): for attr in ['year', 'month', 'day', 'hour', 'minute', 'second']: value = result.get(attr, None) if value is not None: + # Note ISO8601 conventions allow for fractional seconds; casting + # to an int means all seconds values get rounded down to the + # nearest integer. TODO: Consider adding support for sub-second + # resolution? replace[attr] = int(value) resolution = attr @@ -61,6 +68,11 @@ def _parse_iso8601_with_reso(date_type, timestr): def _parsed_string_to_bounds(date_type, resolution, parsed): + """Generalization of + pandas.tseries.index.DatetimeIndex._parsed_string_to_bounds + for use with non-standard calendars and netcdftime._netcdftime.datetime + objects. 
+ """ if resolution == 'year': return (date_type(parsed.year, 1, 1), date_type(parsed.year + 1, 1, 1) - timedelta(microseconds=1)) @@ -90,10 +102,12 @@ def _parsed_string_to_bounds(date_type, resolution, parsed): def get_date_field(datetimes, field): + """Adapted from pandas.tslib.get_date_field""" return [getattr(date, field) for date in datetimes] def _field_accessor(name, docstring=None): + """Adapted from pandas.tseries.index._field_accessor""" def f(self): return get_date_field(self._data, name) @@ -106,9 +120,23 @@ def get_date_type(self): return type(self._data[0]) +def assert_all_same_netcdftime_datetimes(data): + from netcdftime._netcdftime import datetime + + if not isinstance(data[0], datetime): + raise TypeError( + 'NetCDFTimeIndex requires netcdftime._netcdftime.datetime' + ' objects.') + if not all(isinstance(value, type(data[0])) for value in data): + raise TypeError( + 'NetCDFTimeIndex requires using netcdftime._netcdftime.datetime' + ' objects of all the same type.') + + class NetCDFTimeIndex(pd.Index): def __new__(cls, data): result = object.__new__(cls) + assert_all_same_netcdftime_datetimes(data) result._data = np.array(data) return result @@ -122,37 +150,33 @@ def __new__(cls, data): 'The microseconds of the datetime') date_type = property(get_date_type) - def _partial_date_slice(self, resolution, parsed, - use_lhs=True, use_rhs=True): + def _partial_date_slice(self, resolution, parsed): + """Adapted from + pandas.tseries.index.DatetimeIndex._partial_date_slice""" start, end = _parsed_string_to_bounds(self.date_type, resolution, parsed) - lhs_mask = (self._data >= start) if use_lhs else True - rhs_mask = (self._data <= end) if use_rhs else True + lhs_mask = (self._data >= start) + rhs_mask = (self._data <= end) return (lhs_mask & rhs_mask).nonzero()[0] - def _get_string_slice(self, key, use_lhs=True, use_rhs=True): + def _get_string_slice(self, key): + """Adapted from pandas.tseries.index.DatetimeIndex._get_string_slice""" parsed, resolution = _parse_iso8601_with_reso(self.date_type, key) - loc = self._partial_date_slice(resolution, parsed, use_lhs, use_rhs) + loc = self._partial_date_slice(resolution, parsed) return loc def get_loc(self, key, method=None, tolerance=None): - if isinstance(key, pd.compat.string_types): - result = self._get_string_slice(key) - # Prevents problem with __contains__ if key corresponds to only - # the first element in index (if we leave things as a list, - # np.any([0]) is False). - # Also coerces things to scalar coords in xarray if possible, - # which is consistent with the behavior with a DatetimeIndex. - if len(result) == 1: - return result[0] - else: - return result + """Adapted from pandas.tseries.index.DatetimeIndex.get_loc""" + if isinstance(key, pycompat.basestring): + return self._get_string_slice(key) else: return pd.Index.get_loc(self, key, method=method, tolerance=tolerance) def _maybe_cast_slice_bound(self, label, side, kind): - if isinstance(label, pd.compat.string_types): + """Adapted from + pandas.tseries.index.DatetimeIndex._maybe_cast_slice_bound""" + if isinstance(label, pycompat.basestring): parsed, resolution = _parse_iso8601_with_reso(self.date_type, label) start, end = _parsed_string_to_bounds(self.date_type, resolution, @@ -166,6 +190,7 @@ def _maybe_cast_slice_bound(self, label, side, kind): # TODO: Add ability to use integer range outside of iloc? # e.g. series[1:5]. 
def get_value(self, series, key): + """Adapted from pandas.tseries.index.DatetimeIndex.get_value""" if not isinstance(key, slice): return series.iloc[self.get_loc(key)] else: @@ -173,8 +198,11 @@ def get_value(self, series, key): key.start, key.stop, key.step)] def __contains__(self, key): + """Adapted from + pandas.tseries.base.DatetimeIndexOpsMixin.__contains__""" try: result = self.get_loc(key) - return isscalar(result) or type(result) == slice or np.any(result) + return (is_scalar(result) or type(result) == slice or + (isinstance(result, np.ndarray) and result.size)) except (KeyError, TypeError, ValueError): return False diff --git a/xarray/convert.py b/xarray/convert.py index 26822ea97c8..1bef9eb1477 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -7,7 +7,7 @@ import numpy as np from .core.dataarray import DataArray -from .conventions import ( +from .conventions.coding import ( maybe_encode_timedelta, maybe_encode_datetime, decode_cf) ignored_attrs = set(['name', 'tileIndex']) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 746916bf00c..0468d3d786a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -16,8 +16,8 @@ from . import indexing from . import alignment from . import formatting -from .. import conventions from .alignment import align +from ..conventions import coding from .coordinates import DatasetCoordinates, LevelCoordinatesSource, Indexes from .common import ImplementsDatasetReduce, BaseDataObject from .merge import (dataset_update_method, dataset_merge_method, @@ -875,7 +875,7 @@ def dump_to_store(self, store, encoder=None, sync=True, encoding=None, """Store dataset contents to a backends.*DataStore object.""" if encoding is None: encoding = {} - variables, attrs = conventions.encode_dataset_coordinates(self) + variables, attrs = coding.encode_dataset_coordinates(self) check_encoding = set() for k, enc in encoding.items(): diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_coding.py similarity index 88% rename from xarray/tests/test_conventions.py rename to xarray/tests/test_coding.py index 6c9d791660d..e4824948a84 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_coding.py @@ -6,19 +6,20 @@ import pandas as pd import warnings -from xarray import conventions, Variable, Dataset, open_dataset +from xarray import Variable, Dataset, open_dataset +from xarray.conventions import coding from xarray.core import utils, indexing from . 
import TestCase, requires_netCDF4, unittest from .test_backends import CFEncodedDataTest from xarray.core.pycompat import iteritems from xarray.backends.memory import InMemoryDataStore from xarray.backends.common import WritableCFDataStore -from xarray.conventions import decode_cf +from xarray.conventions.coding import decode_cf class TestMaskedAndScaledArray(TestCase): def test(self): - x = conventions.MaskedAndScaledArray(np.arange(3), fill_value=0) + x = coding.MaskedAndScaledArray(np.arange(3), fill_value=0) self.assertEqual(x.dtype, np.dtype('float')) self.assertEqual(x.shape, (3,)) self.assertEqual(x.size, 3) @@ -26,31 +27,31 @@ def test(self): self.assertEqual(len(x), 3) self.assertArrayEqual([np.nan, 1, 2], x) - x = conventions.MaskedAndScaledArray(np.arange(3), add_offset=1) + x = coding.MaskedAndScaledArray(np.arange(3), add_offset=1) self.assertArrayEqual(np.arange(3) + 1, x) - x = conventions.MaskedAndScaledArray(np.arange(3), scale_factor=2) + x = coding.MaskedAndScaledArray(np.arange(3), scale_factor=2) self.assertArrayEqual(2 * np.arange(3), x) - x = conventions.MaskedAndScaledArray(np.array([-99, -1, 0, 1, 2]), + x = coding.MaskedAndScaledArray(np.array([-99, -1, 0, 1, 2]), -99, 0.01, 1) expected = np.array([np.nan, 0.99, 1, 1.01, 1.02]) self.assertArrayEqual(expected, x) def test_0d(self): - x = conventions.MaskedAndScaledArray(np.array(0), fill_value=0) + x = coding.MaskedAndScaledArray(np.array(0), fill_value=0) self.assertTrue(np.isnan(x)) self.assertTrue(np.isnan(x[...])) - x = conventions.MaskedAndScaledArray(np.array(0), fill_value=10) + x = coding.MaskedAndScaledArray(np.array(0), fill_value=10) self.assertEqual(0, x[...]) def test_multiple_fill_value(self): - x = conventions.MaskedAndScaledArray( + x = coding.MaskedAndScaledArray( np.arange(4), fill_value=np.array([0, 1])) self.assertArrayEqual([np.nan, np.nan, 2, 3], x) - x = conventions.MaskedAndScaledArray( + x = coding.MaskedAndScaledArray( np.array(0), fill_value=np.array([0, 1])) self.assertTrue(np.isnan(x)) self.assertTrue(np.isnan(x[...])) @@ -59,7 +60,7 @@ def test_multiple_fill_value(self): class TestCharToStringArray(TestCase): def test_wrapper_class(self): array = np.array(list('abc'), dtype='S') - actual = conventions.CharToStringArray(array) + actual = coding.CharToStringArray(array) expected = np.array('abc', dtype='S') self.assertEqual(actual.dtype, expected.dtype) self.assertEqual(actual.shape, expected.shape) @@ -73,7 +74,7 @@ def test_wrapper_class(self): self.assertEqual(str(actual), 'abc') array = np.array([list('abc'), list('cdf')], dtype='S') - actual = conventions.CharToStringArray(array) + actual = coding.CharToStringArray(array) expected = np.array(['abc', 'cdf'], dtype='S') self.assertEqual(actual.dtype, expected.dtype) self.assertEqual(actual.shape, expected.shape) @@ -88,30 +89,30 @@ def test_wrapper_class(self): def test_char_to_string(self): array = np.array([['a', 'b', 'c'], ['d', 'e', 'f']]) expected = np.array(['abc', 'def']) - actual = conventions.char_to_string(array) + actual = coding.char_to_string(array) self.assertArrayEqual(actual, expected) expected = np.array(['ad', 'be', 'cf']) - actual = conventions.char_to_string(array.T) # non-contiguous + actual = coding.char_to_string(array.T) # non-contiguous self.assertArrayEqual(actual, expected) def test_string_to_char(self): array = np.array([['ab', 'cd'], ['ef', 'gh']]) expected = np.array([[['a', 'b'], ['c', 'd']], [['e', 'f'], ['g', 'h']]]) - actual = conventions.string_to_char(array) + actual = coding.string_to_char(array) 
self.assertArrayEqual(actual, expected) expected = np.array([[['a', 'b'], ['e', 'f']], [['c', 'd'], ['g', 'h']]]) - actual = conventions.string_to_char(array.T) + actual = coding.string_to_char(array.T) self.assertArrayEqual(actual, expected) class TestBoolTypeArray(TestCase): def test_booltype_array(self): x = np.array([1, 0, 1, 1, 0], dtype='i1') - bx = conventions.BoolTypeArray(x) + bx = coding.BoolTypeArray(x) self.assertEqual(bx.dtype, np.bool) self.assertArrayEqual(bx, np.array([True, False, True, True, False], dtype=np.bool)) @@ -159,7 +160,7 @@ def test_cf_datetime(self): with warnings.catch_warnings(): warnings.filterwarnings('ignore', 'Unable to decode time axis') - actual = conventions.decode_cf_datetime(num_dates, units, + actual = coding.decode_cf_datetime(num_dates, units, calendar) if (isinstance(actual, np.ndarray) and np.issubdtype(actual.dtype, np.datetime64)): @@ -172,7 +173,7 @@ def test_cf_datetime(self): else: actual_cmp = actual self.assertArrayEqual(expected, actual_cmp) - encoded, _, _ = conventions.encode_cf_datetime(actual, units, + encoded, _, _ = coding.encode_cf_datetime(actual, units, calendar) if '1-1-1' not in units: # pandas parses this date very strangely, so the original @@ -185,7 +186,7 @@ def test_cf_datetime(self): # verify that wrapping with a pandas.Index works # note that it *does not* currently work to even put # non-datetime64 compatible dates into a pandas.Index :( - encoded, _, _ = conventions.encode_cf_datetime( + encoded, _, _ = coding.encode_cf_datetime( pd.Index(actual), units, calendar) self.assertArrayEqual(num_dates, np.around(encoded, 1)) @@ -203,7 +204,7 @@ def test_decode_cf_datetime_overflow(self): expected = (datetime(1677, 12, 31), datetime(2262, 4, 12)) for i, day in enumerate(days): - result = conventions.decode_cf_datetime(day, units) + result = coding.decode_cf_datetime(day, units) self.assertEqual(result, expected[i]) @requires_netCDF4 @@ -213,7 +214,7 @@ def test_decode_cf_datetime_transition_to_invalid(self): ds = Dataset(coords={'time' : [0, 266 * 365]}) units = 'days since 2000-01-01 00:00:00' ds.time.attrs = dict(units=units) - ds_decoded = conventions.decode_cf(ds) + ds_decoded = coding.decode_cf(ds) expected = [datetime(2000, 1, 1, 0, 0), datetime(2265, 10, 28, 0, 0)] @@ -221,26 +222,26 @@ def test_decode_cf_datetime_transition_to_invalid(self): self.assertArrayEqual(ds_decoded.time.values, expected) def test_decoded_cf_datetime_array(self): - actual = conventions.DecodedCFDatetimeArray( + actual = coding.DecodedCFDatetimeArray( np.array([0, 1, 2]), 'days since 1900-01-01', 'standard') expected = pd.date_range('1900-01-01', periods=3).values self.assertEqual(actual.dtype, np.dtype('datetime64[ns]')) self.assertArrayEqual(actual, expected) # default calendar - actual = conventions.DecodedCFDatetimeArray( + actual = coding.DecodedCFDatetimeArray( np.array([0, 1, 2]), 'days since 1900-01-01') self.assertEqual(actual.dtype, np.dtype('datetime64[ns]')) self.assertArrayEqual(actual, expected) def test_slice_decoded_cf_datetime_array(self): - actual = conventions.DecodedCFDatetimeArray( + actual = coding.DecodedCFDatetimeArray( np.array([0, 1, 2]), 'days since 1900-01-01', 'standard') expected = pd.date_range('1900-01-01', periods=3).values self.assertEqual(actual.dtype, np.dtype('datetime64[ns]')) self.assertArrayEqual(actual[slice(0, 2)], expected[slice(0, 2)]) - actual = conventions.DecodedCFDatetimeArray( + actual = coding.DecodedCFDatetimeArray( np.array([0, 1, 2]), 'days since 1900-01-01', 'standard') expected = 
pd.date_range('1900-01-01', periods=3).values self.assertEqual(actual.dtype, np.dtype('datetime64[ns]')) @@ -251,7 +252,7 @@ def test_decode_cf_datetime_non_standard_units(self): # netCDFs from madis.noaa.gov use this format for their time units # they cannot be parsed by netcdftime, but pd.Timestamp works units = 'hours since 1-1-1970' - actual = conventions.decode_cf_datetime(np.arange(100), units) + actual = coding.decode_cf_datetime(np.arange(100), units) self.assertArrayEqual(actual, expected) def test_decode_cf_with_conflicting_fill_missing_value(self): @@ -260,20 +261,20 @@ def test_decode_cf_with_conflicting_fill_missing_value(self): 'missing_value': 0, '_FillValue': 1}) self.assertRaisesRegexp(ValueError, "_FillValue and missing_value", - lambda: conventions.decode_cf_variable(var)) + lambda: coding.decode_cf_variable(var)) var = Variable(['t'], np.arange(10), {'units': 'foobar', 'missing_value': np.nan, '_FillValue': np.nan}) - var = conventions.decode_cf_variable(var) + var = coding.decode_cf_variable(var) self.assertIsNotNone(var) var = Variable(['t'], np.arange(10), {'units': 'foobar', 'missing_value': np.float32(np.nan), '_FillValue': np.float32(np.nan)}) - var = conventions.decode_cf_variable(var) + var = coding.decode_cf_variable(var) self.assertIsNotNone(var) @requires_netCDF4 @@ -285,7 +286,7 @@ def test_decode_cf_datetime_non_iso_strings(self): (np.arange(100), 'hours since 2000-1-1 0'), (np.arange(100), 'hours since 2000-01-01 0:00')] for num_dates, units in cases: - actual = conventions.decode_cf_datetime(num_dates, units) + actual = coding.decode_cf_datetime(num_dates, units) self.assertArrayEqual(actual, expected) @requires_netCDF4 @@ -302,7 +303,7 @@ def test_decode_non_standard_calendar(self): expected = times.values with warnings.catch_warnings(): warnings.filterwarnings('ignore', 'Unable to decode time axis') - actual = conventions.decode_cf_datetime(noleap_time, units, + actual = coding.decode_cf_datetime(noleap_time, units, calendar=calendar) self.assertEqual(actual.dtype, np.dtype('M8[ns]')) abs_diff = abs(actual - expected) @@ -320,7 +321,7 @@ def test_decode_non_standard_calendar_single_element(self): with warnings.catch_warnings(): warnings.filterwarnings('ignore', 'Unable to decode time axis') - actual = conventions.decode_cf_datetime(num_time, units, + actual = coding.decode_cf_datetime(num_time, units, calendar=calendar) self.assertEqual(actual.dtype, np.dtype('M8[ns]')) @@ -333,7 +334,7 @@ def test_decode_non_standard_calendar_single_element_fallback(self): for calendar in ['360_day', 'all_leap', '366_day']: num_time = nc4.date2num(dt, units, calendar) with self.assertWarns('Unable to decode time axis'): - actual = conventions.decode_cf_datetime(num_time, units, + actual = coding.decode_cf_datetime(num_time, units, calendar=calendar) expected = np.asarray(nc4.num2date(num_time, units, calendar)) print(num_time, calendar, actual, expected) @@ -360,7 +361,7 @@ def test_decode_non_standard_calendar_multidim_time(self): expected2 = times2.values with warnings.catch_warnings(): warnings.filterwarnings('ignore', 'Unable to decode time axis') - actual = conventions.decode_cf_datetime(mdim_time, units, + actual = coding.decode_cf_datetime(mdim_time, units, calendar=calendar) self.assertEqual(actual.dtype, np.dtype('M8[ns]')) self.assertArrayEqual(actual[:, 0], expected1) @@ -379,7 +380,7 @@ def test_decode_non_standard_calendar_fallback(self): with warnings.catch_warnings(record=True) as w: warnings.simplefilter('always') - actual = 
conventions.decode_cf_datetime(num_times, units, + actual = coding.decode_cf_datetime(num_times, units, calendar=calendar) self.assertEqual(len(w), 1) self.assertIn('Unable to decode time axis', @@ -399,14 +400,14 @@ def test_cf_datetime_nan(self): ]: with warnings.catch_warnings(): warnings.filterwarnings('ignore', 'All-NaN') - actual = conventions.decode_cf_datetime(num_dates, units) + actual = coding.decode_cf_datetime(num_dates, units) expected = np.array(expected_list, dtype='datetime64[ns]') self.assertArrayEqual(expected, actual) @requires_netCDF4 def test_decoded_cf_datetime_array_2d(self): # regression test for GH1229 - array = conventions.DecodedCFDatetimeArray(np.array([[0, 1], [2, 3]]), + array = coding.DecodedCFDatetimeArray(np.array([[0, 1], [2, 3]]), 'days since 2000-01-01') assert array.dtype == 'datetime64[ns]' expected = pd.date_range('2000-01-01', periods=4).values.reshape(2, 2) @@ -431,7 +432,7 @@ def test_infer_datetime_units(self): (pd.to_datetime(['NaT']), 'days since 1970-01-01 00:00:00'), ]: - self.assertEqual(expected, conventions.infer_datetime_units(dates)) + self.assertEqual(expected, coding.infer_datetime_units(dates)) def test_cf_timedelta(self): examples = [ @@ -453,18 +454,18 @@ def test_cf_timedelta(self): numbers = np.array(numbers) expected = numbers - actual, _ = conventions.encode_cf_timedelta(timedeltas, units) + actual, _ = coding.encode_cf_timedelta(timedeltas, units) self.assertArrayEqual(expected, actual) self.assertEqual(expected.dtype, actual.dtype) if units is not None: expected = timedeltas - actual = conventions.decode_cf_timedelta(numbers, units) + actual = coding.decode_cf_timedelta(numbers, units) self.assertArrayEqual(expected, actual) self.assertEqual(expected.dtype, actual.dtype) expected = np.timedelta64('NaT', 'ns') - actual = conventions.decode_cf_timedelta(np.array(np.nan), 'days') + actual = coding.decode_cf_timedelta(np.array(np.nan), 'days') self.assertArrayEqual(expected, actual) def test_cf_timedelta_2d(self): @@ -473,7 +474,7 @@ def test_cf_timedelta_2d(self): timedeltas = np.atleast_2d(pd.to_timedelta(timedeltas, box=False)) expected = timedeltas - actual = conventions.decode_cf_timedelta(numbers, units) + actual = coding.decode_cf_timedelta(numbers, units) self.assertArrayEqual(expected, actual) self.assertEqual(expected.dtype, actual.dtype) @@ -483,7 +484,7 @@ def test_infer_timedelta_units(self): (pd.to_timedelta(['1h', '1 day 1 hour']), 'hours'), (pd.to_timedelta(['1m', '2m', np.nan]), 'minutes'), (pd.to_timedelta(['1m3s', '1m4s']), 'seconds')]: - self.assertEqual(expected, conventions.infer_timedelta_units(deltas)) + self.assertEqual(expected, coding.infer_timedelta_units(deltas)) def test_invalid_units_raises_eagerly(self): ds = Dataset({'time': ('time', [0, 1], {'units': 'foobar since 123'})}) @@ -506,7 +507,7 @@ def test_dataset_repr_with_netcdf4_datetimes(self): # this should not throw a warning (GH1111) with warnings.catch_warnings(): warnings.filterwarnings('error') - conventions.DecodedCFDatetimeArray(np.asarray([722624]), + coding.DecodedCFDatetimeArray(np.asarray([722624]), "days since 0001-01-01") @@ -514,7 +515,7 @@ class TestNativeEndiannessArray(TestCase): def test(self): x = np.arange(5, dtype='>i8') expected = np.arange(5, dtype='int64') - a = conventions.NativeEndiannessArray(x) + a = coding.NativeEndiannessArray(x) assert a.dtype == expected.dtype assert a.dtype == expected[:].dtype self.assertArrayEqual(a, expected) @@ -532,13 +533,13 @@ def test_incompatible_attributes(self): ] for var in invalid_vars: 
with self.assertRaises(ValueError): - conventions.encode_cf_variable(var) + coding.encode_cf_variable(var) def test_missing_fillvalue(self): v = Variable(['x'], np.array([np.nan, 1, 2, 3])) v.encoding = {'dtype': 'int16'} with self.assertWarns('floating point data as an integer'): - conventions.encode_cf_variable(v) + coding.encode_cf_variable(v) @requires_netCDF4 @@ -552,26 +553,26 @@ def test_dataset(self): expected = Dataset({'foo': ('t', [0, 0, 0], {'units': 'bar'})}, {'t': pd.date_range('2000-01-01', periods=3), 'y': ('t', [5.0, 10.0, np.nan])}) - actual = conventions.decode_cf(original) + actual = coding.decode_cf(original) self.assertDatasetIdentical(expected, actual) def test_invalid_coordinates(self): # regression test for GH308 original = Dataset({'foo': ('t', [1, 2], {'coordinates': 'invalid'})}) - actual = conventions.decode_cf(original) + actual = coding.decode_cf(original) self.assertDatasetIdentical(original, actual) def test_decode_coordinates(self): # regression test for GH610 original = Dataset({'foo': ('t', [1, 2], {'coordinates': 'x'}), 'x': ('t', [4, 5])}) - actual = conventions.decode_cf(original) + actual = coding.decode_cf(original) self.assertEqual(actual.foo.encoding['coordinates'], 'x') def test_0d_int32_encoding(self): original = Variable((), np.int32(0), encoding={'dtype': 'int64'}) expected = Variable((), np.int64(0)) - actual = conventions.maybe_encode_dtype(original) + actual = coding.maybe_encode_dtype(original) self.assertDatasetIdentical(expected, actual) def test_decode_cf_with_multiple_missing_values(self): @@ -579,7 +580,7 @@ def test_decode_cf_with_multiple_missing_values(self): {'missing_value': np.array([0, 1])}) expected = Variable(['t'], [np.nan, np.nan, 2], {}) with warnings.catch_warnings(record=True) as w: - actual = conventions.decode_cf_variable(original) + actual = coding.decode_cf_variable(original) self.assertDatasetIdentical(expected, actual) self.assertIn('variable has multiple fill', str(w[0].message)) @@ -595,8 +596,8 @@ def test_decode_cf_with_drop_variables(self): 'foo': (('t', 'x'), [[0, 0, 0], [1, 1, 1], [2, 2, 2]], {'units': 'bar'}), 'y': ('t', [5, 10, np.nan]) }) - actual = conventions.decode_cf(original, drop_variables=("x",)) - actual2 = conventions.decode_cf(original, drop_variables="x") + actual = coding.decode_cf(original, drop_variables=("x",)) + actual2 = coding.decode_cf(original, drop_variables="x") self.assertDatasetIdentical(expected, actual) self.assertDatasetIdentical(expected, actual2) diff --git a/xarray/tests/test_netcdftimeindex.py b/xarray/tests/test_netcdftimeindex.py index 469e09b259a..875af16f997 100644 --- a/xarray/tests/test_netcdftimeindex.py +++ b/xarray/tests/test_netcdftimeindex.py @@ -3,6 +3,9 @@ import pandas as pd import xarray as xr +from xarray.conventions.netcdftimeindex import ( + parse_iso8601, NetCDFTimeIndex, assert_all_same_netcdftime_datetimes, + _parsed_string_to_bounds, _parse_iso8601_with_reso) from xarray.tests import assert_array_equal, assert_identical # Putting this at the module level for now, though technically we @@ -15,8 +18,7 @@ def date_dict(year=None, month=None, day=None, return dict(year=year, month=month, day=day, hour=hour, minute=minute, second=second) - -@pytest.mark.parametrize(('string', 'expected'), [ +ISO8601_STRING_TESTS = [ ('1999', date_dict(year='1999')), ('199901', date_dict(year='1999', month='01')), ('1999-01', date_dict(year='1999', month='01')), @@ -40,12 +42,16 @@ def date_dict(year=None, month=None, day=None, ('1999-01-01T12:34:56.78', date_dict( 
year='1999', month='01', day='01', hour='12', minute='34', second='56.78')) -], ids=['year', 'month', 'month-dash', 'day', 'day-dash', 'hour', 'hour-dash', - 'minute', 'minute-dash', 'second', 'second-dash', 'second-dec', - 'second-dec-dash']) -def test_parse_iso8601(string, expected): - from xarray.core.netcdftimeindex import parse_iso8601 +] +ISO8601_STRING_TEST_IDS = [ + 'year', 'month', 'month-dash', 'day', 'day-dash', 'hour', 'hour-dash', + 'minute', 'minute-dash', 'second', 'second-dash', 'second-dec', + 'second-dec-dash'] + +@pytest.mark.parametrize(('string', 'expected'), ISO8601_STRING_TESTS, + ids=ISO8601_STRING_TEST_IDS) +def test_parse_iso8601(string, expected): result = parse_iso8601(string) assert result == expected @@ -69,8 +75,6 @@ def date_type(request): @pytest.fixture def index(date_type): - from xarray.core.netcdftimeindex import NetCDFTimeIndex - dates = [date_type(1, 1, 1), date_type(1, 2, 1), date_type(2, 1, 1), date_type(2, 2, 1)] return NetCDFTimeIndex(dates) @@ -95,9 +99,9 @@ def df(index): @pytest.fixture def feb_days(date_type): from netcdftime import DatetimeAllLeap, Datetime360Day - if date_type == DatetimeAllLeap: + if date_type is DatetimeAllLeap: return 29 - elif date_type == Datetime360Day: + elif date_type is Datetime360Day: return 30 else: return 28 @@ -106,12 +110,29 @@ def feb_days(date_type): @pytest.fixture def dec_days(date_type): from netcdftime import Datetime360Day - if date_type == Datetime360Day: + if date_type is Datetime360Day: return 30 else: return 31 +def test_assert_all_netcdftime_datetimes(date_type, index): + from netcdftime import DatetimeNoLeap, DatetimeAllLeap + + if date_type is DatetimeNoLeap: + mixed_date_types = [date_type(1, 1, 1), DatetimeAllLeap(1, 2, 1)] + else: + mixed_date_types = [date_type(1, 1, 1), DatetimeNoLeap(1, 2, 1)] + with pytest.raises(TypeError): + assert_all_same_netcdftime_datetimes(mixed_date_types) + + with pytest.raises(TypeError): + assert_all_same_netcdftime_datetimes([1, date_type(1, 1, 1)]) + + assert_all_same_netcdftime_datetimes([date_type(1, 1, 1), + date_type(1, 2, 1)]) + + @pytest.mark.parametrize(('field', 'expected'), [ ('year', [1, 1, 2, 2]), ('month', [1, 2, 1, 2]), @@ -136,7 +157,6 @@ def test_netcdftimeindex_field_accessors(index, field, expected): ids=['year', 'month', 'day', 'hour', 'minute', 'second'] ) def test_parse_iso8601_with_reso(date_type, string, date_args, reso): - from xarray.core.netcdftimeindex import _parse_iso8601_with_reso expected_date = date_type(*date_args) expected_reso = reso result_date, result_reso = _parse_iso8601_with_reso(date_type, string) @@ -145,7 +165,6 @@ def test_parse_iso8601_with_reso(date_type, string, date_args, reso): def test_parse_string_to_bounds_year(date_type, dec_days): - from xarray.core.netcdftimeindex import _parsed_string_to_bounds parsed = date_type(2, 2, 10, 6, 2, 8, 1) expected_start = date_type(2, 1, 1) expected_end = date_type(2, 12, dec_days, 23, 59, 59, 999999) @@ -156,7 +175,6 @@ def test_parse_string_to_bounds_year(date_type, dec_days): def test_parse_string_to_bounds_month_feb(date_type, feb_days): - from xarray.core.netcdftimeindex import _parsed_string_to_bounds parsed = date_type(2, 2, 10, 6, 2, 8, 1) expected_start = date_type(2, 2, 1) expected_end = date_type(2, 2, feb_days, 23, 59, 59, 999999) @@ -167,7 +185,6 @@ def test_parse_string_to_bounds_month_feb(date_type, feb_days): def test_parse_string_to_bounds_month_dec(date_type, dec_days): - from xarray.core.netcdftimeindex import _parsed_string_to_bounds parsed = date_type(2, 
12, 1) expected_start = date_type(2, 12, 1) expected_end = date_type(2, 12, dec_days, 23, 59, 59, 999999) @@ -185,7 +202,6 @@ def test_parse_string_to_bounds_month_dec(date_type, dec_days): ], ids=['day', 'hour', 'minute', 'second']) def test_parsed_string_to_bounds_sub_monthly(date_type, reso, ex_start_args, ex_end_args): - from xarray.core.netcdftimeindex import _parsed_string_to_bounds parsed = date_type(2, 2, 10, 6, 2, 8, 1) expected_start = date_type(*ex_start_args) expected_end = date_type(*ex_end_args) @@ -274,9 +290,6 @@ def test_sel_date_scalar(da, date_type, index): result = da.sel(time=date_type(1, 1, 1)) assert_identical(result, expected) - result = da.sel(time='0001-01-01') - assert_identical(result, expected) - def test_isel(da, index): expected = xr.DataArray(1).assign_coords(time=index[0]) @@ -290,7 +303,7 @@ def test_isel(da, index): @pytest.fixture def scalar_args(date_type): - return ['0001-01-01', date_type(1, 1, 1)] + return [date_type(1, 1, 1)] @pytest.fixture @@ -307,7 +320,7 @@ def test_indexing_in_series_getitem(series, index, scalar_args, range_args): expected = pd.Series([1, 2], index=index[:2]) for arg in range_args: - pd.util.testing.assert_series_equal(series[arg], expected) + assert series[arg].equals(expected) def test_indexing_in_series_loc(series, index, scalar_args, range_args): @@ -316,7 +329,7 @@ def test_indexing_in_series_loc(series, index, scalar_args, range_args): expected = pd.Series([1, 2], index=index[:2]) for arg in range_args: - pd.util.testing.assert_series_equal(series.loc[arg], expected) + assert series.loc[arg].equals(expected) def test_indexing_in_series_iloc(series, index): @@ -324,26 +337,27 @@ def test_indexing_in_series_iloc(series, index): assert series.iloc[0] == expected expected = pd.Series([1, 2], index=index[:2]) - pd.util.testing.assert_series_equal(series.iloc[:2], expected) + assert series.iloc[:2].equals(expected) def test_indexing_in_dataframe_loc(df, index, scalar_args, range_args): expected = pd.Series([1], name=index[0]) for arg in scalar_args: result = df.loc[arg] - pd.util.testing.assert_series_equal(result, expected) + assert result.equals(expected) expected = pd.DataFrame([1, 2], index=index[:2]) for arg in range_args: result = df.loc[arg] - pd.util.testing.assert_frame_equal(result, expected) + assert result.equals(expected) def test_indexing_in_dataframe_iloc(df, index): expected = pd.Series([1], name=index[0]) result = df.iloc[0] + assert result.equals(expected) pd.util.testing.assert_series_equal(result, expected) expected = pd.DataFrame([1, 2], index=index[:2]) result = df.iloc[:2] - pd.util.testing.assert_frame_equal(result, expected) + assert result.equals(expected) From 7beddc1849450b82dd05d71608964a5ab17437c3 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 11 Feb 2017 17:01:57 -0500 Subject: [PATCH 04/58] Address second round of review comments --- xarray/conventions/netcdftimeindex.py | 42 +++-- xarray/tests/test_netcdftimeindex.py | 234 ++++++++++++++++++++------ 2 files changed, 209 insertions(+), 67 deletions(-) diff --git a/xarray/conventions/netcdftimeindex.py b/xarray/conventions/netcdftimeindex.py index aee6c3a3b35..6c9fe300e9a 100644 --- a/xarray/conventions/netcdftimeindex.py +++ b/xarray/conventions/netcdftimeindex.py @@ -28,7 +28,7 @@ def build_pattern(date_sep='\-', datetime_sep='T', time_sep='\:'): (date_sep, 'day', '\d{2}'), (datetime_sep, 'hour', '\d{2}'), (time_sep, 'minute', '\d{2}'), - (time_sep, 'second', '\d{2}' + optional('\.\d+'))] + (time_sep, 'second', '\d{2}')] pattern_list = 
[] for sep, name, sub_pattern in pieces: pattern_list.append((sep if sep else '') + named(name, sub_pattern)) @@ -36,13 +36,13 @@ def build_pattern(date_sep='\-', datetime_sep='T', time_sep='\:'): return '^' + trailing_optional(pattern_list) + '$' -basic_pattern = build_pattern(date_sep='', time_sep='') -extended_pattern = build_pattern() -patterns = [basic_pattern, extended_pattern] +_BASIC_PATTERN = build_pattern(date_sep='', time_sep='') +_EXTENDED_PATTERN = build_pattern() +_PATTERNS = [_BASIC_PATTERN, _EXTENDED_PATTERN] def parse_iso8601(datetime_string): - for pattern in patterns: + for pattern in _PATTERNS: match = re.match(pattern, datetime_string) if match: return match.groupdict() @@ -57,10 +57,8 @@ def _parse_iso8601_with_reso(date_type, timestr): for attr in ['year', 'month', 'day', 'hour', 'minute', 'second']: value = result.get(attr, None) if value is not None: - # Note ISO8601 conventions allow for fractional seconds; casting - # to an int means all seconds values get rounded down to the - # nearest integer. TODO: Consider adding support for sub-second - # resolution? + # Note ISO8601 conventions allow for fractional seconds. + # TODO: Consider adding support for sub-second resolution? replace[attr] = int(value) resolution = attr @@ -120,23 +118,31 @@ def get_date_type(self): return type(self._data[0]) -def assert_all_same_netcdftime_datetimes(data): - from netcdftime._netcdftime import datetime +def assert_all_valid_date_type(data): + from netcdftime import ( + DatetimeJulian, DatetimeNoLeap, DatetimeAllLeap, + DatetimeGregorian, DatetimeProlepticGregorian, Datetime360Day) - if not isinstance(data[0], datetime): + valid_types = (DatetimeJulian, DatetimeNoLeap, DatetimeAllLeap, + DatetimeGregorian, DatetimeProlepticGregorian, + Datetime360Day) + + sample = data[0] + date_type = type(sample) + if not isinstance(sample, valid_types): raise TypeError( - 'NetCDFTimeIndex requires netcdftime._netcdftime.datetime' - ' objects.') - if not all(isinstance(value, type(data[0])) for value in data): + 'NetCDFTimeIndex requires netcdftime._netcdftime.datetime ' + 'objects. Got object of {}.'.format(date_type)) + if not all(isinstance(value, date_type) for value in data): raise TypeError( - 'NetCDFTimeIndex requires using netcdftime._netcdftime.datetime' - ' objects of all the same type.') + 'NetCDFTimeIndex requires using netcdftime._netcdftime.datetime ' + 'objects of all the same type. 
Got\n{}.'.format(data)) class NetCDFTimeIndex(pd.Index): def __new__(cls, data): result = object.__new__(cls) - assert_all_same_netcdftime_datetimes(data) + assert_all_valid_date_type(data) result._data = np.array(data) return result diff --git a/xarray/tests/test_netcdftimeindex.py b/xarray/tests/test_netcdftimeindex.py index 875af16f997..62b0c3d7397 100644 --- a/xarray/tests/test_netcdftimeindex.py +++ b/xarray/tests/test_netcdftimeindex.py @@ -3,8 +3,9 @@ import pandas as pd import xarray as xr +from datetime import timedelta from xarray.conventions.netcdftimeindex import ( - parse_iso8601, NetCDFTimeIndex, assert_all_same_netcdftime_datetimes, + parse_iso8601, NetCDFTimeIndex, assert_all_valid_date_type, _parsed_string_to_bounds, _parse_iso8601_with_reso) from xarray.tests import assert_array_equal, assert_identical @@ -18,46 +19,38 @@ def date_dict(year=None, month=None, day=None, return dict(year=year, month=month, day=day, hour=hour, minute=minute, second=second) -ISO8601_STRING_TESTS = [ - ('1999', date_dict(year='1999')), - ('199901', date_dict(year='1999', month='01')), - ('1999-01', date_dict(year='1999', month='01')), - ('19990101', date_dict(year='1999', month='01', day='01')), - ('1999-01-01', date_dict(year='1999', month='01', day='01')), - ('19990101T12', date_dict(year='1999', month='01', day='01', hour='12')), - ('1999-01-01T12', date_dict(year='1999', month='01', day='01', hour='12')), - ('19990101T1234', date_dict( +ISO8601_STRING_TESTS = { + 'year': ('1999', date_dict(year='1999')), + 'month': ('199901', date_dict(year='1999', month='01')), + 'month-dash': ('1999-01', date_dict(year='1999', month='01')), + 'day': ('19990101', date_dict(year='1999', month='01', day='01')), + 'day-dash': ('1999-01-01', date_dict(year='1999', month='01', day='01')), + 'hour': ('19990101T12', date_dict( + year='1999', month='01', day='01', hour='12')), + 'hour-dash': ('1999-01-01T12', date_dict( + year='1999', month='01', day='01', hour='12')), + 'minute': ('19990101T1234', date_dict( year='1999', month='01', day='01', hour='12', minute='34')), - ('1999-01-01T12:34', date_dict( + 'minute-dash': ('1999-01-01T12:34', date_dict( year='1999', month='01', day='01', hour='12', minute='34')), - ('19990101T123456', date_dict( + 'second': ('19990101T123456', date_dict( year='1999', month='01', day='01', hour='12', minute='34', second='56')), - ('1999-01-01T12:34:56', date_dict( + 'second-dash': ('1999-01-01T12:34:56', date_dict( year='1999', month='01', day='01', hour='12', minute='34', - second='56')), - ('19990101T123456.78', date_dict( - year='1999', month='01', day='01', hour='12', minute='34', - second='56.78')), - ('1999-01-01T12:34:56.78', date_dict( - year='1999', month='01', day='01', hour='12', minute='34', - second='56.78')) -] -ISO8601_STRING_TEST_IDS = [ - 'year', 'month', 'month-dash', 'day', 'day-dash', 'hour', 'hour-dash', - 'minute', 'minute-dash', 'second', 'second-dash', 'second-dec', - 'second-dec-dash'] + second='56')) +} -@pytest.mark.parametrize(('string', 'expected'), ISO8601_STRING_TESTS, - ids=ISO8601_STRING_TEST_IDS) +@pytest.mark.parametrize(('string', 'expected'), ISO8601_STRING_TESTS.values(), + ids=ISO8601_STRING_TESTS.keys()) def test_parse_iso8601(string, expected): result = parse_iso8601(string) assert result == expected - if '.' 
not in string: - with pytest.raises(ValueError): - parse_iso8601(string + '3') + with pytest.raises(ValueError): + parse_iso8601(string + '3') + parse_iso8601(string + '.3') def netcdftime_date_types(): @@ -80,6 +73,13 @@ def index(date_type): return NetCDFTimeIndex(dates) +@pytest.fixture +def monotonic_decreasing_index(date_type): + dates = [date_type(2, 2, 1), date_type(2, 1, 1), + date_type(1, 2, 1), date_type(1, 1, 1)] + return NetCDFTimeIndex(dates) + + @pytest.fixture def da(index): return xr.DataArray([1, 2, 3, 4], coords=[index], @@ -124,13 +124,12 @@ def test_assert_all_netcdftime_datetimes(date_type, index): else: mixed_date_types = [date_type(1, 1, 1), DatetimeNoLeap(1, 2, 1)] with pytest.raises(TypeError): - assert_all_same_netcdftime_datetimes(mixed_date_types) + assert_all_valid_date_type(mixed_date_types) with pytest.raises(TypeError): - assert_all_same_netcdftime_datetimes([1, date_type(1, 1, 1)]) + assert_all_valid_date_type([1, date_type(1, 1, 1)]) - assert_all_same_netcdftime_datetimes([date_type(1, 1, 1), - date_type(1, 2, 1)]) + assert_all_valid_date_type([date_type(1, 1, 1), date_type(1, 2, 1)]) @pytest.mark.parametrize(('field', 'expected'), [ @@ -140,8 +139,7 @@ def test_assert_all_netcdftime_datetimes(date_type, index): ('hour', [0, 0, 0, 0]), ('minute', [0, 0, 0, 0]), ('second', [0, 0, 0, 0]), - ('microsecond', [0, 0, 0, 0]) -], ids=['year', 'month', 'day', 'hour', 'minute', 'second', 'microsecond']) + ('microsecond', [0, 0, 0, 0])]) def test_netcdftimeindex_field_accessors(index, field, expected): result = getattr(index, field) assert_array_equal(result, expected) @@ -153,9 +151,7 @@ def test_netcdftimeindex_field_accessors(index, field, expected): ('19990202', (1999, 2, 2), 'day'), ('19990202T01', (1999, 2, 2, 1), 'hour'), ('19990202T0101', (1999, 2, 2, 1, 1), 'minute'), - ('19990202T010156', (1999, 2, 2, 1, 1, 56), 'second')], - ids=['year', 'month', 'day', 'hour', 'minute', 'second'] -) + ('19990202T010156', (1999, 2, 2, 1, 1, 56), 'second')]) def test_parse_iso8601_with_reso(date_type, string, date_args, reso): expected_date = date_type(*date_args) expected_reso = reso @@ -198,11 +194,10 @@ def test_parse_string_to_bounds_month_dec(date_type, dec_days): ('day', (2, 2, 10), (2, 2, 10, 23, 59, 59, 999999)), ('hour', (2, 2, 10, 6), (2, 2, 10, 6, 59, 59, 999999)), ('minute', (2, 2, 10, 6, 2), (2, 2, 10, 6, 2, 59, 999999)), - ('second', (2, 2, 10, 6, 2, 8), (2, 2, 10, 6, 2, 8, 999999)) -], ids=['day', 'hour', 'minute', 'second']) + ('second', (2, 2, 10, 6, 2, 8), (2, 2, 10, 6, 2, 8, 999999))]) def test_parsed_string_to_bounds_sub_monthly(date_type, reso, ex_start_args, ex_end_args): - parsed = date_type(2, 2, 10, 6, 2, 8, 1) + parsed = date_type(2, 2, 10, 6, 2, 8, 123456) expected_start = date_type(*ex_start_args) expected_end = date_type(*ex_end_args) @@ -212,6 +207,11 @@ def test_parsed_string_to_bounds_sub_monthly(date_type, reso, assert result_end == expected_end +def test_parsed_string_to_bounds_raises(date_type): + with pytest.raises(KeyError): + _parsed_string_to_bounds(date_type, 'a', date_type(1, 1, 1)) + + def test_get_loc(date_type, index): result = index.get_loc('0001') expected = [0, 1] @@ -247,8 +247,30 @@ def test_get_slice_bound(date_type, index, kind): assert result == expected +@pytest.mark.parametrize('kind', ['loc', 'getitem']) +def test_get_slice_bound_decreasing_index( + date_type, monotonic_decreasing_index, kind): + result = monotonic_decreasing_index.get_slice_bound('0001', 'left', kind) + expected = 2 + assert result == expected + + 
result = monotonic_decreasing_index.get_slice_bound('0001', 'right', kind) + expected = 4 + assert result == expected + + result = monotonic_decreasing_index.get_slice_bound( + date_type(1, 3, 1), 'left', kind) + expected = 2 + assert result == expected + + result = monotonic_decreasing_index.get_slice_bound( + date_type(1, 3, 1), 'right', kind) + expected = 2 + assert result == expected + + def test_date_type_property(date_type, index): - assert index.date_type == date_type + assert index.date_type is date_type def test_contains(date_type, index): @@ -265,11 +287,15 @@ def test_groupby(da): assert_identical(result, expected) -@pytest.mark.parametrize('sel_arg', [ - '0001', - slice('0001-01-01', '0001-12-30'), - [True, True, False, False] -], ids=['string', 'string-slice', 'bool-list']) +SEL_STRING_OR_LIST_TESTS = { + 'string': '0001', + 'string-slice': slice('0001-01-01', '0001-12-30'), + 'bool-list': [True, True, False, False] +} + + +@pytest.mark.parametrize('sel_arg', SEL_STRING_OR_LIST_TESTS.values(), + ids=SEL_STRING_OR_LIST_TESTS.keys()) def test_sel_string_or_list(da, index, sel_arg): expected = xr.DataArray([1, 2], coords=[index[:2]], dims=['time']) result = da.sel(time=sel_arg) @@ -291,6 +317,116 @@ def test_sel_date_scalar(da, date_type, index): assert_identical(result, expected) +@pytest.mark.parametrize('sel_kwargs', [ + {'method': 'nearest'}, + {'method': 'nearest', 'tolerance': timedelta(days=70)} +]) +def test_sel_date_scalar_nearest(da, date_type, index, sel_kwargs): + expected = xr.DataArray(2).assign_coords(time=index[1]) + result = da.sel(time=date_type(1, 4, 1), **sel_kwargs) + assert_identical(result, expected) + + expected = xr.DataArray(3).assign_coords(time=index[2]) + result = da.sel(time=date_type(1, 11, 1), **sel_kwargs) + assert_identical(result, expected) + + +@pytest.mark.parametrize('sel_kwargs', [ + {'method': 'pad'}, + {'method': 'pad', 'tolerance': timedelta(days=365)} +]) +def test_sel_date_scalar_pad(da, date_type, index, sel_kwargs): + expected = xr.DataArray(2).assign_coords(time=index[1]) + result = da.sel(time=date_type(1, 4, 1), **sel_kwargs) + assert_identical(result, expected) + + expected = xr.DataArray(2).assign_coords(time=index[1]) + result = da.sel(time=date_type(1, 11, 1), **sel_kwargs) + assert_identical(result, expected) + + +@pytest.mark.parametrize('sel_kwargs', [ + {'method': 'backfill'}, + {'method': 'backfill', 'tolerance': timedelta(days=365)} +]) +def test_sel_date_scalar_backfill(da, date_type, index, sel_kwargs): + expected = xr.DataArray(3).assign_coords(time=index[2]) + result = da.sel(time=date_type(1, 4, 1), **sel_kwargs) + assert_identical(result, expected) + + expected = xr.DataArray(3).assign_coords(time=index[2]) + result = da.sel(time=date_type(1, 11, 1), **sel_kwargs) + assert_identical(result, expected) + + +@pytest.mark.parametrize('sel_kwargs', [ + {'method': 'pad', 'tolerance': timedelta(days=20)}, + {'method': 'backfill', 'tolerance': timedelta(days=20)}, + {'method': 'nearest', 'tolerance': timedelta(days=20)}, +]) +def test_sel_date_scalar_tolerance_raises(da, date_type, sel_kwargs): + with pytest.raises(KeyError): + da.sel(time=date_type(1, 5, 1), **sel_kwargs) + + +@pytest.mark.parametrize('sel_kwargs', [ + {'method': 'nearest'}, + {'method': 'nearest', 'tolerance': timedelta(days=70)} +]) +def test_sel_date_list_nearest(da, date_type, index, sel_kwargs): + expected = xr.DataArray( + [2, 2], coords=[[index[1], index[1]]], dims=['time']) + result = da.sel( + time=[date_type(1, 3, 1), date_type(1, 4, 1)], 
**sel_kwargs) + assert_identical(result, expected) + + expected = xr.DataArray( + [2, 3], coords=[[index[1], index[2]]], dims=['time']) + result = da.sel( + time=[date_type(1, 3, 1), date_type(1, 12, 1)], **sel_kwargs) + assert_identical(result, expected) + + expected = xr.DataArray( + [3, 3], coords=[[index[2], index[2]]], dims=['time']) + result = da.sel( + time=[date_type(1, 11, 1), date_type(1, 12, 1)], **sel_kwargs) + assert_identical(result, expected) + + +@pytest.mark.parametrize('sel_kwargs', [ + {'method': 'pad'}, + {'method': 'pad', 'tolerance': timedelta(days=365)} +]) +def test_sel_date_list_pad(da, date_type, index, sel_kwargs): + expected = xr.DataArray( + [2, 2], coords=[[index[1], index[1]]], dims=['time']) + result = da.sel( + time=[date_type(1, 3, 1), date_type(1, 4, 1)], **sel_kwargs) + assert_identical(result, expected) + + +@pytest.mark.parametrize('sel_kwargs', [ + {'method': 'backfill'}, + {'method': 'backfill', 'tolerance': timedelta(days=365)} +]) +def test_sel_date_list_backfill(da, date_type, index, sel_kwargs): + expected = xr.DataArray( + [3, 3], coords=[[index[2], index[2]]], dims=['time']) + result = da.sel( + time=[date_type(1, 3, 1), date_type(1, 4, 1)], **sel_kwargs) + assert_identical(result, expected) + + +@pytest.mark.parametrize('sel_kwargs', [ + {'method': 'pad', 'tolerance': timedelta(days=20)}, + {'method': 'backfill', 'tolerance': timedelta(days=20)}, + {'method': 'nearest', 'tolerance': timedelta(days=20)}, +]) +def test_sel_date_list_tolerance_raises(da, date_type, sel_kwargs): + with pytest.raises(KeyError): + da.sel(time=[date_type(1, 2, 1), date_type(1, 5, 1)], **sel_kwargs) + + def test_isel(da, index): expected = xr.DataArray(1).assign_coords(time=index[0]) result = da.isel(time=0) @@ -356,7 +492,7 @@ def test_indexing_in_dataframe_iloc(df, index): expected = pd.Series([1], name=index[0]) result = df.iloc[0] assert result.equals(expected) - pd.util.testing.assert_series_equal(result, expected) + assert result.equals(expected) expected = pd.DataFrame([1, 2], index=index[:2]) result = df.iloc[:2] From 3cf03bc7ee07f0d2f439e1834c2c827ad4336d1f Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 11 Feb 2017 17:14:49 -0500 Subject: [PATCH 05/58] Fix failing python3 tests --- xarray/tests/test_netcdftimeindex.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/xarray/tests/test_netcdftimeindex.py b/xarray/tests/test_netcdftimeindex.py index 62b0c3d7397..78426ac2a33 100644 --- a/xarray/tests/test_netcdftimeindex.py +++ b/xarray/tests/test_netcdftimeindex.py @@ -42,8 +42,9 @@ def date_dict(year=None, month=None, day=None, } -@pytest.mark.parametrize(('string', 'expected'), ISO8601_STRING_TESTS.values(), - ids=ISO8601_STRING_TESTS.keys()) +@pytest.mark.parametrize(('string', 'expected'), + list(ISO8601_STRING_TESTS.values()), + ids=list(ISO8601_STRING_TESTS.keys())) def test_parse_iso8601(string, expected): result = parse_iso8601(string) assert result == expected @@ -294,8 +295,8 @@ def test_groupby(da): } -@pytest.mark.parametrize('sel_arg', SEL_STRING_OR_LIST_TESTS.values(), - ids=SEL_STRING_OR_LIST_TESTS.keys()) +@pytest.mark.parametrize('sel_arg', list(SEL_STRING_OR_LIST_TESTS.values()), + ids=list(SEL_STRING_OR_LIST_TESTS.keys())) def test_sel_string_or_list(da, index, sel_arg): expected = xr.DataArray([1, 2], coords=[index[:2]], dims=['time']) result = da.sel(time=sel_arg) From 53b085c669a9516fdfc55111d45151445ee138b3 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 11 Feb 2017 17:28:46 -0500 Subject: [PATCH 
06/58] Match test method name to method name --- xarray/tests/test_netcdftimeindex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_netcdftimeindex.py b/xarray/tests/test_netcdftimeindex.py index 78426ac2a33..b792d2d47a3 100644 --- a/xarray/tests/test_netcdftimeindex.py +++ b/xarray/tests/test_netcdftimeindex.py @@ -117,7 +117,7 @@ def dec_days(date_type): return 31 -def test_assert_all_netcdftime_datetimes(date_type, index): +def test_assert_all_valid_date_type(date_type, index): from netcdftime import DatetimeNoLeap, DatetimeAllLeap if date_type is DatetimeNoLeap: From a177f89d1f4f63fc58040f4a4f68e175407a67c4 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Wed, 10 May 2017 16:28:39 -0400 Subject: [PATCH 07/58] First attempts at integrating NetCDFTimeIndex into xarray This is a first pass at the following: - Resetting the logic for decoding datetimes such that `np.datetime64` objects are never used for non-standard calendars - Adding logic to use a `NetCDFTimeIndex` whenever `netcdftime.datetime` objects are used in an array being cast as an index (so if one reads in a Dataset from a netCDF file or creates one in Python, which is indexed by a time coordinate that uses `netcdftime.datetime` objects a NetCDFTimeIndex will be used rather than a generic object-based index) - Adding logic to encode `netcdftime.datetime` objects when saving out to netCDF files --- xarray/conventions/coding.py | 68 ++++++--- xarray/core/utils.py | 5 + xarray/tests/test_backends.py | 12 ++ xarray/tests/test_coding.py | 211 ++++++++++++++++++++------- xarray/tests/test_netcdftimeindex.py | 1 + xarray/tests/test_utils.py | 14 +- 6 files changed, 237 insertions(+), 74 deletions(-) diff --git a/xarray/conventions/coding.py b/xarray/conventions/coding.py index 3d9b7b62afb..2309c53ab44 100644 --- a/xarray/conventions/coding.py +++ b/xarray/conventions/coding.py @@ -97,20 +97,22 @@ def _decode_datetime_with_netcdf4(num_dates, units, calendar): import netCDF4 as nc4 dates = np.asarray(nc4.num2date(num_dates, units, calendar)) - if (dates[np.nanargmin(num_dates)].year < 1678 or - dates[np.nanargmax(num_dates)].year >= 2262): - warnings.warn('Unable to decode time axis into full ' - 'numpy.datetime64 objects, continuing using dummy ' - 'netCDF4.datetime objects instead, reason: dates out' - ' of range', RuntimeWarning, stacklevel=3) - else: - try: + if calendar in _STANDARD_CALENDARS: + if (dates[np.nanargmin(num_dates)].year < 1678 or + dates[np.nanargmax(num_dates)].year >= 2262): + warnings.warn( + 'Unable to decode time axis into full ' + 'numpy.datetime64 objects, continuing using dummy ' + 'netCDF4.datetime objects instead, reason: dates out' + ' of range', RuntimeWarning, stacklevel=3) + else: dates = nctime_to_nptime(dates) - except ValueError as e: - warnings.warn('Unable to decode time axis into full ' - 'numpy.datetime64 objects, continuing using ' - 'dummy netCDF4.datetime objects instead, reason:' - '{0}'.format(e), RuntimeWarning, stacklevel=3) + else: + warnings.warn('Unable to decode time axis into full numpy.datetime64 ' + 'objects, because dates are encoded using a ' + 'non-standard calendar ({}). 
Using netCDF4.datetime ' + 'objects instead'.format(calendar), + RuntimeWarning, stacklevel=3) return dates @@ -190,18 +192,39 @@ def _infer_time_units_from_diff(unique_timedeltas): return 'seconds' +def infer_calendar_name(dates): + """Given an array of datetimes, infer the CF calendar name""" + if np.asarray(dates).dtype == 'datetime64[ns]': + return 'proleptic_gregorian' + else: + try: + return np.asarray(dates)[0].calendar + except IndexError: + return np.asarray(dates).item().calendar + + def infer_datetime_units(dates): """Given an array of datetimes, returns a CF compatible time-unit string of the form "{time_unit} since {date[0]}", where `time_unit` is 'days', 'hours', 'minutes' or 'seconds' (the first one that can evenly divide all unique time deltas in `dates`) """ - dates = pd.to_datetime(np.asarray(dates).ravel(), box=False) - dates = dates[pd.notnull(dates)] - unique_timedeltas = np.unique(np.diff(dates)) + # There is a test that uses a list of strings as input (not consistent with + # the docstring). Should this be allowed or not? We could potentially + # continue supporting this, but it would require knowledge of the calendar + # type to decode the strings into the appropriate datetimes. + if np.asarray(dates).dtype == 'datetime64[ns]': + dates = pd.to_datetime(np.asarray(dates).ravel(), box=False) + dates = dates[pd.notnull(dates)] + unique_timedeltas = np.unique(np.diff(dates)) + reference_date = dates[0] if len(dates) > 0 else '1970-01-01' + reference_date = pd.Timestamp(reference_date) + else: + dates = np.asarray(dates).ravel() + unique_timedeltas = np.unique(pd.to_timedelta(np.diff(dates))) + reference_date = dates[0] if len(dates) > 0 else '1970-01-01' units = _infer_time_units_from_diff(unique_timedeltas) - reference_date = dates[0] if len(dates) > 0 else '1970-01-01' - return '%s since %s' % (units, pd.Timestamp(reference_date)) + return '%s since %s' % (units, reference_date) def infer_timedelta_units(deltas): @@ -221,7 +244,8 @@ def nctime_to_nptime(times): times = np.asarray(times) new = np.empty(times.shape, dtype='M8[ns]') for i, t in np.ndenumerate(times): - dt = datetime(t.year, t.month, t.day, t.hour, t.minute, t.second) + dt = datetime(t.year, t.month, t.day, t.hour, + t.minute, t.second) new[i] = np.datetime64(dt) return new @@ -279,7 +303,7 @@ def encode_cf_datetime(dates, units=None, calendar=None): units = _cleanup_netcdf_time_units(units) if calendar is None: - calendar = 'proleptic_gregorian' + calendar = infer_calendar_name(dates) delta, ref_date = _unpack_netcdf_time_units(units) try: @@ -575,7 +599,9 @@ def _var_as_tuple(var): def maybe_encode_datetime(var): - if np.issubdtype(var.dtype, np.datetime64): + from netcdftime._netcdftime import datetime as ncdatetime + is_netcdftime = isinstance(var.data.flatten()[0], ncdatetime) + if np.issubdtype(var.dtype, np.datetime64) or is_netcdftime: dims, data, attrs, encoding = _var_as_tuple(var) (data, units, calendar) = encode_cf_datetime( data, encoding.pop('units', None), encoding.pop('calendar', None)) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 89d1462328c..982f296ba08 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -10,6 +10,7 @@ import warnings from collections import Mapping, MutableMapping, Iterable +from netcdftime._netcdftime import datetime as ncdatetime import numpy as np import pandas as pd @@ -46,6 +47,7 @@ def safe_cast_to_index(array): this function will not attempt to do automatic type conversion but will always return an index with dtype=object. 
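    With the change in this patch, a non-empty object array whose entries
    are netcdftime.datetime objects is additionally upgraded to a
    NetCDFTimeIndex rather than returned as a plain object-dtype Index.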
""" + from ..conventions.netcdftimeindex import NetCDFTimeIndex if isinstance(array, pd.Index): index = array elif hasattr(array, 'to_index'): @@ -55,6 +57,9 @@ def safe_cast_to_index(array): if hasattr(array, 'dtype') and array.dtype.kind == 'O': kwargs['dtype'] = object index = pd.Index(np.asarray(array), **kwargs) + if len(index): + if isinstance(index[0], ncdatetime): + index = NetCDFTimeIndex(index) return index diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index a4218a24d75..b8fbe821fa3 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -278,6 +278,18 @@ def test_roundtrip_datetime_data(self): self.assertEquals(actual.t0.encoding['units'], 'days since 1950-01-01') + def test_roundtrip_netcdftime_datetime_data(self): + from .test_coding import _all_netcdftime_date_types + date_types = _all_netcdftime_date_types() + for date_type in date_types.values(): + times = [date_type(1, 1, 1), date_type(1, 1, 2)] + expected = Dataset({'t': ('t', times), 't0': times[0]}) + kwds = {'encoding': {'t0': {'units': 'days since 0001-01-01'}}} + with self.roundtrip(expected, save_kwargs=kwds) as actual: + self.assertDatasetIdentical(expected, actual) + self.assertEquals(actual.t0.encoding['units'], + 'days since 0001-01-01') + def test_roundtrip_timedelta_data(self): time_deltas = pd.to_timedelta(['1h', '2h', 'NaT']) expected = Dataset({'td': ('td', time_deltas), 'td0': time_deltas[0]}) diff --git a/xarray/tests/test_coding.py b/xarray/tests/test_coding.py index e4824948a84..01b3af605b0 100644 --- a/xarray/tests/test_coding.py +++ b/xarray/tests/test_coding.py @@ -126,6 +126,31 @@ def _ensure_naive_tz(dt): return dt +def _non_standard_netcdftime_date_types(): + from netcdftime import ( + DatetimeNoLeap, DatetimeJulian, DatetimeAllLeap, Datetime360Day) + return {'noleap': DatetimeNoLeap, + '365_day': DatetimeNoLeap, + '360_day': Datetime360Day, + 'julian': DatetimeJulian, + 'all_leap': DatetimeAllLeap, + '366_day': DatetimeAllLeap} + + +def _all_netcdftime_date_types(): + from netcdftime import ( + DatetimeNoLeap, DatetimeJulian, DatetimeAllLeap, Datetime360Day, + DatetimeGregorian, DatetimeProlepticGregorian) + return {'noleap': DatetimeNoLeap, + '365_day': DatetimeNoLeap, + '360_day': Datetime360Day, + 'julian': DatetimeJulian, + 'all_leap': DatetimeAllLeap, + '366_day': DatetimeAllLeap, + 'gregorian': DatetimeGregorian, + 'proleptic_gregorian': DatetimeProlepticGregorian} + + class TestDatetime(TestCase): @requires_netCDF4 def test_cf_datetime(self): @@ -211,7 +236,7 @@ def test_decode_cf_datetime_overflow(self): def test_decode_cf_datetime_transition_to_invalid(self): # manually create dataset with not-decoded date from datetime import datetime - ds = Dataset(coords={'time' : [0, 266 * 365]}) + ds = Dataset(coords={'time': [0, 266 * 365]}) units = 'days since 2000-01-01 00:00:00' ds.time.attrs = dict(units=units) ds_decoded = coding.decode_cf(ds) @@ -291,21 +316,21 @@ def test_decode_cf_datetime_non_iso_strings(self): @requires_netCDF4 def test_decode_non_standard_calendar(self): + from datetime import datetime import netCDF4 as nc4 - for calendar in ['noleap', '365_day', '360_day', 'julian', 'all_leap', - '366_day']: + date_types = _non_standard_netcdftime_date_types() + for calendar, date_type in date_types.items(): units = 'days since 0001-01-01' - times = pd.date_range('2001-04-01-00', end='2001-04-30-23', - freq='H') - noleap_time = nc4.date2num(times.to_pydatetime(), units, - calendar=calendar) - expected = times.values + times = 
[datetime(1, 4, 1, h) for h in range(1, 5)] + noleap_time = nc4.date2num(times, units, calendar=calendar) with warnings.catch_warnings(): warnings.filterwarnings('ignore', 'Unable to decode time axis') actual = coding.decode_cf_datetime(noleap_time, units, - calendar=calendar) - self.assertEqual(actual.dtype, np.dtype('M8[ns]')) + calendar=calendar) + assert all(isinstance(value, date_type) for value in actual) + expected = np.array( + [date_type(1, 4, 1, h) for h in range(1, 5)]) abs_diff = abs(actual - expected) # once we no longer support versions of netCDF4 older than 1.1.5, # we could do this check with near microsecond accuracy: @@ -313,60 +338,120 @@ def test_decode_non_standard_calendar(self): self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) @requires_netCDF4 - def test_decode_non_standard_calendar_single_element(self): + def test_decode_standard_calendar_single_element_ns_range(self): units = 'days since 0001-01-01' - for calendar in ['noleap', '365_day', '360_day', 'julian', 'all_leap', - '366_day']: + for calendar in coding._STANDARD_CALENDARS: for num_time in [735368, [735368], [[735368]]]: with warnings.catch_warnings(): warnings.filterwarnings('ignore', 'Unable to decode time axis') actual = coding.decode_cf_datetime(num_time, units, - calendar=calendar) - self.assertEqual(actual.dtype, np.dtype('M8[ns]')) + calendar=calendar) + assert actual.dtype == 'datetime64[ns]' @requires_netCDF4 - def test_decode_non_standard_calendar_single_element_fallback(self): - import netCDF4 as nc4 + def test_decode_standard_calendar_single_element_non_ns_range(self): + from datetime import datetime + from netcdftime import DatetimeGregorian units = 'days since 0001-01-01' - dt = nc4.netcdftime.datetime(2001, 2, 29) - for calendar in ['360_day', 'all_leap', '366_day']: - num_time = nc4.date2num(dt, units, calendar) - with self.assertWarns('Unable to decode time axis'): - actual = coding.decode_cf_datetime(num_time, units, - calendar=calendar) - expected = np.asarray(nc4.num2date(num_time, units, calendar)) - print(num_time, calendar, actual, expected) - self.assertEqual(actual.dtype, np.dtype('O')) - self.assertEqual(expected, actual) + for days in [1, 1470376]: + for calendar in coding._STANDARD_CALENDARS: + for num_time in [days, [days], [[days]]]: + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', + 'Unable to decode time axis') + actual = coding.decode_cf_datetime(num_time, units, + calendar=calendar) + # Confusing, but this is how netCDF4.num2date behaves -- + # According to the documentation this is supposed to have + # something to do with whether the date falls before or + # after the breakpoint between the Julian + # and Gregorian calendars (1582-10-15). 
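+                    # A hedged illustration of the split asserted below
+                    # (assuming the netCDF4.num2date behavior described
+                    # above; values here are from this test, not verified
+                    # independently):
+                    #   nc4.num2date(1, units, 'standard')
+                    #     -> a netcdftime.DatetimeGregorian (pre-1582)
+                    #   nc4.num2date(1470376, units, 'standard')
+                    #     -> a real datetime.datetime (post-1582)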
+ if calendar == 'standard' and days == 1: + assert isinstance(actual.item(), DatetimeGregorian) + else: + assert isinstance(actual.item(), datetime) @requires_netCDF4 - def test_decode_non_standard_calendar_multidim_time(self): + def test_decode_non_standard_calendar_single_element(self): + date_types = _non_standard_netcdftime_date_types() + units = 'days since 0001-01-01' + for days in [1, 735368]: + for calendar, date_type in date_types.items(): + for num_time in [days, [days], [[days]]]: + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', + 'Unable to decode time axis') + actual = coding.decode_cf_datetime(num_time, units, + calendar=calendar) + assert isinstance(actual.item(), date_type) + + @requires_netCDF4 + def test_decode_standard_calendar_multidim_time(self): import netCDF4 as nc4 - calendar = 'noleap' - units = 'days since 0001-01-01' - times1 = pd.date_range('2001-04-01', end='2001-04-05', freq='D') - times2 = pd.date_range('2001-05-01', end='2001-05-05', freq='D') - noleap_time1 = nc4.date2num(times1.to_pydatetime(), units, - calendar=calendar) - noleap_time2 = nc4.date2num(times2.to_pydatetime(), units, - calendar=calendar) - mdim_time = np.empty((len(noleap_time1), 2), ) - mdim_time[:, 0] = noleap_time1 - mdim_time[:, 1] = noleap_time2 - - expected1 = times1.values - expected2 = times2.values - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', 'Unable to decode time axis') + for calendar in coding._STANDARD_CALENDARS: + units = 'days since 0001-01-01' + times1 = pd.date_range('2001-04-01', end='2001-04-05', freq='D') + times2 = pd.date_range('2001-05-01', end='2001-05-05', freq='D') + noleap_time1 = nc4.date2num(times1.to_pydatetime(), + units, calendar=calendar) + noleap_time2 = nc4.date2num(times2.to_pydatetime(), + units, calendar=calendar) + mdim_time = np.empty((len(noleap_time1), 2), ) + mdim_time[:, 0] = noleap_time1 + mdim_time[:, 1] = noleap_time2 + + expected1 = times1.values + expected2 = times2.values actual = coding.decode_cf_datetime(mdim_time, units, - calendar=calendar) - self.assertEqual(actual.dtype, np.dtype('M8[ns]')) - self.assertArrayEqual(actual[:, 0], expected1) - self.assertArrayEqual(actual[:, 1], expected2) + calendar=calendar) + self.assertEqual(actual.dtype, np.dtype('M8[ns]')) + + abs_diff1 = abs(actual[:, 0] - expected1) + abs_diff2 = abs(actual[:, 1] - expected2) + # once we no longer support versions of netCDF4 older than 1.1.5, + # we could do this check with near microsecond accuracy: + # https://github.com/Unidata/netcdf4-python/issues/355 + self.assertTrue((abs_diff1 <= np.timedelta64(1, 's')).all()) + self.assertTrue((abs_diff2 <= np.timedelta64(1, 's')).all()) + @requires_netCDF4 + def test_decode_non_standard_calendar_multidim_time(self): + from datetime import datetime + import netCDF4 as nc4 + + date_types = _non_standard_netcdftime_date_types() + for calendar, date_type in date_types.items(): + units = 'days since 0001-01-01' + times1 = [datetime(1, 4, day) for day in range(1, 6)] + times2 = [datetime(1, 5, day) for day in range(1, 6)] + noleap_time1 = nc4.date2num(times1, units, calendar=calendar) + noleap_time2 = nc4.date2num(times2, units, calendar=calendar) + mdim_time = np.empty((len(noleap_time1), 2), ) + mdim_time[:, 0] = noleap_time1 + mdim_time[:, 1] = noleap_time2 + + expected1 = np.array( + [date_type(1, 4, day) for day in range(1, 6)]) + expected2 = np.array( + [date_type(1, 5, day) for day in range(1, 6)]) + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'Unable 
to decode time axis') + actual = coding.decode_cf_datetime(mdim_time, units, + calendar=calendar) + self.assertEqual(actual.dtype, np.dtype('O')) + + abs_diff1 = abs(actual[:, 0] - expected1) + abs_diff2 = abs(actual[:, 1] - expected2) + # once we no longer support versions of netCDF4 older than 1.1.5, + # we could do this check with near microsecond accuracy: + # https://github.com/Unidata/netcdf4-python/issues/355 + self.assertTrue((abs_diff1 <= np.timedelta64(1, 's')).all()) + self.assertTrue((abs_diff2 <= np.timedelta64(1, 's')).all()) + + # I think this test may now be redundant @requires_netCDF4 def test_decode_non_standard_calendar_fallback(self): import netCDF4 as nc4 @@ -381,7 +466,7 @@ def test_decode_non_standard_calendar_fallback(self): with warnings.catch_warnings(record=True) as w: warnings.simplefilter('always') actual = coding.decode_cf_datetime(num_times, units, - calendar=calendar) + calendar=calendar) self.assertEqual(len(w), 1) self.assertIn('Unable to decode time axis', str(w[0].message)) @@ -414,14 +499,14 @@ def test_decoded_cf_datetime_array_2d(self): self.assertArrayEqual(np.asarray(array), expected) def test_infer_datetime_units(self): + # Temporarily removed (['1900-01-01', '1900-01-02', + # '1900-01-02 00:00:01'], + # 'seconds since 1900-01-01 00:00:00') case for dates, expected in [(pd.date_range('1900-01-01', periods=5), 'days since 1900-01-01 00:00:00'), (pd.date_range('1900-01-01 12:00:00', freq='H', periods=2), 'hours since 1900-01-01 12:00:00'), - (['1900-01-01', '1900-01-02', - '1900-01-02 00:00:01'], - 'seconds since 1900-01-01 00:00:00'), (pd.to_datetime(['1900-01-01', '1900-01-02', 'NaT']), 'days since 1900-01-01 00:00:00'), (pd.to_datetime(['1900-01-01', @@ -434,6 +519,28 @@ def test_infer_datetime_units(self): ]: self.assertEqual(expected, coding.infer_datetime_units(dates)) + @requires_netCDF4 + def test_infer_netcdftime_datetime_units(self): + date_types = _all_netcdftime_date_types() + # What is the expected behavior of these tests using netcdftime + # objects? Currently it is inconsistent with the tests using + # np.datetime64 objects; is this a problem? 
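+        # (A concrete instance of the inconsistency, assuming the
+        # datetime64 path's "first evenly dividing unit, else seconds"
+        # rule in _infer_time_units_from_diff: a diff of one day plus a
+        # few microseconds would infer 'seconds since ...' for
+        # datetime64 inputs, whereas the final case below expects the
+        # microseconds to be ignored and 'days since ...' returned.)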
+ for date_type in date_types.values(): + for dates, expected in [([date_type(1900, 1, 1), + date_type(1900, 1, 2)], + 'days since 1900-01-01 00:00:00'), + ([date_type(1900, 1, 1, 12), + date_type(1900, 1, 1, 13)], + 'seconds since 1900-01-01 12:00:00'), + ([date_type(1900, 1, 1), + date_type(1900, 1, 2), + date_type(1900, 1, 2, 0, 0, 1)], + 'seconds since 1900-01-01 00:00:00'), + ([date_type(1900, 1, 1), + date_type(1900, 1, 2, 0, 0, 0, 5)], + 'days since 1900-01-01 00:00:00')]: + self.assertEqual(expected, coding.infer_datetime_units(dates)) + def test_cf_timedelta(self): examples = [ ('1D', 'days', np.int64(1)), diff --git a/xarray/tests/test_netcdftimeindex.py b/xarray/tests/test_netcdftimeindex.py index b792d2d47a3..352b1d22d8f 100644 --- a/xarray/tests/test_netcdftimeindex.py +++ b/xarray/tests/test_netcdftimeindex.py @@ -19,6 +19,7 @@ def date_dict(year=None, month=None, day=None, return dict(year=year, month=month, day=day, hour=hour, minute=minute, second=second) + ISO8601_STRING_TESTS = { 'year': ('1999', date_dict(year='1999')), 'month': ('199901', date_dict(year='1999', month='01')), diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index 79d458a16ac..c7f9f9f6a80 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -7,9 +7,11 @@ import numpy as np import pandas as pd +from xarray.conventions.netcdftimeindex import NetCDFTimeIndex from xarray.core import duck_array_ops, utils from xarray.core.pycompat import OrderedDict -from . import TestCase +from . import TestCase, requires_netCDF4 +from .test_coding import _all_netcdftime_date_types class TestAlias(TestCase): @@ -37,6 +39,16 @@ def test(self): self.assertArrayEqual(expected, actual) self.assertEqual(expected.dtype, actual.dtype) + @requires_netCDF4 + def test_netcdftimeindex(self): + date_types = _all_netcdftime_date_types() + for date_type in date_types.values(): + dates = [date_type(1, 1, day) for day in range(1, 20)] + expected = NetCDFTimeIndex(dates) + actual = utils.safe_cast_to_index(np.array(dates)) + self.assertArrayEqual(expected, actual) + self.assertEqual(expected.dtype, actual.dtype) + def test_multiindex_from_product_levels(): result = utils.multiindex_from_product_levels([['b', 'a'], [1, 3, 2]]) From 48ec51927ec91a315fe78b6f5a90e9df3529bdd9 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Wed, 10 May 2017 21:04:44 -0400 Subject: [PATCH 08/58] Cleanup --- xarray/conventions/coding.py | 13 ++++++++++--- xarray/core/utils.py | 18 +++++++++++++----- xarray/tests/test_backends.py | 21 ++++++++++++++++++--- xarray/tests/test_utils.py | 1 + 4 files changed, 42 insertions(+), 11 deletions(-) diff --git a/xarray/conventions/coding.py b/xarray/conventions/coding.py index 2309c53ab44..54e99d505c3 100644 --- a/xarray/conventions/coding.py +++ b/xarray/conventions/coding.py @@ -310,6 +310,7 @@ def encode_cf_datetime(dates, units=None, calendar=None): if calendar not in _STANDARD_CALENDARS or dates.dtype.kind == 'O': # parse with netCDF4 instead raise OutOfBoundsDatetime + assert dates.dtype == 'datetime64[ns]' delta_units = _netcdf_to_numpy_timeunit(delta) @@ -598,10 +599,16 @@ def _var_as_tuple(var): return var.dims, var.data, var.attrs.copy(), var.encoding.copy() +def _contains_netcdftimes(var): + try: + from netcdftime._netcdftime import datetime as ncdatetime + return isinstance(var.data.flatten()[0], ncdatetime) + except ImportError: + return False + + def maybe_encode_datetime(var): - from netcdftime._netcdftime import datetime as ncdatetime - is_netcdftime = 
isinstance(var.data.flatten()[0], ncdatetime) - if np.issubdtype(var.dtype, np.datetime64) or is_netcdftime: + if np.issubdtype(var.dtype, np.datetime64) or _contains_netcdftimes(var): dims, data, attrs, encoding = _var_as_tuple(var) (data, units, calendar) = encode_cf_datetime( data, encoding.pop('units', None), encoding.pop('calendar', None)) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 982f296ba08..e17de71b4c6 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -10,7 +10,6 @@ import warnings from collections import Mapping, MutableMapping, Iterable -from netcdftime._netcdftime import datetime as ncdatetime import numpy as np import pandas as pd @@ -38,6 +37,18 @@ def wrapper(*args, **kwargs): return wrapper +def _maybe_cast_to_netcdftimeindex(index): + try: + from netcdftime._netcdftime import datetime as ncdatetime + from ..conventions.netcdftimeindex import NetCDFTimeIndex + if len(index): + if isinstance(index[0], ncdatetime): + index = NetCDFTimeIndex(index) + return index + except ImportError: + return index + + def safe_cast_to_index(array): """Given an array, safely cast it to a pandas.Index. @@ -47,7 +58,6 @@ def safe_cast_to_index(array): this function will not attempt to do automatic type conversion but will always return an index with dtype=object. """ - from ..conventions.netcdftimeindex import NetCDFTimeIndex if isinstance(array, pd.Index): index = array elif hasattr(array, 'to_index'): @@ -57,9 +67,7 @@ def safe_cast_to_index(array): if hasattr(array, 'dtype') and array.dtype.kind == 'O': kwargs['dtype'] = object index = pd.Index(np.asarray(array), **kwargs) - if len(index): - if isinstance(index[0], ncdatetime): - index = NetCDFTimeIndex(index) + index = _maybe_cast_to_netcdftimeindex(index) return index diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index b8fbe821fa3..028a760d8f9 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -279,16 +279,31 @@ def test_roundtrip_datetime_data(self): 'days since 1950-01-01') def test_roundtrip_netcdftime_datetime_data(self): - from .test_coding import _all_netcdftime_date_types - date_types = _all_netcdftime_date_types() + from .test_coding import _non_standard_netcdftime_date_types + date_types = _non_standard_netcdftime_date_types() for date_type in date_types.values(): times = [date_type(1, 1, 1), date_type(1, 1, 2)] expected = Dataset({'t': ('t', times), 't0': times[0]}) kwds = {'encoding': {'t0': {'units': 'days since 0001-01-01'}}} with self.roundtrip(expected, save_kwargs=kwds) as actual: - self.assertDatasetIdentical(expected, actual) + expected_t = np.array(times) + abs_diff = abs(actual.t.values - expected_t) + self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) + + self.assertEquals(actual.t.encoding['units'], + 'days since 1-01-01 00:00:00') + self.assertEquals(actual.t.encoding['calendar'], + times[0].calendar) + + expected_t0 = np.array([date_type(1, 1, 1)]) + abs_diff = abs(actual.t0.values - expected_t0) + self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) + self.assertEquals(actual.t0.encoding['units'], 'days since 0001-01-01') + self.assertEquals(actual.t.encoding['calendar'], + times[0].calendar) + def test_roundtrip_timedelta_data(self): time_deltas = pd.to_timedelta(['1h', '2h', 'NaT']) diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index c7f9f9f6a80..55722b98271 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -48,6 +48,7 @@ def test_netcdftimeindex(self): 
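+        # np.array(dates) has object dtype, so without the netcdftime
+        # cast added to safe_cast_to_index this would come back as a
+        # plain object-dtype pd.Index; the assertion added below pins
+        # down that it is upgraded to a NetCDFTimeIndex instead.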
actual = utils.safe_cast_to_index(np.array(dates)) self.assertArrayEqual(expected, actual) self.assertEqual(expected.dtype, actual.dtype) + assert isinstance(actual, NetCDFTimeIndex) def test_multiindex_from_product_levels(): From 2a7b439c9be4193a9d137b7ed85b93c142242b91 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Thu, 11 May 2017 07:08:14 -0400 Subject: [PATCH 09/58] Fix DataFrame and Series test failures for NetCDFTimeIndex These were related to a recent minor upstream change in pandas: https://github.com/pandas-dev/pandas/blame/master/pandas/core/indexing.py#L1433 --- xarray/conventions/netcdftimeindex.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/xarray/conventions/netcdftimeindex.py b/xarray/conventions/netcdftimeindex.py index 6c9fe300e9a..4f408635a05 100644 --- a/xarray/conventions/netcdftimeindex.py +++ b/xarray/conventions/netcdftimeindex.py @@ -212,3 +212,7 @@ def __contains__(self, key): (isinstance(result, np.ndarray) and result.size)) except (KeyError, TypeError, ValueError): return False + + def contains(self, key): + """Needed for .loc based partial-string indexing""" + return self.__contains__(key) From b94272403aabaf64c7c75ac6bc42c38a4fe871c8 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Thu, 11 May 2017 08:40:04 -0400 Subject: [PATCH 10/58] First pass at making NetCDFTimeIndex compatible with #1356 --- xarray/conventions/netcdftimeindex.py | 2 +- xarray/core/accessors.py | 33 ++++++++++++++++++++++----- xarray/core/common.py | 19 ++++++++++++++- xarray/core/dataset.py | 5 ++-- 4 files changed, 49 insertions(+), 10 deletions(-) diff --git a/xarray/conventions/netcdftimeindex.py b/xarray/conventions/netcdftimeindex.py index 4f408635a05..5f592de904e 100644 --- a/xarray/conventions/netcdftimeindex.py +++ b/xarray/conventions/netcdftimeindex.py @@ -101,7 +101,7 @@ def _parsed_string_to_bounds(date_type, resolution, parsed): def get_date_field(datetimes, field): """Adapted from pandas.tslib.get_date_field""" - return [getattr(date, field) for date in datetimes] + return np.array([getattr(date, field) for date in datetimes]) def _field_accessor(name, docstring=None): diff --git a/xarray/core/accessors.py b/xarray/core/accessors.py index 7360b9764ae..c6838f59846 100644 --- a/xarray/core/accessors.py +++ b/xarray/core/accessors.py @@ -2,7 +2,7 @@ from __future__ import division from __future__ import print_function -from .common import is_datetime_like +from .common import is_np_datetime_like, _contains_datetime_like_objects from .pycompat import dask_array_type from functools import partial @@ -20,6 +20,20 @@ def _season_from_months(months): return seasons[(months // 3) % 4] +def _access_through_netcdftimeindex(values, name): + """Coerce an array of datetime-like values to a NetCDFTimeIndex + and access requested datetime component + """ + from ..conventions.netcdftimeindex import NetCDFTimeIndex + values_as_netcdftimeindex = NetCDFTimeIndex(values) + if name == 'season': + months = values_as_netcdftimeindex.month + field_values = _season_from_months(months) + else: + field_values = getattr(values_as_netcdftimeindex, name) + return field_values.reshape(values.shape) + + def _access_through_series(values, name): """Coerce an array of datetime-like values to a pandas Series and access requested datetime component @@ -52,12 +66,17 @@ def _get_date_field(values, name, dtype): Array-like of datetime fields accessed for each element in values """ + if is_np_datetime_like(values.dtype): + access_method = _access_through_series + else: + access_method = 
_access_through_netcdftimeindex + if isinstance(values, dask_array_type): from dask.array import map_blocks - return map_blocks(_access_through_series, + return map_blocks(access_method, values, name, dtype=dtype) else: - return _access_through_series(values, name) + return access_method(values, name) class DatetimeAccessor(object): @@ -83,9 +102,11 @@ class DatetimeAccessor(object): """ def __init__(self, xarray_obj): - if not is_datetime_like(xarray_obj.dtype): + if not _contains_datetime_like_objects(xarray_obj): raise TypeError("'dt' accessor only available for " - "DataArray with datetime64 or timedelta64 dtype") + "DataArray with datetime64 timedelta64 dtype or " + "for arrays containing netcdftime datetime " + "objects.") self._obj = xarray_obj def _tslib_field_accessor(name, docstring=None, dtype=None): @@ -147,4 +168,4 @@ def f(self, dtype=dtype): time = _tslib_field_accessor( "time", "Timestamps corresponding to datetimes", object - ) \ No newline at end of file + ) diff --git a/xarray/core/common.py b/xarray/core/common.py index d61e2cdb15f..7d91773c0ad 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -763,8 +763,25 @@ def ones_like(other, dtype=None): return full_like(other, 1, dtype) -def is_datetime_like(dtype): +def is_np_datetime_like(dtype): """Check if a dtype is a subclass of the numpy datetime types """ return (np.issubdtype(dtype, np.datetime64) or np.issubdtype(dtype, np.timedelta64)) + + +def _contains_netcdftime_datetimes(var): + """Check if a variable contains netcdftime datetime objects""" + from netcdftime._netcdftime import datetime + return isinstance(var.data.ravel()[0], datetime) + + +def _contains_datetime_like_objects(var): + """Check if a variable contains datetime like objects (either + np.datetime64, np.timedelta64, or netcdftime._netcdftime.datetime)""" + if is_np_datetime_like(var.dtype): + return True + try: + return _contains_netcdftime_datetimes(var) + except ImportError: + return False diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 2b9c74a7b33..151545a197b 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -21,7 +21,8 @@ from .alignment import align from ..conventions import coding from .coordinates import DatasetCoordinates, LevelCoordinatesSource, Indexes -from .common import ImplementsDatasetReduce, BaseDataObject, is_datetime_like +from .common import (ImplementsDatasetReduce, BaseDataObject, + _contains_datetime_like_objects) from .merge import (dataset_update_method, dataset_merge_method, merge_data_and_coords) from .utils import (Frozen, SortedKeysDict, maybe_wrap_array, hashable, @@ -76,7 +77,7 @@ def _get_virtual_variable(variables, key, level_vars=None, dim_sizes=None): virtual_var = ref_var var_name = key else: - if is_datetime_like(ref_var.dtype): + if _contains_datetime_like_objects(ref_var): ref_var = xr.DataArray(ref_var) data = getattr(ref_var.dt, var_name).data else: From a9ed3c80e0c80318cd2f14e6c6b1e661fde524d6 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sun, 25 Jun 2017 20:55:14 -0400 Subject: [PATCH 11/58] Address initial review comments --- xarray/conventions/coding.py | 14 ++++---- xarray/conventions/netcdftimeindex.py | 51 +++++++++++++++++++++++---- xarray/tests/test_backends.py | 1 - xarray/tests/test_coding.py | 6 ---- 4 files changed, 51 insertions(+), 21 deletions(-) diff --git a/xarray/conventions/coding.py b/xarray/conventions/coding.py index 1a409c7e48c..1dc1634b6c9 100644 --- a/xarray/conventions/coding.py +++ b/xarray/conventions/coding.py @@ -107,16 
+107,18 @@ def _decode_datetime_with_netcdf4(num_dates, units, calendar): warnings.warn( 'Unable to decode time axis into full ' 'numpy.datetime64 objects, continuing using dummy ' - 'netCDF4.datetime objects instead, reason: dates out' - ' of range', RuntimeWarning, stacklevel=3) + 'netCDF4.datetime objects instead, reason: dates out ' + 'of range', RuntimeWarning, stacklevel=3) else: dates = nctime_to_nptime(dates) else: warnings.warn('Unable to decode time axis into full numpy.datetime64 ' 'objects, because dates are encoded using a ' 'non-standard calendar ({}). Using netCDF4.datetime ' - 'objects instead'.format(calendar), - RuntimeWarning, stacklevel=3) + 'objects instead. Time indexing will be done using a ' + 'NetCDFTimeIndex rather than ' + 'a DatetimeIndex'.format(calendar), + DeprecationWarning, stacklevel=3) return dates @@ -213,10 +215,6 @@ def infer_datetime_units(dates): 'hours', 'minutes' or 'seconds' (the first one that can evenly divide all unique time deltas in `dates`) """ - # There is a test that uses a list of strings as input (not consistent with - # the docstring). Should this be allowed or not? We could potentially - # continue supporting this, but it would require knowledge of the calendar - # type to decode the strings into the appropriate datetimes. if np.asarray(dates).dtype == 'datetime64[ns]': dates = pd.to_datetime(np.asarray(dates).ravel(), box=False) dates = dates[pd.notnull(dates)] diff --git a/xarray/conventions/netcdftimeindex.py b/xarray/conventions/netcdftimeindex.py index 5f592de904e..7d76440296f 100644 --- a/xarray/conventions/netcdftimeindex.py +++ b/xarray/conventions/netcdftimeindex.py @@ -74,24 +74,24 @@ def _parsed_string_to_bounds(date_type, resolution, parsed): if resolution == 'year': return (date_type(parsed.year, 1, 1), date_type(parsed.year + 1, 1, 1) - timedelta(microseconds=1)) - if resolution == 'month': + elif resolution == 'month': if parsed.month == 12: end = date_type(parsed.year + 1, 1, 1) - timedelta(microseconds=1) else: end = (date_type(parsed.year, parsed.month + 1, 1) - timedelta(microseconds=1)) return date_type(parsed.year, parsed.month, 1), end - if resolution == 'day': + elif resolution == 'day': start = date_type(parsed.year, parsed.month, parsed.day) return start, start + timedelta(days=1, microseconds=-1) - if resolution == 'hour': + elif resolution == 'hour': start = date_type(parsed.year, parsed.month, parsed.day, parsed.hour) return start, start + timedelta(hours=1, microseconds=-1) - if resolution == 'minute': + elif resolution == 'minute': start = date_type(parsed.year, parsed.month, parsed.day, parsed.hour, parsed.minute) return start, start + timedelta(minutes=1, microseconds=-1) - if resolution == 'second': + elif resolution == 'second': start = date_type(parsed.year, parsed.month, parsed.day, parsed.hour, parsed.minute, parsed.second) return start, start + timedelta(seconds=1, microseconds=-1) @@ -158,7 +158,46 @@ def __new__(cls, data): def _partial_date_slice(self, resolution, parsed): """Adapted from - pandas.tseries.index.DatetimeIndex._partial_date_slice""" + pandas.tseries.index.DatetimeIndex._partial_date_slice + + Note that when using a NetCDFTimeIndex, if a partial-date selection + returns a single element, it will never be converted to a scalar + coordinate; this is in slight contrast to the behavior when using + a DatetimeIndex, which sometimes will return a DataArray with a scalar + coordinate depending on the resolution of the datetimes used in + defining the index. 
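+        A NetCDFTimeIndex holds netcdftime datetime objects and does not
+        keep track of an overall index resolution, so a partial-string
+        lookup is always treated as a range selection rather than as a
+        possible exact match.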
For example:
+
+        >>> from netcdftime import DatetimeNoLeap
+        >>> import pandas as pd
+        >>> import xarray as xr
+        >>> da = xr.DataArray([1, 2],
+                              coords=[[DatetimeNoLeap(2001, 1, 1),
+                                       DatetimeNoLeap(2001, 2, 1)]],
+                              dims=['time'])
+        >>> da.sel(time='2001-01-01')
+        <xarray.DataArray (time: 1)>
+        array([1])
+        Coordinates:
+          * time     (time) object 2001-01-01 00:00:00
+        >>> da = xr.DataArray([1, 2],
+                              coords=[[pd.Timestamp(2001, 1, 1),
+                                       pd.Timestamp(2001, 2, 1)]],
+                              dims=['time'])
+        >>> da.sel(time='2001-01-01')
+        <xarray.DataArray ()>
+        array(1)
+        Coordinates:
+            time     datetime64[ns] 2001-01-01
+        >>> da = xr.DataArray([1, 2],
+                              coords=[[pd.Timestamp(2001, 1, 1, 1),
+                                       pd.Timestamp(2001, 2, 1)]],
+                              dims=['time'])
+        >>> da.sel(time='2001-01-01')
+        <xarray.DataArray (time: 1)>
+        array([1])
+        Coordinates:
+          * time     (time) datetime64[ns] 2001-01-01T01:00:00
+        """
         start, end = _parsed_string_to_bounds(self.date_type, resolution,
                                               parsed)
         lhs_mask = (self._data >= start)
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index e0659a6aec8..4ebd5d6825b 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -304,7 +304,6 @@ def test_roundtrip_netcdftime_datetime_data(self):
             self.assertEquals(actual.t.encoding['calendar'],
                               times[0].calendar)
 
-
     def test_roundtrip_timedelta_data(self):
         time_deltas = pd.to_timedelta(['1h', '2h', 'NaT'])
         expected = Dataset({'td': ('td', time_deltas), 'td0': time_deltas[0]})
diff --git a/xarray/tests/test_coding.py b/xarray/tests/test_coding.py
index 01b3af605b0..f253741329f 100644
--- a/xarray/tests/test_coding.py
+++ b/xarray/tests/test_coding.py
@@ -499,9 +499,6 @@ def test_decoded_cf_datetime_array_2d(self):
         self.assertArrayEqual(np.asarray(array), expected)
 
     def test_infer_datetime_units(self):
-        # Temporarily removed (['1900-01-01', '1900-01-02',
-        #                       '1900-01-02 00:00:01'],
-        #                      'seconds since 1900-01-01 00:00:00') case
         for dates, expected in [(pd.date_range('1900-01-01', periods=5),
                                  'days since 1900-01-01 00:00:00'),
                                 (pd.date_range('1900-01-01 12:00:00', freq='H',
@@ -522,9 +519,6 @@ def test_infer_datetime_units(self):
     @requires_netCDF4
     def test_infer_netcdftime_datetime_units(self):
         date_types = _all_netcdftime_date_types()
-        # What is the expected behavior of these tests using netcdftime
-        # objects?  Currently it is inconsistent with the tests using
-        # np.datetime64 objects; is this a problem?
         for date_type in date_types.values():
             for dates, expected in [([date_type(1900, 1, 1),
                                       date_type(1900, 1, 2)],

From f00f59a19a73f4a7e0a122df6ef346357d420637 Mon Sep 17 00:00:00 2001
From: Spencer Clark
Date: Mon, 22 Jan 2018 11:33:52 -0500
Subject: [PATCH 12/58] Restore test_conventions.py

---
 xarray/tests/test_conventions.py | 382 +++++++++++++++++++++++++++++++
 1 file changed, 382 insertions(+)
 create mode 100644 xarray/tests/test_conventions.py

diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py
new file mode 100644
index 00000000000..6a509368017
--- /dev/null
+++ b/xarray/tests/test_conventions.py
@@ -0,0 +1,382 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import contextlib
+import warnings
+import numpy as np
+import pandas as pd
+import pytest
+
+from xarray import conventions, Variable, Dataset, open_dataset
+from xarray.core import utils, indexing
+from xarray.testing import assert_identical
+from . 
import ( + TestCase, requires_netCDF4, unittest, raises_regex, IndexerMaker, + assert_array_equal) +from .test_backends import CFEncodedDataTest +from xarray.core.pycompat import iteritems +from xarray.backends.memory import InMemoryDataStore +from xarray.backends.common import WritableCFDataStore +from xarray.conventions import decode_cf + + +B = IndexerMaker(indexing.BasicIndexer) +V = IndexerMaker(indexing.VectorizedIndexer) + + +class TestStackedBytesArray(TestCase): + def test_wrapper_class(self): + array = np.array([[b'a', b'b', b'c'], [b'd', b'e', b'f']], dtype='S') + actual = conventions.StackedBytesArray(array) + expected = np.array([b'abc', b'def'], dtype='S') + assert actual.dtype == expected.dtype + assert actual.shape == expected.shape + assert actual.size == expected.size + assert actual.ndim == expected.ndim + assert len(actual) == len(expected) + assert_array_equal(expected, actual) + assert_array_equal(expected[:1], actual[B[:1]]) + with pytest.raises(IndexError): + actual[B[:, :2]] + + def test_scalar(self): + array = np.array([b'a', b'b', b'c'], dtype='S') + actual = conventions.StackedBytesArray(array) + + expected = np.array(b'abc') + assert actual.dtype == expected.dtype + assert actual.shape == expected.shape + assert actual.size == expected.size + assert actual.ndim == expected.ndim + with pytest.raises(TypeError): + len(actual) + np.testing.assert_array_equal(expected, actual) + with pytest.raises(IndexError): + actual[B[:2]] + assert str(actual) == str(expected) + + def test_char_to_bytes(self): + array = np.array([['a', 'b', 'c'], ['d', 'e', 'f']]) + expected = np.array(['abc', 'def']) + actual = conventions.char_to_bytes(array) + assert_array_equal(actual, expected) + + expected = np.array(['ad', 'be', 'cf']) + actual = conventions.char_to_bytes(array.T) # non-contiguous + assert_array_equal(actual, expected) + + def test_char_to_bytes_ndim_zero(self): + expected = np.array('a') + actual = conventions.char_to_bytes(expected) + assert_array_equal(actual, expected) + + def test_char_to_bytes_size_zero(self): + array = np.zeros((3, 0), dtype='S1') + expected = np.array([b'', b'', b'']) + actual = conventions.char_to_bytes(array) + assert_array_equal(actual, expected) + + def test_bytes_to_char(self): + array = np.array([['ab', 'cd'], ['ef', 'gh']]) + expected = np.array([[['a', 'b'], ['c', 'd']], + [['e', 'f'], ['g', 'h']]]) + actual = conventions.bytes_to_char(array) + assert_array_equal(actual, expected) + + expected = np.array([[['a', 'b'], ['e', 'f']], + [['c', 'd'], ['g', 'h']]]) + actual = conventions.bytes_to_char(array.T) + assert_array_equal(actual, expected) + + def test_vectorized_indexing(self): + array = np.array([[b'a', b'b', b'c'], [b'd', b'e', b'f']], dtype='S') + stacked = conventions.StackedBytesArray(array) + expected = np.array([[b'abc', b'def'], [b'def', b'abc']]) + indexer = V[np.array([[0, 1], [1, 0]])] + actual = stacked[indexer] + assert_array_equal(actual, expected) + + +class TestBytesToStringArray(TestCase): + + def test_encoding(self): + encoding = 'utf-8' + raw_array = np.array([b'abc', u'ß∂µ∆'.encode(encoding)]) + actual = conventions.BytesToStringArray(raw_array, encoding=encoding) + expected = np.array([u'abc', u'ß∂µ∆'], dtype=object) + + assert actual.dtype == expected.dtype + assert actual.shape == expected.shape + assert actual.size == expected.size + assert actual.ndim == expected.ndim + assert_array_equal(expected, actual) + assert_array_equal(expected[0], actual[B[0]]) + + def test_scalar(self): + expected = np.array(u'abc', 
dtype=object) + actual = conventions.BytesToStringArray( + np.array(b'abc'), encoding='utf-8') + assert actual.dtype == expected.dtype + assert actual.shape == expected.shape + assert actual.size == expected.size + assert actual.ndim == expected.ndim + with pytest.raises(TypeError): + len(actual) + np.testing.assert_array_equal(expected, actual) + with pytest.raises(IndexError): + actual[B[:2]] + assert str(actual) == str(expected) + + def test_decode_bytes_array(self): + encoding = 'utf-8' + raw_array = np.array([b'abc', u'ß∂µ∆'.encode(encoding)]) + expected = np.array([u'abc', u'ß∂µ∆'], dtype=object) + actual = conventions.decode_bytes_array(raw_array, encoding) + np.testing.assert_array_equal(actual, expected) + + +class TestBoolTypeArray(TestCase): + def test_booltype_array(self): + x = np.array([1, 0, 1, 1, 0], dtype='i1') + bx = conventions.BoolTypeArray(x) + assert bx.dtype == np.bool + assert_array_equal(bx, np.array([True, False, True, True, False], + dtype=np.bool)) + + +class TestNativeEndiannessArray(TestCase): + def test(self): + x = np.arange(5, dtype='>i8') + expected = np.arange(5, dtype='int64') + a = conventions.NativeEndiannessArray(x) + assert a.dtype == expected.dtype + assert a.dtype == expected[:].dtype + assert_array_equal(a, expected) + + +def test_decode_cf_with_conflicting_fill_missing_value(): + var = Variable(['t'], np.arange(10), + {'units': 'foobar', + 'missing_value': 0, + '_FillValue': 1}) + with raises_regex(ValueError, "_FillValue and missing_value"): + conventions.decode_cf_variable('t', var) + + expected = Variable(['t'], np.arange(10), {'units': 'foobar'}) + + var = Variable(['t'], np.arange(10), + {'units': 'foobar', + 'missing_value': np.nan, + '_FillValue': np.nan}) + actual = conventions.decode_cf_variable('t', var) + assert_identical(actual, expected) + + var = Variable(['t'], np.arange(10), + {'units': 'foobar', + 'missing_value': np.float32(np.nan), + '_FillValue': np.float32(np.nan)}) + actual = conventions.decode_cf_variable('t', var) + assert_identical(actual, expected) + + +@requires_netCDF4 +class TestEncodeCFVariable(TestCase): + def test_incompatible_attributes(self): + invalid_vars = [ + Variable(['t'], pd.date_range('2000-01-01', periods=3), + {'units': 'foobar'}), + Variable(['t'], pd.to_timedelta(['1 day']), {'units': 'foobar'}), + Variable(['t'], [0, 1, 2], {'add_offset': 0}, {'add_offset': 2}), + Variable(['t'], [0, 1, 2], {'_FillValue': 0}, {'_FillValue': 2}), + ] + for var in invalid_vars: + with pytest.raises(ValueError): + conventions.encode_cf_variable(var) + + def test_missing_fillvalue(self): + v = Variable(['x'], np.array([np.nan, 1, 2, 3])) + v.encoding = {'dtype': 'int16'} + with pytest.warns(Warning, match='floating point data as an integer'): + conventions.encode_cf_variable(v) + + def test_multidimensional_coordinates(self): + # regression test for GH1763 + # Set up test case with coordinates that have overlapping (but not + # identical) dimensions. 
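+        # lon1/lat1 live on (x1, y1), lon2/lat2 on (x2, y1) and lon3/lat3
+        # on (x1, y2); after encoding, each foo variable should list only
+        # its own pair in its 'coordinates' attribute.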
+ zeros1 = np.zeros((1, 5, 3)) + zeros2 = np.zeros((1, 6, 3)) + zeros3 = np.zeros((1, 5, 4)) + orig = Dataset({ + 'lon1': (['x1', 'y1'], zeros1.squeeze(0), {}), + 'lon2': (['x2', 'y1'], zeros2.squeeze(0), {}), + 'lon3': (['x1', 'y2'], zeros3.squeeze(0), {}), + 'lat1': (['x1', 'y1'], zeros1.squeeze(0), {}), + 'lat2': (['x2', 'y1'], zeros2.squeeze(0), {}), + 'lat3': (['x1', 'y2'], zeros3.squeeze(0), {}), + 'foo1': (['time', 'x1', 'y1'], zeros1, + {'coordinates': 'lon1 lat1'}), + 'foo2': (['time', 'x2', 'y1'], zeros2, + {'coordinates': 'lon2 lat2'}), + 'foo3': (['time', 'x1', 'y2'], zeros3, + {'coordinates': 'lon3 lat3'}), + 'time': ('time', [0.], {'units': 'hours since 2017-01-01'}), + }) + orig = conventions.decode_cf(orig) + # Encode the coordinates, as they would be in a netCDF output file. + enc, attrs = conventions.encode_dataset_coordinates(orig) + # Make sure we have the right coordinates for each variable. + foo1_coords = enc['foo1'].attrs.get('coordinates', '') + foo2_coords = enc['foo2'].attrs.get('coordinates', '') + foo3_coords = enc['foo3'].attrs.get('coordinates', '') + assert set(foo1_coords.split()) == set(['lat1', 'lon1']) + assert set(foo2_coords.split()) == set(['lat2', 'lon2']) + assert set(foo3_coords.split()) == set(['lat3', 'lon3']) + # Should not have any global coordinates. + assert 'coordinates' not in attrs + + +@requires_netCDF4 +class TestDecodeCF(TestCase): + def test_dataset(self): + original = Dataset({ + 't': ('t', [0, 1, 2], {'units': 'days since 2000-01-01'}), + 'foo': ('t', [0, 0, 0], {'coordinates': 'y', 'units': 'bar'}), + 'y': ('t', [5, 10, -999], {'_FillValue': -999}) + }) + expected = Dataset({'foo': ('t', [0, 0, 0], {'units': 'bar'})}, + {'t': pd.date_range('2000-01-01', periods=3), + 'y': ('t', [5.0, 10.0, np.nan])}) + actual = conventions.decode_cf(original) + assert_identical(expected, actual) + + def test_invalid_coordinates(self): + # regression test for GH308 + original = Dataset({'foo': ('t', [1, 2], {'coordinates': 'invalid'})}) + actual = conventions.decode_cf(original) + assert_identical(original, actual) + + def test_decode_coordinates(self): + # regression test for GH610 + original = Dataset({'foo': ('t', [1, 2], {'coordinates': 'x'}), + 'x': ('t', [4, 5])}) + actual = conventions.decode_cf(original) + assert actual.foo.encoding['coordinates'] == 'x' + + def test_0d_int32_encoding(self): + original = Variable((), np.int32(0), encoding={'dtype': 'int64'}) + expected = Variable((), np.int64(0)) + actual = conventions.maybe_encode_nonstring_dtype(original) + assert_identical(expected, actual) + + def test_decode_cf_with_multiple_missing_values(self): + original = Variable(['t'], [0, 1, 2], + {'missing_value': np.array([0, 1])}) + expected = Variable(['t'], [np.nan, np.nan, 2], {}) + with warnings.catch_warnings(record=True) as w: + actual = conventions.decode_cf_variable('t', original) + assert_identical(expected, actual) + assert 'has multiple fill' in str(w[0].message) + + def test_decode_cf_with_drop_variables(self): + original = Dataset({ + 't': ('t', [0, 1, 2], {'units': 'days since 2000-01-01'}), + 'x': ("x", [9, 8, 7], {'units': 'km'}), + 'foo': (('t', 'x'), [[0, 0, 0], [1, 1, 1], [2, 2, 2]], + {'units': 'bar'}), + 'y': ('t', [5, 10, -999], {'_FillValue': -999}) + }) + expected = Dataset({ + 't': pd.date_range('2000-01-01', periods=3), + 'foo': (('t', 'x'), [[0, 0, 0], [1, 1, 1], [2, 2, 2]], + {'units': 'bar'}), + 'y': ('t', [5, 10, np.nan]) + }) + actual = conventions.decode_cf(original, drop_variables=("x",)) + actual2 = 
conventions.decode_cf(original, drop_variables="x") + assert_identical(expected, actual) + assert_identical(expected, actual2) + + def test_invalid_time_units_raises_eagerly(self): + ds = Dataset({'time': ('time', [0, 1], {'units': 'foobar since 123'})}) + with raises_regex(ValueError, 'unable to decode time'): + decode_cf(ds) + + @requires_netCDF4 + def test_dataset_repr_with_netcdf4_datetimes(self): + # regression test for #347 + attrs = {'units': 'days since 0001-01-01', 'calendar': 'noleap'} + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'unable to decode time') + ds = decode_cf(Dataset({'time': ('time', [0, 1], attrs)})) + assert '(time) object' in repr(ds) + + attrs = {'units': 'days since 1900-01-01'} + ds = decode_cf(Dataset({'time': ('time', [0, 1], attrs)})) + assert '(time) datetime64[ns]' in repr(ds) + + @requires_netCDF4 + def test_decode_cf_datetime_transition_to_invalid(self): + # manually create dataset with not-decoded date + from datetime import datetime + ds = Dataset(coords={'time': [0, 266 * 365]}) + units = 'days since 2000-01-01 00:00:00' + ds.time.attrs = dict(units=units) + ds_decoded = conventions.decode_cf(ds) + + expected = [datetime(2000, 1, 1, 0, 0), + datetime(2265, 10, 28, 0, 0)] + + assert_array_equal(ds_decoded.time.values, expected) + + +class CFEncodedInMemoryStore(WritableCFDataStore, InMemoryDataStore): + pass + + +class NullWrapper(utils.NDArrayMixin): + """ + Just for testing, this lets us create a numpy array directly + but make it look like its not in memory yet. + """ + + def __init__(self, array): + self.array = array + + def __getitem__(self, key): + return self.array[indexing.orthogonal_indexer(key, self.shape)] + + +def null_wrap(ds): + """ + Given a data store this wraps each variable in a NullWrapper so that + it appears to be out of memory. 
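+    This lets the conventions tests check decoding behavior without
+    assuming the underlying values have already been loaded.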
+ """ + variables = dict((k, Variable(v.dims, NullWrapper(v.values), v.attrs)) + for k, v in iteritems(ds)) + return InMemoryDataStore(variables=variables, attributes=ds.attrs) + + +@requires_netCDF4 +class TestCFEncodedDataStore(CFEncodedDataTest, TestCase): + @contextlib.contextmanager + def create_store(self): + yield CFEncodedInMemoryStore() + + @contextlib.contextmanager + def roundtrip(self, data, save_kwargs={}, open_kwargs={}, + allow_cleanup_failure=False): + store = CFEncodedInMemoryStore() + data.dump_to_store(store, **save_kwargs) + yield open_dataset(store, **open_kwargs) + + def test_roundtrip_coordinates(self): + raise unittest.SkipTest('cannot roundtrip coordinates yet for ' + 'CFEncodedInMemoryStore') + + def test_invalid_dataarray_names_raise(self): + pass + + def test_encoding_kwarg(self): + pass From b34879d270ace467eb758f33f4ded531543036e1 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Mon, 22 Jan 2018 11:55:16 -0500 Subject: [PATCH 13/58] Fix failing test in test_utils.py --- xarray/core/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index bd398ad718d..8a2a34a8b0b 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -40,7 +40,7 @@ def wrapper(*args, **kwargs): def _maybe_cast_to_netcdftimeindex(index): try: from netcdftime._netcdftime import datetime as ncdatetime - from ..conventions.netcdftimeindex import NetCDFTimeIndex + from ..coding.netcdftimeindex import NetCDFTimeIndex if len(index): if isinstance(index[0], ncdatetime): index = NetCDFTimeIndex(index) From e93b62d35f683a688ac62ebb568243a01e243758 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Mon, 22 Jan 2018 13:21:49 -0500 Subject: [PATCH 14/58] flake8 --- xarray/coding/times.py | 4 ++-- xarray/core/accessors.py | 2 +- xarray/tests/test_backends.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 8c38ae28438..2904481ec5d 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -17,7 +17,7 @@ # pandas < 0.20 from pandas.tslib import OutOfBoundsDatetime -from .variables import (SerializationWarning, VariableCoder, +from .variables import (VariableCoder, lazy_elemwise_func, pop_to, safe_setitem, unpack_for_decoding, unpack_for_encoding) from ..core import indexing @@ -222,7 +222,7 @@ def infer_datetime_units(dates): reference_date = dates[0] if len(dates) > 0 else '1970-01-01' units = _infer_time_units_from_diff(unique_timedeltas) return '%s since %s' % (units, reference_date) - + def infer_timedelta_units(deltas): """Given an array of timedeltas, returns a CF compatible time-unit from diff --git a/xarray/core/accessors.py b/xarray/core/accessors.py index e409834475e..71ade45f3b1 100644 --- a/xarray/core/accessors.py +++ b/xarray/core/accessors.py @@ -19,7 +19,7 @@ def _season_from_months(months): def _access_through_netcdftimeindex(values, name): - """Coerce an array of datetime-like values to a NetCDFTimeIndex + """Coerce an array of datetime-like values to a NetCDFTimeIndex and access requested datetime component """ from ..coding.netcdftimeindex import NetCDFTimeIndex diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 17ed119248b..0015891c554 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1859,7 +1859,7 @@ def test_roundtrip_netcdftime_datetime_data(self): expected_t = np.array(times) abs_diff = abs(actual.t.values - expected_t) self.assertTrue((abs_diff <= 
np.timedelta64(1, 's')).all()) - + expected_t0 = np.array([date_type(1, 1, 1)]) abs_diff = abs(actual.t0.values - expected_t0) self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) From 32d798688011e1429d634ad2d2c10a644a9e7b7e Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Thu, 1 Mar 2018 11:58:38 -0500 Subject: [PATCH 15/58] Update for standalone netcdftime --- xarray/coding/times.py | 10 +++++++ xarray/tests/test_backends.py | 29 +++++++++++++------ xarray/tests/test_coding_times.py | 42 +++++++++++++++++++--------- xarray/tests/test_netcdftimeindex.py | 4 +-- 4 files changed, 61 insertions(+), 24 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index cd603bbe612..f676ab990d3 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -236,10 +236,20 @@ def infer_datetime_units(dates): dates = np.asarray(dates).ravel() unique_timedeltas = np.unique(pd.to_timedelta(np.diff(dates))) reference_date = dates[0] if len(dates) > 0 else '1970-01-01' + reference_date = format_netcdftime_datetime(reference_date) units = _infer_time_units_from_diff(unique_timedeltas) return '%s since %s' % (units, reference_date) +def format_netcdftime_datetime(date): + """Converts a netcdftime.datetime object to a string with the format: + YYYY-MM-DD HH:MM:SS.UUUUUU + """ + return '{:04d}-{:02d}-{:02d} {:02d}:{:02d}:{:02d}.{:06d}'.format( + date.year, date.month, date.day, date.hour, date.minute, date.second, + date.microsecond) + + def infer_timedelta_units(deltas): """Given an array of timedeltas, returns a CF compatible time-unit from {'days', 'hours', 'minutes' 'seconds'} (the first one that can evenly diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 01a0ace899e..25cce35ab5b 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -350,23 +350,32 @@ def test_roundtrip_datetime_data(self): assert actual.t0.encoding['units'] == 'days since 1950-01-01' def test_roundtrip_netcdftime_datetime_data(self): - from .test_coding_times import _non_standard_netcdftime_date_types - date_types = _non_standard_netcdftime_date_types() + from datetime import datetime + from .test_coding_times import _all_netcdftime_date_types + + date_types = _all_netcdftime_date_types() for date_type in date_types.values(): times = [date_type(1, 1, 1), date_type(1, 1, 2)] expected = Dataset({'t': ('t', times), 't0': times[0]}) kwds = {'encoding': {'t0': {'units': 'days since 0001-01-01'}}} + if times[0].calendar == 'gregorian': + # netcdftime.num2date decodes dates from the Gregorian calendar + # to datetime.datetime objects + expected_decoded_t = np.array( + [datetime(1, 1, 1), datetime(1, 1, 2)]) + expected_decoded_t0 = np.array([datetime(1, 1, 1)]) + else: + expected_decoded_t = np.array(times) + expected_decoded_t0 = np.array([date_type(1, 1, 1)]) with self.roundtrip(expected, save_kwargs=kwds) as actual: - expected_t = np.array(times) - abs_diff = abs(actual.t.values - expected_t) + abs_diff = abs(actual.t.values - expected_decoded_t) self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) self.assertEquals(actual.t.encoding['units'], - 'days since 1-01-01 00:00:00') + 'days since 0001-01-01 00:00:00.000000') self.assertEquals(actual.t.encoding['calendar'], times[0].calendar) - expected_t0 = np.array([date_type(1, 1, 1)]) - abs_diff = abs(actual.t0.values - expected_t0) + abs_diff = abs(actual.t0.values - expected_decoded_t0) self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) self.assertEquals(actual.t0.encoding['units'], @@ 
-1894,11 +1903,13 @@ def test_roundtrip_datetime_data(self): def test_roundtrip_netcdftime_datetime_data(self): # Override method in DatasetIOTestCases - remove not applicable # save_kwds - from .test_coding_times import _non_standard_netcdftime_date_types - date_types = _non_standard_netcdftime_date_types() + from .test_coding_times import _all_netcdftime_date_types + + date_types = _all_netcdftime_date_types() for date_type in date_types.values(): times = [date_type(1, 1, 1), date_type(1, 1, 2)] expected = Dataset({'t': ('t', times), 't0': times[0]}) + with self.roundtrip(expected) as actual: expected_t = np.array(times) abs_diff = abs(actual.t.values - expected_t) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 6c82d8db79a..d4e434c091c 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -376,19 +376,20 @@ def test_infer_datetime_units(self): def test_infer_netcdftime_datetime_units(self): date_types = _all_netcdftime_date_types() for date_type in date_types.values(): - for dates, expected in [([date_type(1900, 1, 1), - date_type(1900, 1, 2)], - 'days since 1900-01-01 00:00:00'), - ([date_type(1900, 1, 1, 12), - date_type(1900, 1, 1, 13)], - 'seconds since 1900-01-01 12:00:00'), - ([date_type(1900, 1, 1), - date_type(1900, 1, 2), - date_type(1900, 1, 2, 0, 0, 1)], - 'seconds since 1900-01-01 00:00:00'), - ([date_type(1900, 1, 1), - date_type(1900, 1, 2, 0, 0, 0, 5)], - 'days since 1900-01-01 00:00:00')]: + for dates, expected in [ + ([date_type(1900, 1, 1), + date_type(1900, 1, 2)], + 'days since 1900-01-01 00:00:00.000000'), + ([date_type(1900, 1, 1, 12), + date_type(1900, 1, 1, 13)], + 'seconds since 1900-01-01 12:00:00.000000'), + ([date_type(1900, 1, 1), + date_type(1900, 1, 2), + date_type(1900, 1, 2, 0, 0, 1)], + 'seconds since 1900-01-01 00:00:00.000000'), + ([date_type(1900, 1, 1), + date_type(1900, 1, 2, 0, 0, 0, 5)], + 'days since 1900-01-01 00:00:00.000000')]: assert expected == coding.times.infer_datetime_units(dates) def test_cf_timedelta(self): @@ -442,3 +443,18 @@ def test_infer_timedelta_units(self): (pd.to_timedelta(['1m', '2m', np.nan]), 'minutes'), (pd.to_timedelta(['1m3s', '1m4s']), 'seconds')]: assert expected == coding.times.infer_timedelta_units(deltas) + + +@pytest.mark.parametrize(['date_args', 'expected'], + [((1, 2, 3, 4, 5, 6), + '0001-02-03 04:05:06.000000'), + ((10, 2, 3, 4, 5, 6), + '0010-02-03 04:05:06.000000'), + ((100, 2, 3, 4, 5, 6), + '0100-02-03 04:05:06.000000'), + ((1000, 2, 3, 4, 5, 6), + '1000-02-03 04:05:06.000000')]) +def test_format_netcdftime_datetime(date_args, expected): + for date_type in _all_netcdftime_date_types().values(): + result = coding.times.format_netcdftime_datetime(date_type(*date_args)) + assert result == expected diff --git a/xarray/tests/test_netcdftimeindex.py b/xarray/tests/test_netcdftimeindex.py index 1b5dc862cfc..ff2d763853e 100644 --- a/xarray/tests/test_netcdftimeindex.py +++ b/xarray/tests/test_netcdftimeindex.py @@ -10,8 +10,8 @@ from xarray.tests import assert_array_equal, assert_identical # Putting this at the module level for now, though technically we -# don't need netCDF4 to test the string parser. -pytest.importorskip('netCDF4') +# don't need netcdftime to test the string parser. 
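+# importorskip raises a module-level skip, so when netcdftime is missing
+# every test in this module is skipped rather than erroring at import time.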
+pytest.importorskip('netcdftime') def date_dict(year=None, month=None, day=None, From 985517688e1cadbc6026193a8d486a1fd57f691d Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Thu, 1 Mar 2018 12:05:02 -0500 Subject: [PATCH 16/58] Address stickler-ci comments --- xarray/coding/netcdftimeindex.py | 1 + xarray/tests/test_netcdftimeindex.py | 2 ++ xarray/tests/test_utils.py | 4 ++-- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/xarray/coding/netcdftimeindex.py b/xarray/coding/netcdftimeindex.py index 7d76440296f..8e4f543f4a0 100644 --- a/xarray/coding/netcdftimeindex.py +++ b/xarray/coding/netcdftimeindex.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import import re from datetime import timedelta diff --git a/xarray/tests/test_netcdftimeindex.py b/xarray/tests/test_netcdftimeindex.py index ff2d763853e..533fcf123ec 100644 --- a/xarray/tests/test_netcdftimeindex.py +++ b/xarray/tests/test_netcdftimeindex.py @@ -1,3 +1,5 @@ +from __future__ import absolute_import + import pytest import pandas as pd diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index aa8f789dc0f..ae658a4aa16 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -9,7 +9,7 @@ from xarray.core.pycompat import OrderedDict from .test_coding_times import _all_netcdftime_date_types from . import (TestCase, requires_dask, assert_array_equal, - requires_netCDF4) + requires_netcdftime) class TestAlias(TestCase): @@ -37,7 +37,7 @@ def test(self): assert_array_equal(expected, actual) assert expected.dtype == actual.dtype - @requires_netCDF4 + @requires_netcdftime def test_netcdftimeindex(self): date_types = _all_netcdftime_date_types() for date_type in date_types.values(): From 8d61fdb43caf500598aa7e0b5864622c93cde35b Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Thu, 1 Mar 2018 14:19:45 -0500 Subject: [PATCH 17/58] Skip test_format_netcdftime_datetime if netcdftime not installed --- xarray/tests/test_coding_times.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index d4e434c091c..3d5f43aa539 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -455,6 +455,7 @@ def test_infer_timedelta_units(self): ((1000, 2, 3, 4, 5, 6), '1000-02-03 04:05:06.000000')]) def test_format_netcdftime_datetime(date_args, expected): + pytest.importorskip('netcdftime') for date_type in _all_netcdftime_date_types().values(): result = coding.times.format_netcdftime_datetime(date_type(*date_args)) assert result == expected From 6b87da7beb0eac0c4109aaf4d4636a131cf8c522 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Fri, 9 Mar 2018 13:15:57 -0500 Subject: [PATCH 18/58] A start on documentation --- doc/time-series.rst | 88 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 87 insertions(+), 1 deletion(-) diff --git a/doc/time-series.rst b/doc/time-series.rst index 4d9a995051a..cf4d89ade77 100644 --- a/doc/time-series.rst +++ b/doc/time-series.rst @@ -59,7 +59,10 @@ You can manual decode arrays in this form by passing a dataset to One unfortunate limitation of using ``datetime64[ns]`` is that it limits the native representation of dates to those that fall between the years 1678 and 2262. When a netCDF file contains dates outside of these bounds, dates will be -returned as arrays of ``netcdftime.datetime`` objects. +returned as arrays of ``netcdftime.datetime`` objects and a ``NetCDFTimeIndex`` +will be used for indexing. 
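+
+For example, decoding a time coordinate encoded with the ``'noleap'``
+calendar yields an object-dtype array of ``netcdftime`` dates (a minimal
+sketch, adapted from the test suite, assuming ``netcdftime`` is installed):
+
+.. ipython:: python
+    :okwarning:
+
+    attrs = {'units': 'days since 0001-01-01', 'calendar': 'noleap'}
+    ds = xr.decode_cf(xr.Dataset({'time': ('time', [0, 1], attrs)}))
+    ds.time
+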
The ``NetCDFTimeIndex`` enables only a subset of +the indexing functionality of a ``pandas.DatetimeIndex``. See +:ref:`NetCDFTimeIndex` for more information. Datetime indexing ----------------- @@ -196,3 +199,86 @@ Dataset and DataArray objects with an arbitrary number of dimensions. For more examples of using grouped operations on a time dimension, see :ref:`toy weather data`. + + +.. _NetCDFTimeIndex: + +Non-standard calendars and dates outside the Timestamp-valid range +------------------------------------------------------------------ + +.. note:: + + In a change from prior behavior, as of version 0.??.0, if a dataset is + encoded using a non-standard calendar type it will always be read in using + the corresponding date type from ``netcdftime``. This is different from the + prior behavior where if the dates were within the Timestamp-valid range and + representable by standard datetimes (e.g. for a ``'noleap'`` calendar) they + would be decoded into standard datetimes indexed with a + ``pandas.DatetimeIndex``. + + As of version 0.??.0, a ``NetCDFTimeIndex`` will be used for time indexing + if any of the following are true: + + - The dates are from a non-standard calendar + - Any dates are outside the Timestamp-valid range + + Otherwise a ``pandas.DatetimeIndex`` will be used. + +Through the optional ``netcdftime`` library and a custom subclass of +``pandas.Index``, xarray supports a subset of the indexing functionality enabled +through the standard ``pandas.DatetimeIndex`` for dates from non-standard +calendars or dates using a standard calendar, but outside the +`Timestamp-valid range`_ (approximately between years 1678 and 2262). +For instance, you can create a DataArray indexed by a time +coordinate with a no-leap calendar and it will automatically be indexed using a +``NetCDFTimeIndex``. + +.. ipython:: python + + from netcdftime import DatetimeNoLeap + + dates = [DatetimeNoLeap((month - 1) // 12 + 1, + (month - 1) % 12 + 1, 1) for month in range(1, 25)] + da = xr.DataArray(np.arange(24), coords=[dates], dims=['time'], + name='foo') + +For data indexed by a ``NetCDFTimeIndex`` xarray currently supports `partial +datetime string indexing`_ using strictly `ISO8601-format`_ partial datetime +strings: + +.. ipython:: python + + da.sel(time='0001') + da.sel(time=slice('0001-05', '0002-02')) + +access of basic datetime components via the ``dt`` accessor (in this case just +"year", "month", "day", "hour", "minute", "second", "microsecond", and "season"): + +.. ipython:: python + + da.time.dt.year + da.time.dt.month + da.time.dt.season + +group-by operations based on datetime accessor attributes (e.g. by month of the +year): + +.. ipython:: python + + da.groupby('time.month').sum() + +and serialization: + +.. ipython:: python + + da.to_netcdf('example.nc') + xr.open_dataset('example.nc') + +.. note:: + + Currently resampling along the time dimension for data indexed by a + ``NetCDFTimeIndex`` is not supported. + +.. _Timestamp-valid range: https://pandas.pydata.org/pandas-docs/stable/timeseries.html#timestamp-limitations +.. _ISO8601-format: https://en.wikipedia.org/wiki/ISO_8601 +.. 
_partial datetime string indexing: https://pandas.pydata.org/pandas-docs/stable/timeseries.html#partial-string-indexing From 3610e6e47965214eab0c26a1e271a9930e17e813 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Fri, 9 Mar 2018 14:15:57 -0500 Subject: [PATCH 19/58] Fix failing zarr tests related to netcdftime encoding --- xarray/backends/zarr.py | 3 ++- xarray/coding/times.py | 14 ++------------ xarray/core/common.py | 16 +++++++--------- 3 files changed, 11 insertions(+), 22 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 8797e3104a1..b60505457cf 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -8,6 +8,7 @@ from .. import Variable, coding, conventions from ..core import indexing +from ..core.common import contains_netcdftime_datetimes from ..core.pycompat import OrderedDict, integer_types, iteritems from ..core.utils import FrozenOrderedDict, HiddenKeyDict from .common import AbstractWritableDataStore, ArrayWriter, BackendArray @@ -221,7 +222,7 @@ def encode_zarr_variable(var, needs_copy=True, name=None): A variable which has been encoded as described above. """ - if var.dtype.kind == 'O': + if var.dtype.kind == 'O' and not contains_netcdftime_datetimes(var): raise NotImplementedError("Variable `%s` is an object. Zarr " "store can't yet encode objects." % name) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index f676ab990d3..69e6478e3ff 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -9,6 +9,7 @@ import numpy as np import pandas as pd +from ..core.common import contains_netcdftime_datetimes from ..core import indexing from ..core.formatting import first_n_items, format_timestamp, last_item from ..core.pycompat import PY3 @@ -357,23 +358,12 @@ def encode_cf_timedelta(timedeltas, units=None): return (num, units) -def _contains_netcdftimes(data): - """Check if the first element of an array contains a - netcdftime.datetime object. 
- """ - try: - from netcdftime._netcdftime import datetime as ncdatetime - return isinstance(data.flatten()[0], ncdatetime) - except ImportError: - return False - - class CFDatetimeCoder(VariableCoder): def encode(self, variable, name=None): dims, data, attrs, encoding = unpack_for_encoding(variable) if (np.issubdtype(data.dtype, np.datetime64) or - _contains_netcdftimes(data)): + contains_netcdftime_datetimes(variable)): (data, units, calendar) = encode_cf_datetime( data, encoding.pop('units', None), diff --git a/xarray/core/common.py b/xarray/core/common.py index d6e4abe8e5d..4c3b3ebcaaf 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -832,18 +832,16 @@ def is_np_datetime_like(dtype): np.issubdtype(dtype, np.timedelta64)) -def _contains_netcdftime_datetimes(var): +def contains_netcdftime_datetimes(var): """Check if a variable contains netcdftime datetime objects""" - from netcdftime._netcdftime import datetime - return isinstance(var.data.ravel()[0], datetime) + try: + from netcdftime._netcdftime import datetime + return isinstance(var.data.flatten()[0], datetime) + except ImportError: + return False def _contains_datetime_like_objects(var): """Check if a variable contains datetime like objects (either np.datetime64, np.timedelta64, or netcdftime._netcdftime.datetime)""" - if is_np_datetime_like(var.dtype): - return True - try: - return _contains_netcdftime_datetimes(var) - except ImportError: - return False + return is_np_datetime_like(var.dtype) or contains_netcdftime_datetimes(var) From 8f69a9030dc6c0e4c25853be8b1168c82cd946bb Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Fri, 9 Mar 2018 14:38:40 -0500 Subject: [PATCH 20/58] Simplify test_decode_standard_calendar_single_element_non_ns_range --- xarray/tests/test_coding_times.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 3d5f43aa539..fe380103145 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -186,9 +186,7 @@ def test_decode_standard_calendar_single_element_ns_range(self): @requires_netcdftime def test_decode_standard_calendar_single_element_non_ns_range(self): - from datetime import datetime - from netcdftime import DatetimeGregorian - + nctime = _import_netcdftime() units = 'days since 0001-01-01' for days in [1, 1470376]: for calendar in coding.times._STANDARD_CALENDARS: @@ -198,15 +196,8 @@ def test_decode_standard_calendar_single_element_non_ns_range(self): 'Unable to decode time axis') actual = coding.times.decode_cf_datetime( num_time, units, calendar=calendar) - # Confusing, but this is how netCDF4.num2date behaves -- - # According to the documentation this is supposed to have - # something to do with whether the date falls before or - # after the breakpoint between the Julian - # and Gregorian calendars (1582-10-15). 
- if calendar == 'standard' and days == 1: - assert isinstance(actual.item(), DatetimeGregorian) - else: - assert isinstance(actual.item(), datetime) + expected = nctime.num2date(days, units, calendar) + assert isinstance(actual.item(), type(expected)) @requires_netcdftime def test_decode_non_standard_calendar_single_element(self): From cec909c5c75ea6927a8ec61bb1e9755f68067d2a Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 10 Mar 2018 08:13:11 -0500 Subject: [PATCH 21/58] Address a couple review comments --- xarray/coding/netcdftimeindex.py | 12 ++++++------ xarray/core/common.py | 3 ++- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/xarray/coding/netcdftimeindex.py b/xarray/coding/netcdftimeindex.py index 8e4f543f4a0..a12876501cf 100644 --- a/xarray/coding/netcdftimeindex.py +++ b/xarray/coding/netcdftimeindex.py @@ -141,12 +141,6 @@ def assert_all_valid_date_type(data): class NetCDFTimeIndex(pd.Index): - def __new__(cls, data): - result = object.__new__(cls) - assert_all_valid_date_type(data) - result._data = np.array(data) - return result - year = _field_accessor('year', 'The year of the datetime') month = _field_accessor('month', 'The month of the datetime') day = _field_accessor('day', 'The days of the datetime') @@ -157,6 +151,12 @@ def __new__(cls, data): 'The microseconds of the datetime') date_type = property(get_date_type) + def __new__(cls, data): + result = object.__new__(cls) + assert_all_valid_date_type(data) + result._data = np.array(data) + return result + def _partial_date_slice(self, resolution, parsed): """Adapted from pandas.tseries.index.DatetimeIndex._partial_date_slice diff --git a/xarray/core/common.py b/xarray/core/common.py index 4c3b3ebcaaf..cfd3eb9bc27 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -836,9 +836,10 @@ def contains_netcdftime_datetimes(var): """Check if a variable contains netcdftime datetime objects""" try: from netcdftime._netcdftime import datetime - return isinstance(var.data.flatten()[0], datetime) except ImportError: return False + else: + return isinstance(var.data.flatten()[0], datetime) def _contains_datetime_like_objects(var): From 422792b6d89e99aa2fa8617b944503ffe13d41be Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 10 Mar 2018 09:07:37 -0500 Subject: [PATCH 22/58] Use else clause in _maybe_cast_to_netcdftimeindex --- xarray/core/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 4ffa8a678f7..86e8079604b 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -40,12 +40,13 @@ def _maybe_cast_to_netcdftimeindex(index): try: from netcdftime._netcdftime import datetime as ncdatetime from ..coding.netcdftimeindex import NetCDFTimeIndex + except ImportError: + return index + else: if len(index): if isinstance(index[0], ncdatetime): index = NetCDFTimeIndex(index) return index - except ImportError: - return index def safe_cast_to_index(array): From de74037c9f35cd18266e18c70fe335b757e94872 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 10 Mar 2018 13:29:50 -0500 Subject: [PATCH 23/58] Start on adding enable_netcdftimeindex option --- doc/time-series.rst | 5 +- xarray/coding/times.py | 49 +- xarray/core/options.py | 4 + xarray/core/utils.py | 20 +- xarray/tests/test_coding_times.py | 898 ++++++++++++++++++------------ xarray/tests/test_utils.py | 57 +- 6 files changed, 612 insertions(+), 421 deletions(-) diff --git a/doc/time-series.rst b/doc/time-series.rst index cf4d89ade77..b63532f772b 100644 --- 
a/doc/time-series.rst +++ b/doc/time-series.rst @@ -235,10 +235,11 @@ coordinate with a no-leap calendar and it will automatically be indexed using a .. ipython:: python + from itertools import product from netcdftime import DatetimeNoLeap - dates = [DatetimeNoLeap((month - 1) // 12 + 1, - (month - 1) % 12 + 1, 1) for month in range(1, 25)] + dates = [DatetimeNoLeap(year, month, 1) for year, month in + product(range(1, 3), range(1, 13))] da = xr.DataArray(np.arange(24), coords=[dates], dims=['time'], name='foo') diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 69e6478e3ff..4b4870e2ede 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -12,10 +12,11 @@ from ..core.common import contains_netcdftime_datetimes from ..core import indexing from ..core.formatting import first_n_items, format_timestamp, last_item +from ..core.options import OPTIONS from ..core.pycompat import PY3 from ..core.variable import Variable from .variables import ( - VariableCoder, lazy_elemwise_func, pop_to, + SerializationWarning, VariableCoder, lazy_elemwise_func, pop_to, safe_setitem, unpack_for_decoding, unpack_for_encoding) try: @@ -81,24 +82,36 @@ def _unpack_netcdf_time_units(units): def _decode_datetime_with_netcdftime(num_dates, units, calendar): nctime = _import_netcdftime() dates = np.asarray(nctime.num2date(num_dates, units, calendar)) - if calendar in _STANDARD_CALENDARS: - if (dates[np.nanargmin(num_dates)].year < 1678 or - dates[np.nanargmax(num_dates)].year >= 2262): - warnings.warn( - 'Unable to decode time axis into full ' - 'numpy.datetime64 objects, continuing using dummy ' - 'netCDF4.datetime objects instead, reason: dates out ' - 'of range', RuntimeWarning, stacklevel=3) - else: - dates = nctime_to_nptime(dates) + + if (dates[np.nanargmin(num_dates)].year < 1678 or + dates[np.nanargmax(num_dates)].year >= 2262): + warnings.warn( + 'Unable to decode time axis into full ' + 'numpy.datetime64 objects, continuing using dummy ' + 'netCDF4.datetime objects instead, reason: dates out ' + 'of range', SerializationWarning, stacklevel=3) else: - warnings.warn('Unable to decode time axis into full numpy.datetime64 ' - 'objects, because dates are encoded using a ' - 'non-standard calendar ({}). Using netCDF4.datetime ' - 'objects instead. Time indexing will be done using a ' - 'NetCDFTimeIndex rather than ' - 'a DatetimeIndex'.format(calendar), - DeprecationWarning, stacklevel=3) + if OPTIONS['enable_netcdftimeindex']: + if calendar in _STANDARD_CALENDARS: + dates = nctime_to_nptime(dates) + else: + warnings.warn( + 'Unable to decode time axis into full numpy.datetime64 ' + 'objects, because dates are encoded using a ' + 'non-standard calendar ({}). Using netCDF4.datetime ' + 'objects instead. Time indexing will be done using a ' + 'NetCDFTimeIndex rather than ' + 'a DatetimeIndex'.format(calendar), + SerializationWarning, stacklevel=3) + else: + try: + dates = nctime_to_nptime(dates) + except ValueError as e: + warnings.warn( + 'Unable to decode time axis into full ' + 'numpy.datetime64 objects, continuing using ' + 'dummy netcdftime.datetime objects instead, reason:' + '{0}'.format(e), SerializationWarning, stacklevel=3) return dates diff --git a/xarray/core/options.py b/xarray/core/options.py index b2968a2a02f..29a1dbe7f01 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -3,6 +3,7 @@ OPTIONS = { 'display_width': 80, 'arithmetic_join': 'inner', + 'enable_netcdftimeindex': False } @@ -15,6 +16,9 @@ class set_options(object): Default: ``80``. 
- ``arithmetic_join``: DataArray/Dataset alignment in binary operations. Default: ``'inner'``. + - ``enable_netcdftimeindex``: flag to enable using a ``NetCDFTimeIndex`` + for time indexes with non-standard calendars or dates outside the + Timestamp-valid range. Default: ``False``. You can use ``set_options`` either as a context manager: diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 86e8079604b..a94e96f5747 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -12,6 +12,7 @@ import numpy as np import pandas as pd +from .options import OPTIONS from .pycompat import ( OrderedDict, basestring, bytes_type, dask_array_type, iteritems) @@ -37,16 +38,19 @@ def wrapper(*args, **kwargs): def _maybe_cast_to_netcdftimeindex(index): - try: - from netcdftime._netcdftime import datetime as ncdatetime - from ..coding.netcdftimeindex import NetCDFTimeIndex - except ImportError: + if not OPTIONS['enable_netcdftimeindex']: return index else: - if len(index): - if isinstance(index[0], ncdatetime): - index = NetCDFTimeIndex(index) - return index + try: + from netcdftime._netcdftime import datetime as ncdatetime + from ..coding.netcdftimeindex import NetCDFTimeIndex + except ImportError: + return index + else: + if len(index): + if isinstance(index[0], ncdatetime): + index = NetCDFTimeIndex(index) + return index def safe_cast_to_index(array): diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index fe380103145..6e037fdf950 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1,15 +1,17 @@ from __future__ import absolute_import, division, print_function +from itertools import product import warnings import numpy as np import pandas as pd import pytest -from xarray import Variable, coding +from xarray import Variable, coding, set_options from xarray.coding.times import _import_netcdftime +from xarray.coding.variables import SerializationWarning -from . import TestCase, assert_array_equal, requires_netcdftime +from . import assert_array_equal, has_netcdftime, requires_netcdftime @np.vectorize @@ -45,395 +47,551 @@ def _all_netcdftime_date_types(): 'proleptic_gregorian': DatetimeProlepticGregorian} -class TestDatetime(TestCase): - @requires_netcdftime - def test_cf_datetime(self): - nctime = _import_netcdftime() - for num_dates, units in [ - (np.arange(10), 'days since 2000-01-01'), - (np.arange(10).astype('float64'), 'days since 2000-01-01'), - (np.arange(10).astype('float32'), 'days since 2000-01-01'), - (np.arange(10).reshape(2, 5), 'days since 2000-01-01'), - (12300 + np.arange(5), 'hours since 1680-01-01 00:00:00'), - # here we add a couple minor formatting errors to test - # the robustness of the parsing algorithm. 
- (12300 + np.arange(5), 'hour since 1680-01-01 00:00:00'), - (12300 + np.arange(5), u'Hour since 1680-01-01 00:00:00'), - (12300 + np.arange(5), ' Hour since 1680-01-01 00:00:00 '), - (10, 'days since 2000-01-01'), - ([10], 'daYs since 2000-01-01'), - ([[10]], 'days since 2000-01-01'), - ([10, 10], 'days since 2000-01-01'), - (np.array(10), 'days since 2000-01-01'), - (0, 'days since 1000-01-01'), - ([0], 'days since 1000-01-01'), - ([[0]], 'days since 1000-01-01'), - (np.arange(2), 'days since 1000-01-01'), - (np.arange(0, 100000, 20000), 'days since 1900-01-01'), - (17093352.0, 'hours since 1-1-1 00:00:0.0'), - ([0.5, 1.5], 'hours since 1900-01-01T00:00:00'), - (0, 'milliseconds since 2000-01-01T00:00:00'), - (0, 'microseconds since 2000-01-01T00:00:00'), - ]: - for calendar in ['standard', 'gregorian', 'proleptic_gregorian']: - expected = _ensure_naive_tz( - nctime.num2date(num_dates, units, calendar)) - print(num_dates, units, calendar) - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', - 'Unable to decode time axis') - actual = coding.times.decode_cf_datetime(num_dates, units, - calendar) - if (isinstance(actual, np.ndarray) and - np.issubdtype(actual.dtype, np.datetime64)): - # self.assertEqual(actual.dtype.kind, 'M') - # For some reason, numpy 1.8 does not compare ns precision - # datetime64 arrays as equal to arrays of datetime objects, - # but it works for us precision. Thus, convert to us - # precision for the actual array equal comparison... - actual_cmp = actual.astype('M8[us]') - else: - actual_cmp = actual - assert_array_equal(expected, actual_cmp) - encoded, _, _ = coding.times.encode_cf_datetime(actual, units, - calendar) - if '1-1-1' not in units: - # pandas parses this date very strangely, so the original - # units/encoding cannot be preserved in this case: - # (Pdb) pd.to_datetime('1-1-1 00:00:0.0') - # Timestamp('2001-01-01 00:00:00') +@requires_netcdftime +def test_cf_datetime(): + nctime = _import_netcdftime() + for num_dates, units in [ + (np.arange(10), 'days since 2000-01-01'), + (np.arange(10).astype('float64'), 'days since 2000-01-01'), + (np.arange(10).astype('float32'), 'days since 2000-01-01'), + (np.arange(10).reshape(2, 5), 'days since 2000-01-01'), + (12300 + np.arange(5), 'hours since 1680-01-01 00:00:00'), + # here we add a couple minor formatting errors to test + # the robustness of the parsing algorithm. 
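The conversion pattern applied to ``test_cf_datetime`` here — and repeated for the rest of the module — is worth spelling out once in schematic form (illustrative only, not lines from the diff):

    # before: xunit-style, bound to a TestCase subclass
    class TestDatetime(TestCase):
        @requires_netcdftime
        def test_cf_datetime(self):
            ...

    # after: a module-level pytest function; the decorator is unchanged
    # and `self` disappears along with the class
    @requires_netcdftime
    def test_cf_datetime():
        ...

A later commit in this series finishes the job by hoisting the hand-rolled loops into ``@pytest.mark.parametrize`` decorators.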
+ (12300 + np.arange(5), 'hour since 1680-01-01 00:00:00'), + (12300 + np.arange(5), u'Hour since 1680-01-01 00:00:00'), + (12300 + np.arange(5), ' Hour since 1680-01-01 00:00:00 '), + (10, 'days since 2000-01-01'), + ([10], 'daYs since 2000-01-01'), + ([[10]], 'days since 2000-01-01'), + ([10, 10], 'days since 2000-01-01'), + (np.array(10), 'days since 2000-01-01'), + (0, 'days since 1000-01-01'), + ([0], 'days since 1000-01-01'), + ([[0]], 'days since 1000-01-01'), + (np.arange(2), 'days since 1000-01-01'), + (np.arange(0, 100000, 20000), 'days since 1900-01-01'), + (17093352.0, 'hours since 1-1-1 00:00:0.0'), + ([0.5, 1.5], 'hours since 1900-01-01T00:00:00'), + (0, 'milliseconds since 2000-01-01T00:00:00'), + (0, 'microseconds since 2000-01-01T00:00:00'), + ]: + for calendar in ['standard', 'gregorian', 'proleptic_gregorian']: + expected = _ensure_naive_tz( + nctime.num2date(num_dates, units, calendar)) + print(num_dates, units, calendar) + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', + 'Unable to decode time axis') + actual = coding.times.decode_cf_datetime(num_dates, units, + calendar) + if (isinstance(actual, np.ndarray) and + np.issubdtype(actual.dtype, np.datetime64)): + # self.assertEqual(actual.dtype.kind, 'M') + # For some reason, numpy 1.8 does not compare ns precision + # datetime64 arrays as equal to arrays of datetime objects, + # but it works for us precision. Thus, convert to us + # precision for the actual array equal comparison... + actual_cmp = actual.astype('M8[us]') + else: + actual_cmp = actual + assert_array_equal(expected, actual_cmp) + encoded, _, _ = coding.times.encode_cf_datetime(actual, units, + calendar) + if '1-1-1' not in units: + # pandas parses this date very strangely, so the original + # units/encoding cannot be preserved in this case: + # (Pdb) pd.to_datetime('1-1-1 00:00:0.0') + # Timestamp('2001-01-01 00:00:00') + assert_array_equal(num_dates, np.around(encoded, 1)) + if (hasattr(num_dates, 'ndim') and num_dates.ndim == 1 and + '1000' not in units): + # verify that wrapping with a pandas.Index works + # note that it *does not* currently work to even put + # non-datetime64 compatible dates into a pandas.Index + encoded, _, _ = coding.times.encode_cf_datetime( + pd.Index(actual), units, calendar) assert_array_equal(num_dates, np.around(encoded, 1)) - if (hasattr(num_dates, 'ndim') and num_dates.ndim == 1 and - '1000' not in units): - # verify that wrapping with a pandas.Index works - # note that it *does not* currently work to even put - # non-datetime64 compatible dates into a pandas.Index - encoded, _, _ = coding.times.encode_cf_datetime( - pd.Index(actual), units, calendar) - assert_array_equal(num_dates, np.around(encoded, 1)) - - @requires_netcdftime - def test_decode_cf_datetime_overflow(self): - # checks for - # https://github.com/pydata/pandas/issues/14068 - # https://github.com/pydata/xarray/issues/975 - - from datetime import datetime - units = 'days since 2000-01-01 00:00:00' - - # date after 2262 and before 1678 - days = (-117608, 95795) - expected = (datetime(1677, 12, 31), datetime(2262, 4, 12)) - - for i, day in enumerate(days): - result = coding.times.decode_cf_datetime(day, units) - assert result == expected[i] - - def test_decode_cf_datetime_non_standard_units(self): - expected = pd.date_range(periods=100, start='1970-01-01', freq='h') - # netCDFs from madis.noaa.gov use this format for their time units - # they cannot be parsed by netcdftime, but pd.Timestamp works - units = 'hours since 1-1-1970' - actual = 
coding.times.decode_cf_datetime(np.arange(100), units) + + +@requires_netcdftime +def test_decode_cf_datetime_overflow(): + # checks for + # https://github.com/pydata/pandas/issues/14068 + # https://github.com/pydata/xarray/issues/975 + + from datetime import datetime + units = 'days since 2000-01-01 00:00:00' + + # date after 2262 and before 1678 + days = (-117608, 95795) + expected = (datetime(1677, 12, 31), datetime(2262, 4, 12)) + + for i, day in enumerate(days): + result = coding.times.decode_cf_datetime(day, units) + assert result == expected[i] + + +def test_decode_cf_datetime_non_standard_units(): + expected = pd.date_range(periods=100, start='1970-01-01', freq='h') + # netCDFs from madis.noaa.gov use this format for their time units + # they cannot be parsed by netcdftime, but pd.Timestamp works + units = 'hours since 1-1-1970' + actual = coding.times.decode_cf_datetime(np.arange(100), units) + assert_array_equal(actual, expected) + + +@requires_netcdftime +def test_decode_cf_datetime_non_iso_strings(): + # datetime strings that are _almost_ ISO compliant but not quite, + # but which netCDF4.num2date can still parse correctly + expected = pd.date_range(periods=100, start='2000-01-01', freq='h') + cases = [(np.arange(100), 'hours since 2000-01-01 0'), + (np.arange(100), 'hours since 2000-1-1 0'), + (np.arange(100), 'hours since 2000-01-01 0:00')] + for num_dates, units in cases: + actual = coding.times.decode_cf_datetime(num_dates, units) assert_array_equal(actual, expected) - @requires_netcdftime - def test_decode_cf_datetime_non_iso_strings(self): - # datetime strings that are _almost_ ISO compliant but not quite, - # but which netCDF4.num2date can still parse correctly - expected = pd.date_range(periods=100, start='2000-01-01', freq='h') - cases = [(np.arange(100), 'hours since 2000-01-01 0'), - (np.arange(100), 'hours since 2000-1-1 0'), - (np.arange(100), 'hours since 2000-01-01 0:00')] - for num_dates, units in cases: - actual = coding.times.decode_cf_datetime(num_dates, units) - assert_array_equal(actual, expected) - - @requires_netcdftime - def test_decode_non_standard_calendar(self): - from datetime import datetime - nctime = _import_netcdftime() - - date_types = _non_standard_netcdftime_date_types() - for calendar, date_type in date_types.items(): - units = 'days since 0001-01-01' - times = [datetime(1, 4, 1, h) for h in range(1, 5)] - noleap_time = nctime.date2num(times, units, calendar=calendar) - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', 'Unable to decode time axis') - actual = coding.times.decode_cf_datetime(noleap_time, units, - calendar=calendar) - assert all(isinstance(value, date_type) for value in actual) - expected = np.array( - [date_type(1, 4, 1, h) for h in range(1, 5)]) - abs_diff = abs(actual - expected) - # once we no longer support versions of netCDF4 older than 1.1.5, - # we could do this check with near microsecond accuracy: - # https://github.com/Unidata/netcdf4-python/issues/355 - self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) - - @requires_netcdftime - def test_decode_standard_calendar_single_element_ns_range(self): - units = 'days since 0001-01-01' - for calendar in coding.times._STANDARD_CALENDARS: - for num_time in [735368, [735368], [[735368]]]: - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', - 'Unable to decode time axis') - actual = coding.times.decode_cf_datetime(num_time, units, - calendar=calendar) - assert actual.dtype == np.dtype('M8[ns]') - - @requires_netcdftime - def 
test_decode_standard_calendar_single_element_non_ns_range(self): - nctime = _import_netcdftime() - units = 'days since 0001-01-01' - for days in [1, 1470376]: - for calendar in coding.times._STANDARD_CALENDARS: - for num_time in [days, [days], [[days]]]: - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', - 'Unable to decode time axis') - actual = coding.times.decode_cf_datetime( - num_time, units, calendar=calendar) - expected = nctime.num2date(days, units, calendar) - assert isinstance(actual.item(), type(expected)) - - @requires_netcdftime - def test_decode_non_standard_calendar_single_element(self): - date_types = _non_standard_netcdftime_date_types() - units = 'days since 0001-01-01' - for days in [1, 735368]: - for calendar, date_type in date_types.items(): - for num_time in [days, [days], [[days]]]: - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', - 'Unable to decode time axis') - actual = coding.times.decode_cf_datetime( - num_time, units, calendar=calendar) - assert isinstance(actual.item(), date_type) - - @requires_netcdftime - def test_decode_standard_calendar_multidim_time(self): - nctime = _import_netcdftime() - - for calendar in coding.times._STANDARD_CALENDARS: - units = 'days since 0001-01-01' - times1 = pd.date_range('2001-04-01', end='2001-04-05', freq='D') - times2 = pd.date_range('2001-05-01', end='2001-05-05', freq='D') - noleap_time1 = nctime.date2num(times1.to_pydatetime(), - units, calendar=calendar) - noleap_time2 = nctime.date2num(times2.to_pydatetime(), - units, calendar=calendar) - mdim_time = np.empty((len(noleap_time1), 2), ) - mdim_time[:, 0] = noleap_time1 - mdim_time[:, 1] = noleap_time2 - - expected1 = times1.values - expected2 = times2.values - actual = coding.times.decode_cf_datetime(mdim_time, units, + +_NON_STANDARD_CALENDARS = ['noleap', '365_day', '360_day', + 'julian', 'all_leap', '366_day'] +_ALL_CALENDARS = _NON_STANDARD_CALENDARS + list( + coding.times._STANDARD_CALENDARS) + + +@pytest.mark.skipif(not has_netcdftime, reason='netcdftime not installed') +@pytest.mark.parametrize( + ['calendar', 'enable_netcdftimeindex'], + product(coding.times._STANDARD_CALENDARS, [False, True])) +def test_decode_standard_calendar_inside_timestamp_range( + calendar, enable_netcdftimeindex): + nctime = _import_netcdftime() + units = 'days since 0001-01-01' + times = pd.date_range('2001-04-01-00', end='2001-04-30-23', + freq='H') + noleap_time = nctime.date2num(times.to_pydatetime(), units, + calendar=calendar) + expected = times.values + expected_dtype = np.dtype('M8[ns]') + + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'Unable to decode time axis') + with set_options(enable_netcdftimeindex=enable_netcdftimeindex): + actual = coding.times.decode_cf_datetime(noleap_time, units, calendar=calendar) - assert actual.dtype == np.dtype('M8[ns]') + assert actual.dtype == expected_dtype + abs_diff = abs(actual - expected) + # once we no longer support versions of netCDF4 older than 1.1.5, + # we could do this check with near microsecond accuracy: + # https://github.com/Unidata/netcdf4-python/issues/355 + assert (abs_diff <= np.timedelta64(1, 's')).all() + + +@pytest.mark.skipif(not has_netcdftime, reason='netcdftime not installed') +@pytest.mark.parametrize( + ['calendar', 'enable_netcdftimeindex'], + product(_NON_STANDARD_CALENDARS, [False, True])) +def test_decode_non_standard_calendar_inside_timestamp_range( + calendar, enable_netcdftimeindex): + nctime = _import_netcdftime() + units = 'days since 0001-01-01' 
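For readers unfamiliar with the netcdftime helpers these tests lean on, the encode/decode roundtrip looks roughly like this — a sketch assuming the standalone netcdftime package, which exposes ``date2num``/``num2date`` at the top level:

    import netcdftime

    units = 'days since 0001-01-01'
    calendar = 'noleap'
    dates = [netcdftime.DatetimeNoLeap(1, 4, 1, hour) for hour in range(1, 5)]

    # encode datetimes to numeric offsets from the reference date...
    nums = netcdftime.date2num(dates, units, calendar=calendar)
    # ...and decode them back to calendar-aware datetime objects
    roundtrip = netcdftime.num2date(nums, units, calendar=calendar)

``decode_cf_datetime`` wraps ``num2date`` and then decides whether the result can safely be narrowed to ``numpy.datetime64[ns]`` values.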
+ times = pd.date_range('2001-04-01-00', end='2001-04-30-23', + freq='H') + noleap_time = nctime.date2num(times.to_pydatetime(), units, + calendar=calendar) + if enable_netcdftimeindex: + expected = nctime.num2date(noleap_time, units, calendar=calendar) + expected_dtype = np.dtype('O') + else: + expected = times.values + expected_dtype = np.dtype('M8[ns]') - abs_diff1 = abs(actual[:, 0] - expected1) - abs_diff2 = abs(actual[:, 1] - expected2) - # once we no longer support versions of netCDF4 older than 1.1.5, - # we could do this check with near microsecond accuracy: - # https://github.com/Unidata/netcdf4-python/issues/355 - self.assertTrue((abs_diff1 <= np.timedelta64(1, 's')).all()) - self.assertTrue((abs_diff2 <= np.timedelta64(1, 's')).all()) - - @requires_netcdftime - def test_decode_non_standard_calendar_multidim_time(self): - from datetime import datetime - nctime = _import_netcdftime() - - date_types = _non_standard_netcdftime_date_types() - for calendar, date_type in date_types.items(): - units = 'days since 0001-01-01' - times1 = [datetime(1, 4, day) for day in range(1, 6)] - times2 = [datetime(1, 5, day) for day in range(1, 6)] - noleap_time1 = nctime.date2num(times1, units, calendar=calendar) - noleap_time2 = nctime.date2num(times2, units, calendar=calendar) - mdim_time = np.empty((len(noleap_time1), 2), ) - mdim_time[:, 0] = noleap_time1 - mdim_time[:, 1] = noleap_time2 - - expected1 = np.array( - [date_type(1, 4, day) for day in range(1, 6)]) - expected2 = np.array( - [date_type(1, 5, day) for day in range(1, 6)]) - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', 'Unable to decode time axis') - actual = coding.times.decode_cf_datetime(mdim_time, units, + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'Unable to decode time axis') + with set_options(enable_netcdftimeindex=enable_netcdftimeindex): + actual = coding.times.decode_cf_datetime(noleap_time, units, + calendar=calendar) + assert actual.dtype == expected_dtype + abs_diff = abs(actual - expected) + # once we no longer support versions of netCDF4 older than 1.1.5, + # we could do this check with near microsecond accuracy: + # https://github.com/Unidata/netcdf4-python/issues/355 + assert (abs_diff <= np.timedelta64(1, 's')).all() + + +@pytest.mark.skipif(not has_netcdftime, reason='netcdftime not installed') +@pytest.mark.parametrize( + ['calendar', 'enable_netcdftimeindex'], + product(_ALL_CALENDARS, [False, True])) +def test_decode_dates_outside_timestamp_range( + calendar, enable_netcdftimeindex): + from datetime import datetime + nctime = _import_netcdftime() + + units = 'days since 0001-01-01' + times = [datetime(1, 4, 1, h) for h in range(1, 5)] + noleap_time = nctime.date2num(times, units, calendar=calendar) + expected = nctime.num2date(noleap_time, units, calendar=calendar) + expected_date_type = type(expected[0]) + + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'Unable to decode time axis') + with set_options(enable_netcdftimeindex=enable_netcdftimeindex): + actual = coding.times.decode_cf_datetime(noleap_time, units, + calendar=calendar) + assert all(isinstance(value, expected_date_type) for value in actual) + abs_diff = abs(actual - expected) + # once we no longer support versions of netCDF4 older than 1.1.5, + # we could do this check with near microsecond accuracy: + # https://github.com/Unidata/netcdf4-python/issues/355 + assert (abs_diff <= np.timedelta64(1, 's')).all() + + +@pytest.mark.skipif(not has_netcdftime, reason='netcdftime not 
installed') +@pytest.mark.parametrize( + ['calendar', 'enable_netcdftimeindex'], + product(coding.times._STANDARD_CALENDARS, [False, True])) +def test_decode_standard_calendar_single_element_inside_timestamp_range( + calendar, enable_netcdftimeindex): + units = 'days since 0001-01-01' + for num_time in [735368, [735368], [[735368]]]: + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', + 'Unable to decode time axis') + with set_options(enable_netcdftimeindex=enable_netcdftimeindex): + actual = coding.times.decode_cf_datetime(num_time, units, calendar=calendar) - assert actual.dtype == np.dtype('O') - - abs_diff1 = abs(actual[:, 0] - expected1) - abs_diff2 = abs(actual[:, 1] - expected2) - # once we no longer support versions of netCDF4 older than 1.1.5, - # we could do this check with near microsecond accuracy: - # https://github.com/Unidata/netcdf4-python/issues/355 - self.assertTrue((abs_diff1 <= np.timedelta64(1, 's')).all()) - self.assertTrue((abs_diff2 <= np.timedelta64(1, 's')).all()) - - @requires_netcdftime - def test_decode_non_standard_calendar_single_element_fallback(self): - nctime = _import_netcdftime() - - units = 'days since 0001-01-01' - try: - dt = nctime.netcdftime.datetime(2001, 2, 29) - except AttributeError: - # Must be using standalone netcdftime library - dt = nctime.datetime(2001, 2, 29) - for calendar in ['360_day', 'all_leap', '366_day']: - num_time = nctime.date2num(dt, units, calendar) - with pytest.warns(DeprecationWarning, - match='Unable to decode time axis'): + assert actual.dtype == np.dtype('M8[ns]') + + +@pytest.mark.skipif(not has_netcdftime, reason='netcdftime not installed') +@pytest.mark.parametrize( + ['calendar', 'enable_netcdftimeindex'], + product(_NON_STANDARD_CALENDARS, [False, True])) +def test_decode_non_standard_calendar_single_element_inside_timestamp_range( + calendar, enable_netcdftimeindex): + units = 'days since 0001-01-01' + for num_time in [735368, [735368], [[735368]]]: + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', + 'Unable to decode time axis') + with set_options(enable_netcdftimeindex=enable_netcdftimeindex): actual = coding.times.decode_cf_datetime(num_time, units, calendar=calendar) - expected = np.asarray(nctime.num2date(num_time, units, calendar)) + if enable_netcdftimeindex: assert actual.dtype == np.dtype('O') - assert expected == actual - - @requires_netcdftime - def test_decode_non_standard_calendar_fallback(self): - nctime = _import_netcdftime() - # ensure leap year doesn't matter - for year in [2010, 2011, 2012, 2013, 2014]: - for calendar in ['360_day', '366_day', 'all_leap']: - calendar = '360_day' - units = 'days since {0}-01-01'.format(year) - num_times = np.arange(100) - expected = nctime.num2date(num_times, units, calendar) - - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter('always') - actual = coding.times.decode_cf_datetime(num_times, units, - calendar=calendar) - assert len(w) == 1 - assert 'Unable to decode time axis' in \ - str(w[0].message) - - assert actual.dtype == np.dtype('O') - assert_array_equal(actual, expected) - - @requires_netcdftime - def test_cf_datetime_nan(self): - for num_dates, units, expected_list in [ - ([np.nan], 'days since 2000-01-01', ['NaT']), - ([np.nan, 0], 'days since 2000-01-01', - ['NaT', '2000-01-01T00:00:00Z']), - ([np.nan, 0, 1], 'days since 2000-01-01', - ['NaT', '2000-01-01T00:00:00Z', '2000-01-02T00:00:00Z']), - ]: + else: + assert actual.dtype == np.dtype('M8[ns]') + + +@pytest.mark.skipif(not has_netcdftime, 
reason='netcdftime not installed') +@pytest.mark.parametrize( + ['calendar', 'enable_netcdftimeindex'], + product(_NON_STANDARD_CALENDARS, [False, True])) +def test_decode_single_element_outside_timestamp_range( + calendar, enable_netcdftimeindex): + nctime = _import_netcdftime() + units = 'days since 0001-01-01' + for days in [1, 1470376]: + for num_time in [days, [days], [[days]]]: with warnings.catch_warnings(): - warnings.filterwarnings('ignore', 'All-NaN') - actual = coding.times.decode_cf_datetime(num_dates, units) - expected = np.array(expected_list, dtype='datetime64[ns]') - assert_array_equal(expected, actual) + warnings.filterwarnings('ignore', + 'Unable to decode time axis') + with set_options( + enable_netcdftimeindex=enable_netcdftimeindex): + actual = coding.times.decode_cf_datetime( + num_time, units, calendar=calendar) + expected = nctime.num2date(days, units, calendar) + assert isinstance(actual.item(), type(expected)) + + +@pytest.mark.skipif(not has_netcdftime, reason='netcdftime not installed') +@pytest.mark.parametrize( + ['calendar', 'enable_netcdftimeindex'], + product(coding.times._STANDARD_CALENDARS, [False, True])) +def test_decode_standard_calendar_multidim_time_inside_timestamp_range( + calendar, enable_netcdftimeindex): + nctime = _import_netcdftime() + + units = 'days since 0001-01-01' + times1 = pd.date_range('2001-04-01', end='2001-04-05', freq='D') + times2 = pd.date_range('2001-05-01', end='2001-05-05', freq='D') + noleap_time1 = nctime.date2num(times1.to_pydatetime(), + units, calendar=calendar) + noleap_time2 = nctime.date2num(times2.to_pydatetime(), + units, calendar=calendar) + mdim_time = np.empty((len(noleap_time1), 2), ) + mdim_time[:, 0] = noleap_time1 + mdim_time[:, 1] = noleap_time2 + + expected1 = times1.values + expected2 = times2.values + + with set_options(enable_netcdftimeindex=enable_netcdftimeindex): + actual = coding.times.decode_cf_datetime(mdim_time, units, + calendar=calendar) + assert actual.dtype == np.dtype('M8[ns]') + + abs_diff1 = abs(actual[:, 0] - expected1) + abs_diff2 = abs(actual[:, 1] - expected2) + # once we no longer support versions of netCDF4 older than 1.1.5, + # we could do this check with near microsecond accuracy: + # https://github.com/Unidata/netcdf4-python/issues/355 + assert (abs_diff1 <= np.timedelta64(1, 's')).all() + assert (abs_diff2 <= np.timedelta64(1, 's')).all() + + +@pytest.mark.skipif(not has_netcdftime, reason='netcdftime not installed') +@pytest.mark.parametrize( + ['calendar', 'enable_netcdftimeindex'], + product(_NON_STANDARD_CALENDARS, [False, True])) +def test_decode_nonstandard_calendar_multidim_time_inside_timestamp_range( + calendar, enable_netcdftimeindex): + nctime = _import_netcdftime() + + units = 'days since 0001-01-01' + times1 = pd.date_range('2001-04-01', end='2001-04-05', freq='D') + times2 = pd.date_range('2001-05-01', end='2001-05-05', freq='D') + noleap_time1 = nctime.date2num(times1.to_pydatetime(), + units, calendar=calendar) + noleap_time2 = nctime.date2num(times2.to_pydatetime(), + units, calendar=calendar) + mdim_time = np.empty((len(noleap_time1), 2), ) + mdim_time[:, 0] = noleap_time1 + mdim_time[:, 1] = noleap_time2 + + if enable_netcdftimeindex: + expected1 = nctime.num2date(noleap_time1, units, calendar) + expected2 = nctime.num2date(noleap_time2, units, calendar) + expected_dtype = np.dtype('O') + else: + expected1 = times1.values + expected2 = times2.values + expected_dtype = np.dtype('M8[ns]') + + with set_options(enable_netcdftimeindex=enable_netcdftimeindex): + actual 
= coding.times.decode_cf_datetime(mdim_time, units, + calendar=calendar) + assert actual.dtype == expected_dtype + abs_diff1 = abs(actual[:, 0] - expected1) + abs_diff2 = abs(actual[:, 1] - expected2) + # once we no longer support versions of netCDF4 older than 1.1.5, + # we could do this check with near microsecond accuracy: + # https://github.com/Unidata/netcdf4-python/issues/355 + assert (abs_diff1 <= np.timedelta64(1, 's')).all() + assert (abs_diff2 <= np.timedelta64(1, 's')).all() + + +@pytest.mark.skipif(not has_netcdftime, reason='netcdftime not installed') +@pytest.mark.parametrize( + ['calendar', 'enable_netcdftimeindex'], + product(_ALL_CALENDARS, [False, True])) +def test_decode_multidim_time_outside_timestamp_range( + calendar, enable_netcdftimeindex): + from datetime import datetime + nctime = _import_netcdftime() + + units = 'days since 0001-01-01' + times1 = [datetime(1, 4, day) for day in range(1, 6)] + times2 = [datetime(1, 5, day) for day in range(1, 6)] + noleap_time1 = nctime.date2num(times1, units, calendar=calendar) + noleap_time2 = nctime.date2num(times2, units, calendar=calendar) + mdim_time = np.empty((len(noleap_time1), 2), ) + mdim_time[:, 0] = noleap_time1 + mdim_time[:, 1] = noleap_time2 + + expected1 = nctime.num2date(noleap_time1, units, calendar) + expected2 = nctime.num2date(noleap_time2, units, calendar) + + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'Unable to decode time axis') + with set_options(enable_netcdftimeindex=enable_netcdftimeindex): + actual = coding.times.decode_cf_datetime(mdim_time, units, + calendar=calendar) + assert actual.dtype == np.dtype('O') + + abs_diff1 = abs(actual[:, 0] - expected1) + abs_diff2 = abs(actual[:, 1] - expected2) + # once we no longer support versions of netCDF4 older than 1.1.5, + # we could do this check with near microsecond accuracy: + # https://github.com/Unidata/netcdf4-python/issues/355 + assert (abs_diff1 <= np.timedelta64(1, 's')).all() + assert (abs_diff2 <= np.timedelta64(1, 's')).all() + + +@pytest.mark.skipif(not has_netcdftime, reason='netcdftime not installed') +@pytest.mark.parametrize( + ['calendar', 'enable_netcdftimeindex'], + product(['360_day', 'all_leap', '366_day'], [False, True])) +def test_decode_non_standard_calendar_single_element_fallback( + calendar, enable_netcdftimeindex): + nctime = _import_netcdftime() + + units = 'days since 0001-01-01' + try: + dt = nctime.netcdftime.datetime(2001, 2, 29) + except AttributeError: + # Must be using standalone netcdftime library + dt = nctime.datetime(2001, 2, 29) + + num_time = nctime.date2num(dt, units, calendar) + with pytest.warns(SerializationWarning, + match='Unable to decode time axis'): + with set_options(enable_netcdftimeindex=enable_netcdftimeindex): + actual = coding.times.decode_cf_datetime(num_time, units, + calendar=calendar) + expected = np.asarray(nctime.num2date(num_time, units, calendar)) + assert actual.dtype == np.dtype('O') + assert expected == actual + + +@pytest.mark.skipif(not has_netcdftime, reason='netcdftime not installed') +@pytest.mark.parametrize( + ['calendar', 'enable_netcdftimeindex'], + product(['360_day'], [False, True])) +def test_decode_non_standard_calendar_fallback( + calendar, enable_netcdftimeindex): + nctime = _import_netcdftime() + # ensure leap year doesn't matter + for year in [2010, 2011, 2012, 2013, 2014]: + units = 'days since {0}-01-01'.format(year) + num_times = np.arange(100) + expected = nctime.num2date(num_times, units, calendar) + + with warnings.catch_warnings(record=True) 
as w: + warnings.simplefilter('always') + with set_options(enable_netcdftimeindex=enable_netcdftimeindex): + actual = coding.times.decode_cf_datetime(num_times, units, + calendar=calendar) + assert len(w) == 1 + assert 'Unable to decode time axis' in str(w[0].message) + + assert actual.dtype == np.dtype('O') + assert_array_equal(actual, expected) + - @requires_netcdftime - def test_decoded_cf_datetime_array_2d(self): - # regression test for GH1229 - variable = Variable(('x', 'y'), np.array([[0, 1], [2, 3]]), - {'units': 'days since 2000-01-01'}) - result = coding.times.CFDatetimeCoder().decode(variable) - assert result.dtype == 'datetime64[ns]' - expected = pd.date_range('2000-01-01', periods=4).values.reshape(2, 2) - assert_array_equal(np.asarray(result), expected) - - def test_infer_datetime_units(self): - for dates, expected in [(pd.date_range('1900-01-01', periods=5), - 'days since 1900-01-01 00:00:00'), - (pd.date_range('1900-01-01 12:00:00', freq='H', - periods=2), - 'hours since 1900-01-01 12:00:00'), - (pd.to_datetime( - ['1900-01-01', '1900-01-02', 'NaT']), - 'days since 1900-01-01 00:00:00'), - (pd.to_datetime(['1900-01-01', - '1900-01-02T00:00:00.005']), - 'seconds since 1900-01-01 00:00:00'), - (pd.to_datetime(['NaT', '1900-01-01']), - 'days since 1900-01-01 00:00:00'), - (pd.to_datetime(['NaT']), - 'days since 1970-01-01 00:00:00'), - ]: +@requires_netcdftime +def test_cf_datetime_nan(): + for num_dates, units, expected_list in [ + ([np.nan], 'days since 2000-01-01', ['NaT']), + ([np.nan, 0], 'days since 2000-01-01', + ['NaT', '2000-01-01T00:00:00Z']), + ([np.nan, 0, 1], 'days since 2000-01-01', + ['NaT', '2000-01-01T00:00:00Z', '2000-01-02T00:00:00Z']), + ]: + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'All-NaN') + actual = coding.times.decode_cf_datetime(num_dates, units) + expected = np.array(expected_list, dtype='datetime64[ns]') + assert_array_equal(expected, actual) + + +@requires_netcdftime +def test_decoded_cf_datetime_array_2d(): + # regression test for GH1229 + variable = Variable(('x', 'y'), np.array([[0, 1], [2, 3]]), + {'units': 'days since 2000-01-01'}) + result = coding.times.CFDatetimeCoder().decode(variable) + assert result.dtype == 'datetime64[ns]' + expected = pd.date_range('2000-01-01', periods=4).values.reshape(2, 2) + assert_array_equal(np.asarray(result), expected) + + +def test_infer_datetime_units(): + for dates, expected in [(pd.date_range('1900-01-01', periods=5), + 'days since 1900-01-01 00:00:00'), + (pd.date_range('1900-01-01 12:00:00', freq='H', + periods=2), + 'hours since 1900-01-01 12:00:00'), + (pd.to_datetime( + ['1900-01-01', '1900-01-02', 'NaT']), + 'days since 1900-01-01 00:00:00'), + (pd.to_datetime(['1900-01-01', + '1900-01-02T00:00:00.005']), + 'seconds since 1900-01-01 00:00:00'), + (pd.to_datetime(['NaT', '1900-01-01']), + 'days since 1900-01-01 00:00:00'), + (pd.to_datetime(['NaT']), + 'days since 1970-01-01 00:00:00'), + ]: + assert expected == coding.times.infer_datetime_units(dates) + + +@requires_netcdftime +def test_infer_netcdftime_datetime_units(): + date_types = _all_netcdftime_date_types() + for date_type in date_types.values(): + for dates, expected in [ + ([date_type(1900, 1, 1), + date_type(1900, 1, 2)], + 'days since 1900-01-01 00:00:00.000000'), + ([date_type(1900, 1, 1, 12), + date_type(1900, 1, 1, 13)], + 'seconds since 1900-01-01 12:00:00.000000'), + ([date_type(1900, 1, 1), + date_type(1900, 1, 2), + date_type(1900, 1, 2, 0, 0, 1)], + 'seconds since 1900-01-01 00:00:00.000000'), + 
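The rule the ``infer_datetime_units`` cases above pin down: pick the coarsest of days/hours/minutes/seconds consistent with all the offsets (seconds is the floor), using the first non-NaT date as the reference. Two of the expectations, restated as a sketch:

    import pandas as pd
    from xarray import coding

    coding.times.infer_datetime_units(
        pd.date_range('1900-01-01', periods=5))
    # -> 'days since 1900-01-01 00:00:00'

    coding.times.infer_datetime_units(pd.to_datetime(['NaT', '1900-01-01']))
    # -> 'days since 1900-01-01 00:00:00' (NaT is skipped for the reference)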
([date_type(1900, 1, 1), + date_type(1900, 1, 2, 0, 0, 0, 5)], + 'days since 1900-01-01 00:00:00.000000')]: assert expected == coding.times.infer_datetime_units(dates) - @requires_netcdftime - def test_infer_netcdftime_datetime_units(self): - date_types = _all_netcdftime_date_types() - for date_type in date_types.values(): - for dates, expected in [ - ([date_type(1900, 1, 1), - date_type(1900, 1, 2)], - 'days since 1900-01-01 00:00:00.000000'), - ([date_type(1900, 1, 1, 12), - date_type(1900, 1, 1, 13)], - 'seconds since 1900-01-01 12:00:00.000000'), - ([date_type(1900, 1, 1), - date_type(1900, 1, 2), - date_type(1900, 1, 2, 0, 0, 1)], - 'seconds since 1900-01-01 00:00:00.000000'), - ([date_type(1900, 1, 1), - date_type(1900, 1, 2, 0, 0, 0, 5)], - 'days since 1900-01-01 00:00:00.000000')]: - assert expected == coding.times.infer_datetime_units(dates) - - def test_cf_timedelta(self): - examples = [ - ('1D', 'days', np.int64(1)), - (['1D', '2D', '3D'], 'days', np.array([1, 2, 3], 'int64')), - ('1h', 'hours', np.int64(1)), - ('1ms', 'milliseconds', np.int64(1)), - ('1us', 'microseconds', np.int64(1)), - (['NaT', '0s', '1s'], None, [np.nan, 0, 1]), - (['30m', '60m'], 'hours', [0.5, 1.0]), - (np.timedelta64('NaT', 'ns'), 'days', np.nan), - (['NaT', 'NaT'], 'days', [np.nan, np.nan]), - ] - - for timedeltas, units, numbers in examples: - timedeltas = pd.to_timedelta(timedeltas, box=False) - numbers = np.array(numbers) - - expected = numbers - actual, _ = coding.times.encode_cf_timedelta(timedeltas, units) + +def test_cf_timedelta(): + examples = [ + ('1D', 'days', np.int64(1)), + (['1D', '2D', '3D'], 'days', np.array([1, 2, 3], 'int64')), + ('1h', 'hours', np.int64(1)), + ('1ms', 'milliseconds', np.int64(1)), + ('1us', 'microseconds', np.int64(1)), + (['NaT', '0s', '1s'], None, [np.nan, 0, 1]), + (['30m', '60m'], 'hours', [0.5, 1.0]), + (np.timedelta64('NaT', 'ns'), 'days', np.nan), + (['NaT', 'NaT'], 'days', [np.nan, np.nan]), + ] + + for timedeltas, units, numbers in examples: + timedeltas = pd.to_timedelta(timedeltas, box=False) + numbers = np.array(numbers) + + expected = numbers + actual, _ = coding.times.encode_cf_timedelta(timedeltas, units) + assert_array_equal(expected, actual) + assert expected.dtype == actual.dtype + + if units is not None: + expected = timedeltas + actual = coding.times.decode_cf_timedelta(numbers, units) assert_array_equal(expected, actual) assert expected.dtype == actual.dtype - if units is not None: - expected = timedeltas - actual = coding.times.decode_cf_timedelta(numbers, units) - assert_array_equal(expected, actual) - assert expected.dtype == actual.dtype + expected = np.timedelta64('NaT', 'ns') + actual = coding.times.decode_cf_timedelta(np.array(np.nan), 'days') + assert_array_equal(expected, actual) - expected = np.timedelta64('NaT', 'ns') - actual = coding.times.decode_cf_timedelta(np.array(np.nan), 'days') - assert_array_equal(expected, actual) - def test_cf_timedelta_2d(self): - timedeltas = ['1D', '2D', '3D'] - units = 'days' - numbers = np.atleast_2d([1, 2, 3]) +def test_cf_timedelta_2d(): + timedeltas = ['1D', '2D', '3D'] + units = 'days' + numbers = np.atleast_2d([1, 2, 3]) - timedeltas = np.atleast_2d(pd.to_timedelta(timedeltas, box=False)) - expected = timedeltas + timedeltas = np.atleast_2d(pd.to_timedelta(timedeltas, box=False)) + expected = timedeltas + + actual = coding.times.decode_cf_timedelta(numbers, units) + assert_array_equal(expected, actual) + assert expected.dtype == actual.dtype - actual = coding.times.decode_cf_timedelta(numbers, 
units) - assert_array_equal(expected, actual) - assert expected.dtype == actual.dtype - def test_infer_timedelta_units(self): - for deltas, expected in [ - (pd.to_timedelta(['1 day', '2 days']), 'days'), - (pd.to_timedelta(['1h', '1 day 1 hour']), 'hours'), - (pd.to_timedelta(['1m', '2m', np.nan]), 'minutes'), - (pd.to_timedelta(['1m3s', '1m4s']), 'seconds')]: - assert expected == coding.times.infer_timedelta_units(deltas) +def test_infer_timedelta_units(): + for deltas, expected in [ + (pd.to_timedelta(['1 day', '2 days']), 'days'), + (pd.to_timedelta(['1h', '1 day 1 hour']), 'hours'), + (pd.to_timedelta(['1m', '2m', np.nan]), 'minutes'), + (pd.to_timedelta(['1m3s', '1m4s']), 'seconds')]: + assert expected == coding.times.infer_timedelta_units(deltas) @pytest.mark.parametrize(['date_args', 'expected'], diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index ae658a4aa16..8186ce400fc 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -6,10 +6,11 @@ from xarray.coding.netcdftimeindex import NetCDFTimeIndex from xarray.core import duck_array_ops, utils +from xarray.core.options import set_options from xarray.core.pycompat import OrderedDict from .test_coding_times import _all_netcdftime_date_types from . import (TestCase, requires_dask, assert_array_equal, - requires_netcdftime) + has_netcdftime) class TestAlias(TestCase): @@ -22,31 +23,41 @@ def new_method(): old_method() -class TestSafeCastToIndex(TestCase): - def test(self): - dates = pd.date_range('2000-01-01', periods=10) - x = np.arange(5) - td = x * np.timedelta64(1, 'D') - for expected, array in [ - (dates, dates.values), - (pd.Index(x, dtype=object), x.astype(object)), - (pd.Index(td), td), - (pd.Index(td, dtype=object), td.astype(object)), - ]: - actual = utils.safe_cast_to_index(array) - assert_array_equal(expected, actual) - assert expected.dtype == actual.dtype - - @requires_netcdftime - def test_netcdftimeindex(self): - date_types = _all_netcdftime_date_types() - for date_type in date_types.values(): - dates = [date_type(1, 1, day) for day in range(1, 20)] +def test_safe_cast_to_index(): + dates = pd.date_range('2000-01-01', periods=10) + x = np.arange(5) + td = x * np.timedelta64(1, 'D') + for expected, array in [ + (dates, dates.values), + (pd.Index(x, dtype=object), x.astype(object)), + (pd.Index(td), td), + (pd.Index(td, dtype=object), td.astype(object)), + ]: + actual = utils.safe_cast_to_index(array) + assert_array_equal(expected, actual) + assert expected.dtype == actual.dtype + + +@pytest.mark.skipif(not has_netcdftime, reason='netcdftime not installed') +@pytest.mark.parametrize('enable_netcdftimeindex', [False, True]) +def test_safe_cast_to_index_netcdftimeindex(enable_netcdftimeindex): + date_types = _all_netcdftime_date_types() + for date_type in date_types.values(): + dates = [date_type(1, 1, day) for day in range(1, 20)] + if enable_netcdftimeindex: expected = NetCDFTimeIndex(dates) + else: + expected = pd.Index(dates) + + with set_options(enable_netcdftimeindex=enable_netcdftimeindex): actual = utils.safe_cast_to_index(np.array(dates)) - assert_array_equal(expected, actual) - assert expected.dtype == actual.dtype + assert_array_equal(expected, actual) + assert expected.dtype == actual.dtype + + if enable_netcdftimeindex: assert isinstance(actual, NetCDFTimeIndex) + else: + assert isinstance(actual, pd.Index) def test_multiindex_from_product_levels(): From 2993e3c3089c59754da2eed8552bdb3713b53c93 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 10 Mar 2018 18:09:41 
-0500 Subject: [PATCH 24/58] Continue parametrizing tests in test_coding_times.py --- xarray/tests/test_coding_times.py | 282 +++++++++++++++--------------- 1 file changed, 137 insertions(+), 145 deletions(-) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 6e037fdf950..d1fd772e189 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -14,6 +14,41 @@ from . import assert_array_equal, has_netcdftime, requires_netcdftime +_NON_STANDARD_CALENDARS = {'noleap', '365_day', '360_day', + 'julian', 'all_leap', '366_day'} +_ALL_CALENDARS = _NON_STANDARD_CALENDARS.union( + coding.times._STANDARD_CALENDARS) +_CF_DATETIME_NUM_DATES_UNITS = [ + (np.arange(10), 'days since 2000-01-01'), + (np.arange(10).astype('float64'), 'days since 2000-01-01'), + (np.arange(10).astype('float32'), 'days since 2000-01-01'), + (np.arange(10).reshape(2, 5), 'days since 2000-01-01'), + (12300 + np.arange(5), 'hours since 1680-01-01 00:00:00'), + # here we add a couple minor formatting errors to test + # the robustness of the parsing algorithm. + (12300 + np.arange(5), 'hour since 1680-01-01 00:00:00'), + (12300 + np.arange(5), u'Hour since 1680-01-01 00:00:00'), + (12300 + np.arange(5), ' Hour since 1680-01-01 00:00:00 '), + (10, 'days since 2000-01-01'), + ([10], 'daYs since 2000-01-01'), + ([[10]], 'days since 2000-01-01'), + ([10, 10], 'days since 2000-01-01'), + (np.array(10), 'days since 2000-01-01'), + (0, 'days since 1000-01-01'), + ([0], 'days since 1000-01-01'), + ([[0]], 'days since 1000-01-01'), + (np.arange(2), 'days since 1000-01-01'), + (np.arange(0, 100000, 20000), 'days since 1900-01-01'), + (17093352.0, 'hours since 1-1-1 00:00:0.0'), + ([0.5, 1.5], 'hours since 1900-01-01T00:00:00'), + (0, 'milliseconds since 2000-01-01T00:00:00'), + (0, 'microseconds since 2000-01-01T00:00:00') +] +_CF_DATETIME_TESTS = [num_dates_units + (calendar,) for num_dates_units, + calendar in product(_CF_DATETIME_NUM_DATES_UNITS, + coding.times._STANDARD_CALENDARS)] + + @np.vectorize def _ensure_naive_tz(dt): if hasattr(dt, 'tzinfo'): @@ -22,17 +57,6 @@ def _ensure_naive_tz(dt): return dt -def _non_standard_netcdftime_date_types(): - from netcdftime import ( - DatetimeNoLeap, DatetimeJulian, DatetimeAllLeap, Datetime360Day) - return {'noleap': DatetimeNoLeap, - '365_day': DatetimeNoLeap, - '360_day': Datetime360Day, - 'julian': DatetimeJulian, - 'all_leap': DatetimeAllLeap, - '366_day': DatetimeAllLeap} - - def _all_netcdftime_date_types(): from netcdftime import ( DatetimeNoLeap, DatetimeJulian, DatetimeAllLeap, Datetime360Day, @@ -47,71 +71,45 @@ def _all_netcdftime_date_types(): 'proleptic_gregorian': DatetimeProlepticGregorian} -@requires_netcdftime -def test_cf_datetime(): +@pytest.mark.skipif(not has_netcdftime, reason='netcdftime not installed') +@pytest.mark.parametrize(['num_dates', 'units', 'calendar'], + _CF_DATETIME_TESTS) +def test_cf_datetime(num_dates, units, calendar): nctime = _import_netcdftime() - for num_dates, units in [ - (np.arange(10), 'days since 2000-01-01'), - (np.arange(10).astype('float64'), 'days since 2000-01-01'), - (np.arange(10).astype('float32'), 'days since 2000-01-01'), - (np.arange(10).reshape(2, 5), 'days since 2000-01-01'), - (12300 + np.arange(5), 'hours since 1680-01-01 00:00:00'), - # here we add a couple minor formatting errors to test - # the robustness of the parsing algorithm. 
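The ``product`` trick used to assemble ``_CF_DATETIME_TESTS`` above flattens each (num_dates, units) pair and a calendar into a single parametrize tuple; schematically, with toy values rather than the real fixtures:

    from itertools import product

    num_dates_units = [(0, 'days since 2000-01-01'),
                       (10, 'hours since 1900-01-01')]
    calendars = ['standard', 'gregorian']
    tests = [pair + (calendar,)
             for pair, calendar in product(num_dates_units, calendars)]
    # -> [(0, 'days since 2000-01-01', 'standard'),
    #     (0, 'days since 2000-01-01', 'gregorian'), ...]

Each resulting triple then runs as an independent test case under ``@pytest.mark.parametrize(['num_dates', 'units', 'calendar'], ...)``.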
- (12300 + np.arange(5), 'hour since 1680-01-01 00:00:00'), - (12300 + np.arange(5), u'Hour since 1680-01-01 00:00:00'), - (12300 + np.arange(5), ' Hour since 1680-01-01 00:00:00 '), - (10, 'days since 2000-01-01'), - ([10], 'daYs since 2000-01-01'), - ([[10]], 'days since 2000-01-01'), - ([10, 10], 'days since 2000-01-01'), - (np.array(10), 'days since 2000-01-01'), - (0, 'days since 1000-01-01'), - ([0], 'days since 1000-01-01'), - ([[0]], 'days since 1000-01-01'), - (np.arange(2), 'days since 1000-01-01'), - (np.arange(0, 100000, 20000), 'days since 1900-01-01'), - (17093352.0, 'hours since 1-1-1 00:00:0.0'), - ([0.5, 1.5], 'hours since 1900-01-01T00:00:00'), - (0, 'milliseconds since 2000-01-01T00:00:00'), - (0, 'microseconds since 2000-01-01T00:00:00'), - ]: - for calendar in ['standard', 'gregorian', 'proleptic_gregorian']: - expected = _ensure_naive_tz( - nctime.num2date(num_dates, units, calendar)) - print(num_dates, units, calendar) - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', - 'Unable to decode time axis') - actual = coding.times.decode_cf_datetime(num_dates, units, - calendar) - if (isinstance(actual, np.ndarray) and - np.issubdtype(actual.dtype, np.datetime64)): - # self.assertEqual(actual.dtype.kind, 'M') - # For some reason, numpy 1.8 does not compare ns precision - # datetime64 arrays as equal to arrays of datetime objects, - # but it works for us precision. Thus, convert to us - # precision for the actual array equal comparison... - actual_cmp = actual.astype('M8[us]') - else: - actual_cmp = actual - assert_array_equal(expected, actual_cmp) - encoded, _, _ = coding.times.encode_cf_datetime(actual, units, - calendar) - if '1-1-1' not in units: - # pandas parses this date very strangely, so the original - # units/encoding cannot be preserved in this case: - # (Pdb) pd.to_datetime('1-1-1 00:00:0.0') - # Timestamp('2001-01-01 00:00:00') - assert_array_equal(num_dates, np.around(encoded, 1)) - if (hasattr(num_dates, 'ndim') and num_dates.ndim == 1 and - '1000' not in units): - # verify that wrapping with a pandas.Index works - # note that it *does not* currently work to even put - # non-datetime64 compatible dates into a pandas.Index - encoded, _, _ = coding.times.encode_cf_datetime( - pd.Index(actual), units, calendar) - assert_array_equal(num_dates, np.around(encoded, 1)) + expected = _ensure_naive_tz( + nctime.num2date(num_dates, units, calendar)) + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', + 'Unable to decode time axis') + actual = coding.times.decode_cf_datetime(num_dates, units, + calendar) + if (isinstance(actual, np.ndarray) and + np.issubdtype(actual.dtype, np.datetime64)): + # self.assertEqual(actual.dtype.kind, 'M') + # For some reason, numpy 1.8 does not compare ns precision + # datetime64 arrays as equal to arrays of datetime objects, + # but it works for us precision. Thus, convert to us + # precision for the actual array equal comparison... 
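The microsecond-precision workaround described in the comment above can be reproduced in isolation (a sketch; the numpy 1.8 comparison quirk it guards against does not affect recent numpy):

    import numpy as np
    from datetime import datetime

    arr_ns = np.array(['2000-01-01'], dtype='M8[ns]')
    # cast nanosecond datetimes down to microseconds before comparing
    # against plain datetime.datetime objects
    arr_us = arr_ns.astype('M8[us]')
    assert arr_us[0] == datetime(2000, 1, 1)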
+ actual_cmp = actual.astype('M8[us]') + else: + actual_cmp = actual + assert_array_equal(expected, actual_cmp) + encoded, _, _ = coding.times.encode_cf_datetime(actual, units, + calendar) + if '1-1-1' not in units: + # pandas parses this date very strangely, so the original + # units/encoding cannot be preserved in this case: + # (Pdb) pd.to_datetime('1-1-1 00:00:0.0') + # Timestamp('2001-01-01 00:00:00') + assert_array_equal(num_dates, np.around(encoded, 1)) + if (hasattr(num_dates, 'ndim') and num_dates.ndim == 1 and + '1000' not in units): + # verify that wrapping with a pandas.Index works + # note that it *does not* currently work to even put + # non-datetime64 compatible dates into a pandas.Index + encoded, _, _ = coding.times.encode_cf_datetime( + pd.Index(actual), units, calendar) + assert_array_equal(num_dates, np.around(encoded, 1)) @requires_netcdftime @@ -154,12 +152,6 @@ def test_decode_cf_datetime_non_iso_strings(): assert_array_equal(actual, expected) -_NON_STANDARD_CALENDARS = ['noleap', '365_day', '360_day', - 'julian', 'all_leap', '366_day'] -_ALL_CALENDARS = _NON_STANDARD_CALENDARS + list( - coding.times._STANDARD_CALENDARS) - - @pytest.mark.skipif(not has_netcdftime, reason='netcdftime not installed') @pytest.mark.parametrize( ['calendar', 'enable_netcdftimeindex'], @@ -471,20 +463,20 @@ def test_decode_non_standard_calendar_fallback( assert_array_equal(actual, expected) -@requires_netcdftime -def test_cf_datetime_nan(): - for num_dates, units, expected_list in [ - ([np.nan], 'days since 2000-01-01', ['NaT']), - ([np.nan, 0], 'days since 2000-01-01', - ['NaT', '2000-01-01T00:00:00Z']), - ([np.nan, 0, 1], 'days since 2000-01-01', - ['NaT', '2000-01-01T00:00:00Z', '2000-01-02T00:00:00Z']), - ]: - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', 'All-NaN') - actual = coding.times.decode_cf_datetime(num_dates, units) - expected = np.array(expected_list, dtype='datetime64[ns]') - assert_array_equal(expected, actual) +@pytest.mark.skipif(not has_netcdftime, reason='netcdftime not installed') +@pytest.mark.parametrize( + ['num_dates', 'units', 'expected_list'], + [([np.nan], 'days since 2000-01-01', ['NaT']), + ([np.nan, 0], 'days since 2000-01-01', + ['NaT', '2000-01-01T00:00:00Z']), + ([np.nan, 0, 1], 'days since 2000-01-01', + ['NaT', '2000-01-01T00:00:00Z', '2000-01-02T00:00:00Z'])]) +def test_cf_datetime_nan(num_dates, units, expected_list): + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'All-NaN') + actual = coding.times.decode_cf_datetime(num_dates, units) + expected = np.array(expected_list, dtype='datetime64[ns]') + assert_array_equal(expected, actual) @requires_netcdftime @@ -498,24 +490,25 @@ def test_decoded_cf_datetime_array_2d(): assert_array_equal(np.asarray(result), expected) -def test_infer_datetime_units(): - for dates, expected in [(pd.date_range('1900-01-01', periods=5), - 'days since 1900-01-01 00:00:00'), - (pd.date_range('1900-01-01 12:00:00', freq='H', - periods=2), - 'hours since 1900-01-01 12:00:00'), - (pd.to_datetime( - ['1900-01-01', '1900-01-02', 'NaT']), - 'days since 1900-01-01 00:00:00'), - (pd.to_datetime(['1900-01-01', - '1900-01-02T00:00:00.005']), - 'seconds since 1900-01-01 00:00:00'), - (pd.to_datetime(['NaT', '1900-01-01']), - 'days since 1900-01-01 00:00:00'), - (pd.to_datetime(['NaT']), - 'days since 1970-01-01 00:00:00'), - ]: - assert expected == coding.times.infer_datetime_units(dates) +@pytest.mark.parametrize( + ['dates', 'expected'], + [(pd.date_range('1900-01-01', periods=5), + 'days 
since 1900-01-01 00:00:00'), + (pd.date_range('1900-01-01 12:00:00', freq='H', + periods=2), + 'hours since 1900-01-01 12:00:00'), + (pd.to_datetime( + ['1900-01-01', '1900-01-02', 'NaT']), + 'days since 1900-01-01 00:00:00'), + (pd.to_datetime(['1900-01-01', + '1900-01-02T00:00:00.005']), + 'seconds since 1900-01-01 00:00:00'), + (pd.to_datetime(['NaT', '1900-01-01']), + 'days since 1900-01-01 00:00:00'), + (pd.to_datetime(['NaT']), + 'days since 1970-01-01 00:00:00')]) +def test_infer_datetime_units(dates, expected): + assert expected == coding.times.infer_datetime_units(dates) @requires_netcdftime @@ -539,34 +532,32 @@ def test_infer_netcdftime_datetime_units(): assert expected == coding.times.infer_datetime_units(dates) -def test_cf_timedelta(): - examples = [ - ('1D', 'days', np.int64(1)), - (['1D', '2D', '3D'], 'days', np.array([1, 2, 3], 'int64')), - ('1h', 'hours', np.int64(1)), - ('1ms', 'milliseconds', np.int64(1)), - ('1us', 'microseconds', np.int64(1)), - (['NaT', '0s', '1s'], None, [np.nan, 0, 1]), - (['30m', '60m'], 'hours', [0.5, 1.0]), - (np.timedelta64('NaT', 'ns'), 'days', np.nan), - (['NaT', 'NaT'], 'days', [np.nan, np.nan]), - ] - - for timedeltas, units, numbers in examples: - timedeltas = pd.to_timedelta(timedeltas, box=False) - numbers = np.array(numbers) - - expected = numbers - actual, _ = coding.times.encode_cf_timedelta(timedeltas, units) +@pytest.mark.parametrize( + ['timedeltas', 'units', 'numbers'], + [('1D', 'days', np.int64(1)), + (['1D', '2D', '3D'], 'days', np.array([1, 2, 3], 'int64')), + ('1h', 'hours', np.int64(1)), + ('1ms', 'milliseconds', np.int64(1)), + ('1us', 'microseconds', np.int64(1)), + (['NaT', '0s', '1s'], None, [np.nan, 0, 1]), + (['30m', '60m'], 'hours', [0.5, 1.0]), + (np.timedelta64('NaT', 'ns'), 'days', np.nan), + (['NaT', 'NaT'], 'days', [np.nan, np.nan])]) +def test_cf_timedelta(timedeltas, units, numbers): + timedeltas = pd.to_timedelta(timedeltas, box=False) + numbers = np.array(numbers) + + expected = numbers + actual, _ = coding.times.encode_cf_timedelta(timedeltas, units) + assert_array_equal(expected, actual) + assert expected.dtype == actual.dtype + + if units is not None: + expected = timedeltas + actual = coding.times.decode_cf_timedelta(numbers, units) assert_array_equal(expected, actual) assert expected.dtype == actual.dtype - if units is not None: - expected = timedeltas - actual = coding.times.decode_cf_timedelta(numbers, units) - assert_array_equal(expected, actual) - assert expected.dtype == actual.dtype - expected = np.timedelta64('NaT', 'ns') actual = coding.times.decode_cf_timedelta(np.array(np.nan), 'days') assert_array_equal(expected, actual) @@ -585,13 +576,14 @@ def test_cf_timedelta_2d(): assert expected.dtype == actual.dtype -def test_infer_timedelta_units(): - for deltas, expected in [ - (pd.to_timedelta(['1 day', '2 days']), 'days'), - (pd.to_timedelta(['1h', '1 day 1 hour']), 'hours'), - (pd.to_timedelta(['1m', '2m', np.nan]), 'minutes'), - (pd.to_timedelta(['1m3s', '1m4s']), 'seconds')]: - assert expected == coding.times.infer_timedelta_units(deltas) +@pytest.mark.parametrize( + ['deltas', 'expected'], + [(pd.to_timedelta(['1 day', '2 days']), 'days'), + (pd.to_timedelta(['1h', '1 day 1 hour']), 'hours'), + (pd.to_timedelta(['1m', '2m', np.nan]), 'minutes'), + (pd.to_timedelta(['1m3s', '1m4s']), 'seconds')]) +def test_infer_timedelta_units(deltas, expected): + assert expected == coding.times.infer_timedelta_units(deltas) @pytest.mark.parametrize(['date_args', 'expected'], From 
f3438fda8879b32a03e245d64c4c44c80b33e0ea Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 10 Mar 2018 18:37:19 -0500 Subject: [PATCH 25/58] Update time-series.rst for enable_netcdftimeindex option --- doc/time-series.rst | 55 ++++++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/doc/time-series.rst b/doc/time-series.rst index b63532f772b..37fc92065a3 100644 --- a/doc/time-series.rst +++ b/doc/time-series.rst @@ -60,7 +60,7 @@ One unfortunate limitation of using ``datetime64[ns]`` is that it limits the native representation of dates to those that fall between the years 1678 and 2262. When a netCDF file contains dates outside of these bounds, dates will be returned as arrays of ``netcdftime.datetime`` objects and a ``NetCDFTimeIndex`` -will be used for indexing. The ``NetCDFTimeIndex`` enables only a subset of +can be used for indexing. The ``NetCDFTimeIndex`` enables only a subset of the indexing functionality of a ``pandas.DatetimeIndex``. See :ref:`NetCDFTimeIndex` for more information. @@ -206,32 +206,20 @@ For more examples of using grouped operations on a time dimension, see Non-standard calendars and dates outside the Timestamp-valid range ------------------------------------------------------------------ -.. note:: - - In a change from prior behavior, as of version 0.??.0, if a dataset is - encoded using a non-standard calendar type it will always be read in using - the corresponding date type from ``netcdftime``. This is different from the - prior behavior where if the dates were within the Timestamp-valid range and - representable by standard datetimes (e.g. for a ``'noleap'`` calendar) they - would be decoded into standard datetimes indexed with a - ``pandas.DatetimeIndex``. - - As of version 0.??.0, a ``NetCDFTimeIndex`` will be used for time indexing - if any of the following are true: - - - The dates are from a non-standard calendar - - Any dates are outside the Timestamp-valid range - - Otherwise a ``pandas.DatetimeIndex`` will be used. - Through the optional ``netcdftime`` library and a custom subclass of ``pandas.Index``, xarray supports a subset of the indexing functionality enabled through the standard ``pandas.DatetimeIndex`` for dates from non-standard calendars or dates using a standard calendar, but outside the -`Timestamp-valid range`_ (approximately between years 1678 and 2262). +`Timestamp-valid range`_ (approximately between years 1678 and 2262). This +behavior has not yet been turned on by default; to take advantage of this +functionality, you must have the ``enable_netcdftimeindex`` option set to +``True`` within your context (see :meth:`xarray.set_options` for more +information). + For instance, you can create a DataArray indexed by a time -coordinate with a no-leap calendar and it will automatically be indexed using a -``NetCDFTimeIndex``. +coordinate with a no-leap calendar within a context manager setting the +``enable_netcdftimeindex`` option, and the time index will be cast to a +``NetCDFTimeIndex``: .. ipython:: python @@ -240,11 +228,26 @@ coordinate with a no-leap calendar and it will automatically be indexed using a dates = [DatetimeNoLeap(year, month, 1) for year, month in product(range(1, 3), range(1, 13))] - da = xr.DataArray(np.arange(24), coords=[dates], dims=['time'], - name='foo') + with xr.set_options(enable_netcdftimeindex=True): + da = xr.DataArray(np.arange(24), coords=[dates], dims=['time'], + name='foo') + +.. 
note:: + + With the ``enable_netcdftimeindex`` option activated, a ``NetCDFTimeIndex`` + will be used for time indexing if any of the following are true: + + - The dates are from a non-standard calendar + - Any dates are outside the Timestamp-valid range + Otherwise a ``pandas.DatetimeIndex`` will be used. In addition, if any + variable (not just an index variable) is encoded using a non-standard + calendar, its times will be decoded into ``netcdftime.datetime`` objects, + regardless of whether or not they can be represented using + ``np.datetime64[ns]`` objects. + For data indexed by a ``NetCDFTimeIndex`` xarray currently supports `partial -datetime string indexing`_ using strictly `ISO8601-format`_ partial datetime +datetime string indexing`_ using strictly `ISO 8601-format`_ partial datetime strings: .. ipython:: python @@ -281,5 +284,5 @@ and serialization: ``NetCDFTimeIndex`` is not supported. .. _Timestamp-valid range: https://pandas.pydata.org/pandas-docs/stable/timeseries.html#timestamp-limitations -.. _ISO8601-format: https://en.wikipedia.org/wiki/ISO_8601 +.. _ISO 8601-format: https://en.wikipedia.org/wiki/ISO_8601 .. _partial datetime string indexing: https://pandas.pydata.org/pandas-docs/stable/timeseries.html#partial-string-indexing From c35364e27b76d8377fe230841a7ab98f52b32ea5 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 10 Mar 2018 18:40:38 -0500 Subject: [PATCH 26/58] Use :py:func: in rst for xarray.set_options --- doc/time-series.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/time-series.rst b/doc/time-series.rst index 37fc92065a3..42b141f3b89 100644 --- a/doc/time-series.rst +++ b/doc/time-series.rst @@ -213,7 +213,7 @@ calendars or dates using a standard calendar, but outside the `Timestamp-valid range`_ (approximately between years 1678 and 2262). This behavior has not yet been turned on by default; to take advantage of this functionality, you must have the ``enable_netcdftimeindex`` option set to -``True`` within your context (see :meth:`xarray.set_options` for more +``True`` within your context (see :py:func:`~xarray.set_options` for more information). For instance, you can create a DataArray indexed by a time From 62ce0aed9ae7d0bedced9495e1382c41ac3c52cc Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 10 Mar 2018 19:14:17 -0500 Subject: [PATCH 27/58] Add a what's new entry and test that resample raises a TypeError --- doc/whats-new.rst | 8 ++++++++ xarray/tests/test_netcdftimeindex.py | 5 +++++ 2 files changed, 13 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 963a0454f88..66879a3b997 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -60,6 +60,14 @@ Enhancements By `Keisuke Fujii `_. - :py:func:`~plot.line()` learned to make plots with data on x-axis if so specified. (:issue:`575`) By `Deepak Cherian `_. +- Add an option for using a ``NetCDFTimeIndex`` for indexing times with + non-standard calendars and/or outside the Timestamp-valid range; this index + enables a subset of the functionality of a standard + ``pandas.DatetimeIndex`` (:issue:`789`, :issue:`1084`, :issue:`1252`). + By `Spencer Clark `_ with help from + `Stephan Hoyer `_. +- Allow for serialization of ``netcdftime.datetime`` objects (:issue:`789`, + :issue:`1084`, :issue:`1252`). By `Spencer Clark `_. 
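To make the partial datetime string indexing mentioned above concrete — a sketch assuming the option is enabled and ``da`` is indexed by a ``NetCDFTimeIndex`` spanning years 1-2, as in the documentation example:

    # select all of year 1 with a partial ISO 8601 string...
    da.sel(time='0001')

    # ...or a range of dates with a string slice
    da.sel(time=slice('0001-01-01', '0001-12-30'))

Only strictly ISO 8601-formatted strings are parsed; the looser formats accepted by ``pandas.to_datetime`` are not supported by the custom index.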
Bug fixes ~~~~~~~~~ diff --git a/xarray/tests/test_netcdftimeindex.py b/xarray/tests/test_netcdftimeindex.py index 533fcf123ec..5ff80896d31 100644 --- a/xarray/tests/test_netcdftimeindex.py +++ b/xarray/tests/test_netcdftimeindex.py @@ -291,6 +291,11 @@ def test_groupby(da): assert_identical(result, expected) +def test_resample_error(da): + with pytest.raises(TypeError): + da.resample(time='Y') + + SEL_STRING_OR_LIST_TESTS = { 'string': '0001', 'string-slice': slice('0001-01-01', '0001-12-30'), From d5a3cef58fe39bc728c774958d3013ffe069f69c Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Fri, 16 Mar 2018 17:04:23 -0400 Subject: [PATCH 28/58] Move what's new entry to the version 0.10.3 section --- doc/whats-new.rst | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 11a9dfcd0cc..45dbf6c253a 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -37,6 +37,16 @@ Documentation Enhancements ~~~~~~~~~~~~ +- Add an option for using a ``NetCDFTimeIndex`` for indexing times with + non-standard calendars and/or outside the Timestamp-valid range; this index + enables a subset of the functionality of a standard + ``pandas.DatetimeIndex`` (:issue:`789`, :issue:`1084`, :issue:`1252`). + By `Spencer Clark `_ with help from + `Stephan Hoyer `_. +- Allow for serialization of ``netcdftime.datetime`` objects (:issue:`789`, + :issue:`1084`, :issue:`1252`). By `Spencer Clark + `_. + Bug fixes ~~~~~~~~~ @@ -102,14 +112,6 @@ Enhancements By `Keisuke Fujii `_. - :py:func:`~plot.line()` learned to make plots with data on x-axis if so specified. (:issue:`575`) By `Deepak Cherian `_. -- Add an option for using a ``NetCDFTimeIndex`` for indexing times with - non-standard calendars and/or outside the Timestamp-valid range; this index - enables a subset of the functionality of a standard - ``pandas.DatetimeIndex`` (:issue:`789`, :issue:`1084`, :issue:`1252`). - By `Spencer Clark `_ with help from - `Stephan Hoyer `_. -- Allow for serialization of ``netcdftime.datetime`` objects (:issue:`789`, - :issue:`1084`, :issue:`1252`). By `Spencer Clark `_. Bug fixes ~~~~~~~~~ From e721d2609a36eb7d1cd6e1474b970877a82d8810 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Fri, 16 Mar 2018 20:32:51 -0400 Subject: [PATCH 29/58] Add version-dependent pathway for importing netcdftime.datetime --- xarray/coding/times.py | 21 +++++++++++++++++++++ xarray/core/common.py | 6 ++++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 4b4870e2ede..7d8e5b1e5cd 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -60,6 +60,27 @@ def _import_netcdftime(): return nctime +def _import_netcdftime_datetime(): + """Helper function to handle importing netcdftime.datetime across the + transition between the version of netcdftime packaged with netCDF4 and the + standalone version""" + try: + # Will raise an ImportError if not using standalone netcdftime + from netcdftime import num2date # noqa: F401 + + # Generic netcdftime datetime is exposed in the public API in the + # standalone version of netcdftime + from netcdftime import datetime + except ImportError: + # Need to use private API to import generic netcdftime datetime in + # older versions. 
See https://github.com/Unidata/netcdftime/issues/8 + try: + from netcdftime._netcdftime import datetime + except ImportError: + raise ImportError("Failed to import netcdftime") + return datetime + + def _netcdf_to_numpy_timeunit(units): units = units.lower() if not units.endswith('s'): diff --git a/xarray/core/common.py b/xarray/core/common.py index 9c7ac36aaac..2c896c2800f 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -829,12 +829,14 @@ def is_np_datetime_like(dtype): def contains_netcdftime_datetimes(var): """Check if a variable contains netcdftime datetime objects""" + from ..coding.times import _import_netcdftime_datetime + try: - from netcdftime._netcdftime import datetime + netcdftime_datetime = _import_netcdftime_datetime() except ImportError: return False else: - return isinstance(var.data.flatten()[0], datetime) + return isinstance(var.data.flatten()[0], netcdftime_datetime) def _contains_datetime_like_objects(var): From 5e1c4a891c22f3c6b54d31ece8c29306483b36a0 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Tue, 20 Mar 2018 13:19:19 -0400 Subject: [PATCH 30/58] Make NetCDFTimeIndex and date decoding/encoding compatible with datetime.datetime --- xarray/backends/zarr.py | 6 +- xarray/coding/netcdftimeindex.py | 9 +- xarray/coding/times.py | 26 ++++- xarray/core/common.py | 9 +- xarray/core/utils.py | 4 +- xarray/tests/test_backends.py | 160 +++++++++++++++++++++++---- xarray/tests/test_coding_times.py | 27 ++++- xarray/tests/test_netcdftimeindex.py | 5 +- 8 files changed, 210 insertions(+), 36 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 8e5cb8b6089..80587d726a8 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -8,7 +8,8 @@ from .. import Variable, coding, conventions from ..core import indexing -from ..core.common import contains_netcdftime_datetimes +from ..core.common import (contains_netcdftime_datetimes, + contains_datetime_datetimes) from ..core.pycompat import OrderedDict, integer_types, iteritems from ..core.utils import FrozenOrderedDict, HiddenKeyDict from .common import AbstractWritableDataStore, ArrayWriter, BackendArray @@ -222,7 +223,8 @@ def encode_zarr_variable(var, needs_copy=True, name=None): A variable which has been encoded as described above. """ - if var.dtype.kind == 'O' and not contains_netcdftime_datetimes(var): + if var.dtype.kind == 'O' and not (contains_netcdftime_datetimes(var) or + contains_datetime_datetimes(var)): raise NotImplementedError("Variable `%s` is an object. Zarr " "store can't yet encode objects." % name) diff --git a/xarray/coding/netcdftimeindex.py b/xarray/coding/netcdftimeindex.py index a12876501cf..d3fde605e0a 100644 --- a/xarray/coding/netcdftimeindex.py +++ b/xarray/coding/netcdftimeindex.py @@ -1,6 +1,6 @@ from __future__ import absolute_import import re -from datetime import timedelta +from datetime import datetime, timedelta import numpy as np import pandas as pd @@ -126,17 +126,18 @@ def assert_all_valid_date_type(data): valid_types = (DatetimeJulian, DatetimeNoLeap, DatetimeAllLeap, DatetimeGregorian, DatetimeProlepticGregorian, - Datetime360Day) + Datetime360Day, datetime) sample = data[0] date_type = type(sample) if not isinstance(sample, valid_types): raise TypeError( 'NetCDFTimeIndex requires netcdftime._netcdftime.datetime ' - 'objects. Got object of {}.'.format(date_type)) + 'or datetime.datetime objects. 
' + 'Got object of {}.'.format(date_type)) if not all(isinstance(value, date_type) for value in data): raise TypeError( - 'NetCDFTimeIndex requires using netcdftime._netcdftime.datetime ' + 'NetCDFTimeIndex requires using datetime ' 'objects of all the same type. Got\n{}.'.format(data)) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 7d8e5b1e5cd..b6815be7d44 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -9,7 +9,8 @@ import numpy as np import pandas as pd -from ..core.common import contains_netcdftime_datetimes +from ..core.common import (contains_netcdftime_datetimes, + contains_datetime_datetimes) from ..core import indexing from ..core.formatting import first_n_items, format_timestamp, last_item from ..core.options import OPTIONS @@ -244,15 +245,27 @@ def _infer_time_units_from_diff(unique_timedeltas): return 'seconds' +def _infer_calendar_from_min_date(date): + """Given a single datetime, infer the calendar type""" + from netcdftime import DatetimeGregorian + + gregorian_breakpoint = type(date)(1582, 10, 15) + if date >= gregorian_breakpoint and isinstance(date, datetime): + return 'standard' + elif date < gregorian_breakpoint and isinstance(date, datetime): + return 'gregorian' + elif date < gregorian_breakpoint and isinstance(date, DatetimeGregorian): + return 'standard' + else: + return date.calendar + + def infer_calendar_name(dates): """Given an array of datetimes, infer the CF calendar name""" if np.asarray(dates).dtype == 'datetime64[ns]': return 'proleptic_gregorian' else: - try: - return np.asarray(dates)[0].calendar - except IndexError: - return np.asarray(dates).item().calendar + return _infer_calendar_from_min_date(np.min(np.asarray(dates))) def infer_datetime_units(dates): @@ -397,7 +410,8 @@ class CFDatetimeCoder(VariableCoder): def encode(self, variable, name=None): dims, data, attrs, encoding = unpack_for_encoding(variable) if (np.issubdtype(data.dtype, np.datetime64) or - contains_netcdftime_datetimes(variable)): + contains_netcdftime_datetimes(variable) or + contains_datetime_datetimes(variable)): (data, units, calendar) = encode_cf_datetime( data, encoding.pop('units', None), diff --git a/xarray/core/common.py b/xarray/core/common.py index 2c896c2800f..e26bbfa94b7 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1,6 +1,7 @@ from __future__ import absolute_import, division, print_function import warnings +from datetime import datetime import numpy as np import pandas as pd @@ -839,7 +840,13 @@ def contains_netcdftime_datetimes(var): return isinstance(var.data.flatten()[0], netcdftime_datetime) +def contains_datetime_datetimes(var): + """Check if a variable contains datetime.datetime objects""" + return isinstance(var.data.flatten()[0], datetime) + + def _contains_datetime_like_objects(var): """Check if a variable contains datetime like objects (either np.datetime64, np.timedelta64, or netcdftime._netcdftime.datetime)""" - return is_np_datetime_like(var.dtype) or contains_netcdftime_datetimes(var) + return (is_np_datetime_like(var.dtype) or + contains_netcdftime_datetimes(var) or contains_datetime_datetimes(var)) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index a94e96f5747..b17911910de 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -8,6 +8,7 @@ import re import warnings from collections import Iterable, Mapping, MutableMapping, MutableSet +from datetime import datetime import numpy as np import pandas as pd @@ -48,7 +49,8 @@ def _maybe_cast_to_netcdftimeindex(index): return index 
else: if len(index): - if isinstance(index[0], ncdatetime): + if (isinstance(index[0], ncdatetime) or + isinstance(index[0], datetime)): index = NetCDFTimeIndex(index) return index diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index dd67e04aa42..3ae09c11cfe 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -341,7 +341,7 @@ def test_roundtrip_string_encoded_characters(self): assert_identical(expected, actual) self.assertEqual(actual['x'].encoding['_Encoding'], 'ascii') - def test_roundtrip_datetime_data(self): + def test_roundtrip_numpy_datetime_data(self): times = pd.to_datetime(['2000-01-01', '2000-01-02', 'NaT']) expected = Dataset({'t': ('t', times), 't0': times[0]}) kwds = {'encoding': {'t0': {'units': 'days since 1950-01-01'}}} @@ -349,8 +349,8 @@ def test_roundtrip_datetime_data(self): assert_identical(expected, actual) assert actual.t0.encoding['units'] == 'days since 1950-01-01' - def test_roundtrip_netcdftime_datetime_data(self): - from datetime import datetime + def test_roundtrip_netcdftime_datetime_data_pre_gregorian(self): + from netcdftime import DatetimeGregorian from .test_coding_times import _all_netcdftime_date_types date_types = _all_netcdftime_date_types() @@ -358,22 +358,19 @@ def test_roundtrip_netcdftime_datetime_data(self): times = [date_type(1, 1, 1), date_type(1, 1, 2)] expected = Dataset({'t': ('t', times), 't0': times[0]}) kwds = {'encoding': {'t0': {'units': 'days since 0001-01-01'}}} - if times[0].calendar == 'gregorian': - # netcdftime.num2date decodes dates from the Gregorian calendar - # to datetime.datetime objects - expected_decoded_t = np.array( - [datetime(1, 1, 1), datetime(1, 1, 2)]) - expected_decoded_t0 = np.array([datetime(1, 1, 1)]) + expected_decoded_t = np.array(times) + expected_decoded_t0 = np.array([date_type(1, 1, 1)]) + if date_type is DatetimeGregorian: + expected_calendar = 'standard' else: - expected_decoded_t = np.array(times) - expected_decoded_t0 = np.array([date_type(1, 1, 1)]) + expected_calendar = times[0].calendar with self.roundtrip(expected, save_kwargs=kwds) as actual: abs_diff = abs(actual.t.values - expected_decoded_t) self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) self.assertEquals(actual.t.encoding['units'], 'days since 0001-01-01 00:00:00.000000') self.assertEquals(actual.t.encoding['calendar'], - times[0].calendar) + expected_calendar) abs_diff = abs(actual.t0.values - expected_decoded_t0) self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) @@ -381,7 +378,82 @@ def test_roundtrip_netcdftime_datetime_data(self): self.assertEquals(actual.t0.encoding['units'], 'days since 0001-01-01') self.assertEquals(actual.t.encoding['calendar'], - times[0].calendar) + expected_calendar) + + def test_roundtrip_netcdftime_datetime_data_post_gregorian(self): + from .test_coding_times import _all_netcdftime_date_types + + date_types = _all_netcdftime_date_types() + for date_type in date_types.values(): + times = [date_type(1582, 10, 15), date_type(1582, 10, 16)] + expected = Dataset({'t': ('t', times), 't0': times[0]}) + kwds = {'encoding': {'t0': {'units': 'days since 1582-10-15'}}} + expected_decoded_t = np.array(times) + expected_decoded_t0 = np.array([date_type(1582, 10, 15)]) + expected_calendar = times[0].calendar + with self.roundtrip(expected, save_kwargs=kwds) as actual: + abs_diff = abs(actual.t.values - expected_decoded_t) + self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) + self.assertEquals(actual.t.encoding['units'], + 'days since 
1582-10-15 00:00:00.000000') + self.assertEquals(actual.t.encoding['calendar'], + expected_calendar) + + abs_diff = abs(actual.t0.values - expected_decoded_t0) + self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) + + self.assertEquals(actual.t0.encoding['units'], + 'days since 1582-10-15') + self.assertEquals(actual.t.encoding['calendar'], + expected_calendar) + + def test_roundtrip_datetime_datetime_data_pre_gregorian(self): + from datetime import datetime + + times = [datetime(1, 1, 1), datetime(1, 1, 2)] + expected = Dataset({'t': ('t', times), 't0': times[0]}) + kwds = {'encoding': {'t0': {'units': 'days since 0001-01-01'}}} + expected_decoded_t = np.array(times) + expected_decoded_t0 = np.array([datetime(1, 1, 1)]) + with self.roundtrip(expected, save_kwargs=kwds) as actual: + abs_diff = abs(actual.t.values - expected_decoded_t) + self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) + self.assertEquals(actual.t.encoding['units'], + 'days since 0001-01-01 00:00:00.000000') + self.assertEquals(actual.t.encoding['calendar'], + 'gregorian') + + abs_diff = abs(actual.t0.values - expected_decoded_t0) + self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) + + self.assertEquals(actual.t0.encoding['units'], + 'days since 0001-01-01') + self.assertEquals(actual.t.encoding['calendar'], + 'gregorian') + + def test_roundtrip_datetime_datetime_data_post_gregorian(self): + from datetime import datetime + + times = [datetime(1582, 10, 15), datetime(1582, 10, 16)] + expected = Dataset({'t': ('t', times), 't0': times[0]}) + kwds = {'encoding': {'t0': {'units': 'days since 1582-10-15'}}} + expected_decoded_t = np.array(times) + expected_decoded_t0 = np.array([datetime(1582, 10, 15)]) + with self.roundtrip(expected, save_kwargs=kwds) as actual: + abs_diff = abs(actual.t.values - expected_decoded_t) + self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) + self.assertEquals(actual.t.encoding['units'], + 'days since 1582-10-15 00:00:00.000000') + self.assertEquals(actual.t.encoding['calendar'], + 'standard') + + abs_diff = abs(actual.t0.values - expected_decoded_t0) + self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) + + self.assertEquals(actual.t0.encoding['units'], + 'days since 1582-10-15') + self.assertEquals(actual.t.encoding['calendar'], + 'standard') def test_roundtrip_timedelta_data(self): time_deltas = pd.to_timedelta(['1h', '2h', 'NaT']) @@ -1902,7 +1974,7 @@ def test_roundtrip_string_encoded_characters(self): def test_roundtrip_coordinates_with_space(self): pass - def test_roundtrip_datetime_data(self): + def test_roundtrip_numpy_datetime_data(self): # Override method in DatasetIOTestCases - remove not applicable # save_kwds times = pd.to_datetime(['2000-01-01', '2000-01-02', 'NaT']) @@ -1910,7 +1982,7 @@ def test_roundtrip_datetime_data(self): with self.roundtrip(expected) as actual: assert_identical(expected, actual) - def test_roundtrip_netcdftime_datetime_data(self): + def test_roundtrip_netcdftime_datetime_data_pre_gregorian(self): # Override method in DatasetIOTestCases - remove not applicable # save_kwds from .test_coding_times import _all_netcdftime_date_types @@ -1919,16 +1991,66 @@ def test_roundtrip_netcdftime_datetime_data(self): for date_type in date_types.values(): times = [date_type(1, 1, 1), date_type(1, 1, 2)] expected = Dataset({'t': ('t', times), 't0': times[0]}) + expected_decoded_t = np.array(times) + expected_decoded_t0 = np.array([date_type(1, 1, 1)]) with self.roundtrip(expected) as actual: - expected_t = np.array(times) - abs_diff = 
abs(actual.t.values - expected_t) + abs_diff = abs(actual.t.values - expected_decoded_t) self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) - expected_t0 = np.array([date_type(1, 1, 1)]) - abs_diff = abs(actual.t0.values - expected_t0) + abs_diff = abs(actual.t0.values - expected_decoded_t0) self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) + def test_roundtrip_netcdftime_datetime_data_post_gregorian(self): + # Override method in DatasetIOTestCases - remove not applicable + # save_kwds + from .test_coding_times import _all_netcdftime_date_types + + date_types = _all_netcdftime_date_types() + for date_type in date_types.values(): + times = [date_type(1582, 10, 15), date_type(1582, 10, 16)] + expected = Dataset({'t': ('t', times), 't0': times[0]}) + expected_decoded_t = np.array(times) + expected_decoded_t0 = np.array([date_type(1582, 10, 15)]) + with self.roundtrip(expected) as actual: + abs_diff = abs(actual.t.values - expected_decoded_t) + self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) + + abs_diff = abs(actual.t0.values - expected_decoded_t0) + self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) + + def test_roundtrip_datetime_datetime_data_pre_gregorian(self): + # Override method in DatasetIOTestCases - remove not applicable + # save_kwds + from datetime import datetime + + times = [datetime(1, 1, 1), datetime(1, 1, 2)] + expected = Dataset({'t': ('t', times), 't0': times[0]}) + expected_decoded_t = np.array(times) + expected_decoded_t0 = np.array([datetime(1, 1, 1)]) + with self.roundtrip(expected) as actual: + abs_diff = abs(actual.t.values - expected_decoded_t) + self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) + + abs_diff = abs(actual.t0.values - expected_decoded_t0) + self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) + + def test_roundtrip_datetime_datetime_data_post_gregorian(self): + # Override method in DatasetIOTestCases - remove not applicable + # save_kwds + from datetime import datetime + + times = [datetime(1582, 10, 15), datetime(1582, 10, 16)] + expected = Dataset({'t': ('t', times), 't0': times[0]}) + expected_decoded_t = np.array(times) + expected_decoded_t0 = np.array([datetime(1582, 10, 15)]) + with self.roundtrip(expected) as actual: + abs_diff = abs(actual.t.values - expected_decoded_t) + self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) + + abs_diff = abs(actual.t0.values - expected_decoded_t0) + self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) + def test_write_store(self): # Override method in DatasetIOTestCases - not applicable to dask pass diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index d1fd772e189..a6d32f1767d 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1,5 +1,6 @@ from __future__ import absolute_import, division, print_function +from datetime import datetime from itertools import product import warnings @@ -532,6 +533,27 @@ def test_infer_netcdftime_datetime_units(): assert expected == coding.times.infer_datetime_units(dates) +# datetime.datetime objects have higher precision than netcdftime.datetime +# objects. 
+@pytest.mark.parametrize( + ['dates', 'expected'], [ + ([datetime(1900, 1, 1), + datetime(1900, 1, 2)], + 'days since 1900-01-01 00:00:00.000000'), + ([datetime(1900, 1, 1, 12), + datetime(1900, 1, 1, 13)], + 'hours since 1900-01-01 12:00:00.000000'), + ([datetime(1900, 1, 1), + datetime(1900, 1, 2), + datetime(1900, 1, 2, 0, 0, 1)], + 'seconds since 1900-01-01 00:00:00.000000'), + ([datetime(1900, 1, 1), + datetime(1900, 1, 2, 0, 0, 0, 5)], + 'seconds since 1900-01-01 00:00:00.000000')]) +def test_infer_datetime_datetime_units(dates, expected): + assert expected == coding.times.infer_datetime_units(dates) + + @pytest.mark.parametrize( ['timedeltas', 'units', 'numbers'], [('1D', 'days', np.int64(1)), @@ -586,6 +608,7 @@ def test_infer_timedelta_units(deltas, expected): assert expected == coding.times.infer_timedelta_units(deltas) +@pytest.mark.skipif(not has_netcdftime, reason='netcdftime not installed') @pytest.mark.parametrize(['date_args', 'expected'], [((1, 2, 3, 4, 5, 6), '0001-02-03 04:05:06.000000'), @@ -597,6 +620,8 @@ def test_infer_timedelta_units(deltas, expected): '1000-02-03 04:05:06.000000')]) def test_format_netcdftime_datetime(date_args, expected): pytest.importorskip('netcdftime') - for date_type in _all_netcdftime_date_types().values(): + date_types = _all_netcdftime_date_types() + date_types['datetime.datetime'] = datetime # Also test datetime.datetime + for date_type in date_types.values(): result = coding.times.format_netcdftime_datetime(date_type(*date_args)) assert result == expected diff --git a/xarray/tests/test_netcdftimeindex.py b/xarray/tests/test_netcdftimeindex.py index 5ff80896d31..0f0ac137233 100644 --- a/xarray/tests/test_netcdftimeindex.py +++ b/xarray/tests/test_netcdftimeindex.py @@ -5,7 +5,7 @@ import pandas as pd import xarray as xr -from datetime import timedelta +from datetime import datetime, timedelta from xarray.coding.netcdftimeindex import ( parse_iso8601, NetCDFTimeIndex, assert_all_valid_date_type, _parsed_string_to_bounds, _parse_iso8601_with_reso) @@ -62,7 +62,8 @@ def netcdftime_date_types(): DatetimeNoLeap, DatetimeJulian, DatetimeAllLeap, DatetimeGregorian, DatetimeProlepticGregorian, Datetime360Day) return [DatetimeNoLeap, DatetimeJulian, DatetimeAllLeap, - DatetimeGregorian, DatetimeProlepticGregorian, Datetime360Day] + DatetimeGregorian, DatetimeProlepticGregorian, Datetime360Day, + datetime] @pytest.fixture(params=netcdftime_date_types()) From c9d04547748f536193916f9e78ac69f983c5d5c7 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Thu, 12 Apr 2018 11:00:54 -0400 Subject: [PATCH 31/58] Remove logic to make NetCDFTimeIndex compatible with datetime.datetime --- xarray/backends/zarr.py | 6 +- xarray/coding/netcdftimeindex.py | 4 +- xarray/coding/times.py | 26 ++--- xarray/core/common.py | 9 +- xarray/core/utils.py | 4 +- xarray/tests/test_backends.py | 136 +-------------------------- xarray/tests/test_coding_times.py | 23 ----- xarray/tests/test_netcdftimeindex.py | 5 +- xarray/tests/test_utils.py | 14 +++ 9 files changed, 32 insertions(+), 195 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 80587d726a8..8e5cb8b6089 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -8,8 +8,7 @@ from .. 
import Variable, coding, conventions from ..core import indexing -from ..core.common import (contains_netcdftime_datetimes, - contains_datetime_datetimes) +from ..core.common import contains_netcdftime_datetimes from ..core.pycompat import OrderedDict, integer_types, iteritems from ..core.utils import FrozenOrderedDict, HiddenKeyDict from .common import AbstractWritableDataStore, ArrayWriter, BackendArray @@ -223,8 +222,7 @@ def encode_zarr_variable(var, needs_copy=True, name=None): A variable which has been encoded as described above. """ - if var.dtype.kind == 'O' and not (contains_netcdftime_datetimes(var) or - contains_datetime_datetimes(var)): + if var.dtype.kind == 'O' and not contains_netcdftime_datetimes(var): raise NotImplementedError("Variable `%s` is an object. Zarr " "store can't yet encode objects." % name) diff --git a/xarray/coding/netcdftimeindex.py b/xarray/coding/netcdftimeindex.py index d3fde605e0a..c695d8a2c16 100644 --- a/xarray/coding/netcdftimeindex.py +++ b/xarray/coding/netcdftimeindex.py @@ -1,6 +1,6 @@ from __future__ import absolute_import import re -from datetime import datetime, timedelta +from datetime import timedelta import numpy as np import pandas as pd @@ -126,7 +126,7 @@ def assert_all_valid_date_type(data): valid_types = (DatetimeJulian, DatetimeNoLeap, DatetimeAllLeap, DatetimeGregorian, DatetimeProlepticGregorian, - Datetime360Day, datetime) + Datetime360Day) sample = data[0] date_type = type(sample) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index e5918c909f3..9ee7e0df854 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -9,8 +9,7 @@ import numpy as np import pandas as pd -from ..core.common import (contains_netcdftime_datetimes, - contains_datetime_datetimes) +from ..core.common import contains_netcdftime_datetimes from ..core import indexing from ..core.formatting import first_n_items, format_timestamp, last_item from ..core.options import OPTIONS @@ -246,27 +245,15 @@ def _infer_time_units_from_diff(unique_timedeltas): return 'seconds' -def _infer_calendar_from_min_date(date): - """Given a single datetime, infer the calendar type""" - from netcdftime import DatetimeGregorian - - gregorian_breakpoint = type(date)(1582, 10, 15) - if date >= gregorian_breakpoint and isinstance(date, datetime): - return 'standard' - elif date < gregorian_breakpoint and isinstance(date, datetime): - return 'gregorian' - elif date < gregorian_breakpoint and isinstance(date, DatetimeGregorian): - return 'standard' - else: - return date.calendar - - def infer_calendar_name(dates): """Given an array of datetimes, infer the CF calendar name""" if np.asarray(dates).dtype == 'datetime64[ns]': return 'proleptic_gregorian' else: - return _infer_calendar_from_min_date(np.min(np.asarray(dates))) + try: + return np.asarray(dates)[0].calendar + except IndexError: + return np.asarray(dates).item().calendar def infer_datetime_units(dates): @@ -411,8 +398,7 @@ class CFDatetimeCoder(VariableCoder): def encode(self, variable, name=None): dims, data, attrs, encoding = unpack_for_encoding(variable) if (np.issubdtype(data.dtype, np.datetime64) or - contains_netcdftime_datetimes(variable) or - contains_datetime_datetimes(variable)): + contains_netcdftime_datetimes(variable)): (data, units, calendar) = encode_cf_datetime( data, encoding.pop('units', None), diff --git a/xarray/core/common.py b/xarray/core/common.py index f76cae4f42e..3169f8385b8 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1,7 +1,6 @@ from __future__ import 
absolute_import, division, print_function import warnings -from datetime import datetime from distutils.version import LooseVersion import numpy as np @@ -890,13 +889,7 @@ def contains_netcdftime_datetimes(var): return isinstance(var.data.flatten()[0], netcdftime_datetime) -def contains_datetime_datetimes(var): - """Check if a variable contains datetime.datetime objects""" - return isinstance(var.data.flatten()[0], datetime) - - def _contains_datetime_like_objects(var): """Check if a variable contains datetime like objects (either np.datetime64, np.timedelta64, or netcdftime._netcdftime.datetime)""" - return (is_np_datetime_like(var.dtype) or - contains_netcdftime_datetimes(var) or contains_datetime_datetimes(var)) + return is_np_datetime_like(var.dtype) or contains_netcdftime_datetimes(var) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index b17911910de..a94e96f5747 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -8,7 +8,6 @@ import re import warnings from collections import Iterable, Mapping, MutableMapping, MutableSet -from datetime import datetime import numpy as np import pandas as pd @@ -49,8 +48,7 @@ def _maybe_cast_to_netcdftimeindex(index): return index else: if len(index): - if (isinstance(index[0], ncdatetime) or - isinstance(index[0], datetime)): + if isinstance(index[0], ncdatetime): index = NetCDFTimeIndex(index) return index diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index ece89861b69..3a84c2541e1 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -349,8 +349,7 @@ def test_roundtrip_numpy_datetime_data(self): assert_identical(expected, actual) assert actual.t0.encoding['units'] == 'days since 1950-01-01' - def test_roundtrip_netcdftime_datetime_data_pre_gregorian(self): - from netcdftime import DatetimeGregorian + def test_roundtrip_netcdftime_datetime_data(self): from .test_coding_times import _all_netcdftime_date_types date_types = _all_netcdftime_date_types() @@ -360,10 +359,8 @@ def test_roundtrip_netcdftime_datetime_data_pre_gregorian(self): kwds = {'encoding': {'t0': {'units': 'days since 0001-01-01'}}} expected_decoded_t = np.array(times) expected_decoded_t0 = np.array([date_type(1, 1, 1)]) - if date_type is DatetimeGregorian: - expected_calendar = 'standard' - else: - expected_calendar = times[0].calendar + expected_calendar = times[0].calendar + with self.roundtrip(expected, save_kwargs=kwds) as actual: abs_diff = abs(actual.t.values - expected_decoded_t) self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) @@ -380,81 +377,6 @@ def test_roundtrip_netcdftime_datetime_data_pre_gregorian(self): self.assertEquals(actual.t.encoding['calendar'], expected_calendar) - def test_roundtrip_netcdftime_datetime_data_post_gregorian(self): - from .test_coding_times import _all_netcdftime_date_types - - date_types = _all_netcdftime_date_types() - for date_type in date_types.values(): - times = [date_type(1582, 10, 15), date_type(1582, 10, 16)] - expected = Dataset({'t': ('t', times), 't0': times[0]}) - kwds = {'encoding': {'t0': {'units': 'days since 1582-10-15'}}} - expected_decoded_t = np.array(times) - expected_decoded_t0 = np.array([date_type(1582, 10, 15)]) - expected_calendar = times[0].calendar - with self.roundtrip(expected, save_kwargs=kwds) as actual: - abs_diff = abs(actual.t.values - expected_decoded_t) - self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) - self.assertEquals(actual.t.encoding['units'], - 'days since 1582-10-15 00:00:00.000000') - 
self.assertEquals(actual.t.encoding['calendar'], - expected_calendar) - - abs_diff = abs(actual.t0.values - expected_decoded_t0) - self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) - - self.assertEquals(actual.t0.encoding['units'], - 'days since 1582-10-15') - self.assertEquals(actual.t.encoding['calendar'], - expected_calendar) - - def test_roundtrip_datetime_datetime_data_pre_gregorian(self): - from datetime import datetime - - times = [datetime(1, 1, 1), datetime(1, 1, 2)] - expected = Dataset({'t': ('t', times), 't0': times[0]}) - kwds = {'encoding': {'t0': {'units': 'days since 0001-01-01'}}} - expected_decoded_t = np.array(times) - expected_decoded_t0 = np.array([datetime(1, 1, 1)]) - with self.roundtrip(expected, save_kwargs=kwds) as actual: - abs_diff = abs(actual.t.values - expected_decoded_t) - self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) - self.assertEquals(actual.t.encoding['units'], - 'days since 0001-01-01 00:00:00.000000') - self.assertEquals(actual.t.encoding['calendar'], - 'gregorian') - - abs_diff = abs(actual.t0.values - expected_decoded_t0) - self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) - - self.assertEquals(actual.t0.encoding['units'], - 'days since 0001-01-01') - self.assertEquals(actual.t.encoding['calendar'], - 'gregorian') - - def test_roundtrip_datetime_datetime_data_post_gregorian(self): - from datetime import datetime - - times = [datetime(1582, 10, 15), datetime(1582, 10, 16)] - expected = Dataset({'t': ('t', times), 't0': times[0]}) - kwds = {'encoding': {'t0': {'units': 'days since 1582-10-15'}}} - expected_decoded_t = np.array(times) - expected_decoded_t0 = np.array([datetime(1582, 10, 15)]) - with self.roundtrip(expected, save_kwargs=kwds) as actual: - abs_diff = abs(actual.t.values - expected_decoded_t) - self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) - self.assertEquals(actual.t.encoding['units'], - 'days since 1582-10-15 00:00:00.000000') - self.assertEquals(actual.t.encoding['calendar'], - 'standard') - - abs_diff = abs(actual.t0.values - expected_decoded_t0) - self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) - - self.assertEquals(actual.t0.encoding['units'], - 'days since 1582-10-15') - self.assertEquals(actual.t.encoding['calendar'], - 'standard') - def test_roundtrip_timedelta_data(self): time_deltas = pd.to_timedelta(['1h', '2h', 'NaT']) expected = Dataset({'td': ('td', time_deltas), 'td0': time_deltas[0]}) @@ -2000,7 +1922,7 @@ def test_roundtrip_numpy_datetime_data(self): with self.roundtrip(expected) as actual: assert_identical(expected, actual) - def test_roundtrip_netcdftime_datetime_data_pre_gregorian(self): + def test_roundtrip_netcdftime_datetime_data(self): # Override method in DatasetIOTestCases - remove not applicable # save_kwds from .test_coding_times import _all_netcdftime_date_types @@ -2019,56 +1941,6 @@ def test_roundtrip_netcdftime_datetime_data_pre_gregorian(self): abs_diff = abs(actual.t0.values - expected_decoded_t0) self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) - def test_roundtrip_netcdftime_datetime_data_post_gregorian(self): - # Override method in DatasetIOTestCases - remove not applicable - # save_kwds - from .test_coding_times import _all_netcdftime_date_types - - date_types = _all_netcdftime_date_types() - for date_type in date_types.values(): - times = [date_type(1582, 10, 15), date_type(1582, 10, 16)] - expected = Dataset({'t': ('t', times), 't0': times[0]}) - expected_decoded_t = np.array(times) - expected_decoded_t0 = 
np.array([date_type(1582, 10, 15)]) - with self.roundtrip(expected) as actual: - abs_diff = abs(actual.t.values - expected_decoded_t) - self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) - - abs_diff = abs(actual.t0.values - expected_decoded_t0) - self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) - - def test_roundtrip_datetime_datetime_data_pre_gregorian(self): - # Override method in DatasetIOTestCases - remove not applicable - # save_kwds - from datetime import datetime - - times = [datetime(1, 1, 1), datetime(1, 1, 2)] - expected = Dataset({'t': ('t', times), 't0': times[0]}) - expected_decoded_t = np.array(times) - expected_decoded_t0 = np.array([datetime(1, 1, 1)]) - with self.roundtrip(expected) as actual: - abs_diff = abs(actual.t.values - expected_decoded_t) - self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) - - abs_diff = abs(actual.t0.values - expected_decoded_t0) - self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) - - def test_roundtrip_datetime_datetime_data_post_gregorian(self): - # Override method in DatasetIOTestCases - remove not applicable - # save_kwds - from datetime import datetime - - times = [datetime(1582, 10, 15), datetime(1582, 10, 16)] - expected = Dataset({'t': ('t', times), 't0': times[0]}) - expected_decoded_t = np.array(times) - expected_decoded_t0 = np.array([datetime(1582, 10, 15)]) - with self.roundtrip(expected) as actual: - abs_diff = abs(actual.t.values - expected_decoded_t) - self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) - - abs_diff = abs(actual.t0.values - expected_decoded_t0) - self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) - def test_write_store(self): # Override method in DatasetIOTestCases - not applicable to dask pass diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index a6d32f1767d..87dca4bca71 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1,6 +1,5 @@ from __future__ import absolute_import, division, print_function -from datetime import datetime from itertools import product import warnings @@ -533,27 +532,6 @@ def test_infer_netcdftime_datetime_units(): assert expected == coding.times.infer_datetime_units(dates) -# datetime.datetime objects have higher precision than netcdftime.datetime -# objects. 
-@pytest.mark.parametrize( - ['dates', 'expected'], [ - ([datetime(1900, 1, 1), - datetime(1900, 1, 2)], - 'days since 1900-01-01 00:00:00.000000'), - ([datetime(1900, 1, 1, 12), - datetime(1900, 1, 1, 13)], - 'hours since 1900-01-01 12:00:00.000000'), - ([datetime(1900, 1, 1), - datetime(1900, 1, 2), - datetime(1900, 1, 2, 0, 0, 1)], - 'seconds since 1900-01-01 00:00:00.000000'), - ([datetime(1900, 1, 1), - datetime(1900, 1, 2, 0, 0, 0, 5)], - 'seconds since 1900-01-01 00:00:00.000000')]) -def test_infer_datetime_datetime_units(dates, expected): - assert expected == coding.times.infer_datetime_units(dates) - - @pytest.mark.parametrize( ['timedeltas', 'units', 'numbers'], [('1D', 'days', np.int64(1)), @@ -621,7 +599,6 @@ def test_infer_timedelta_units(deltas, expected): def test_format_netcdftime_datetime(date_args, expected): pytest.importorskip('netcdftime') date_types = _all_netcdftime_date_types() - date_types['datetime.datetime'] = datetime # Also test datetime.datetime for date_type in date_types.values(): result = coding.times.format_netcdftime_datetime(date_type(*date_args)) assert result == expected diff --git a/xarray/tests/test_netcdftimeindex.py b/xarray/tests/test_netcdftimeindex.py index 0f0ac137233..5ff80896d31 100644 --- a/xarray/tests/test_netcdftimeindex.py +++ b/xarray/tests/test_netcdftimeindex.py @@ -5,7 +5,7 @@ import pandas as pd import xarray as xr -from datetime import datetime, timedelta +from datetime import timedelta from xarray.coding.netcdftimeindex import ( parse_iso8601, NetCDFTimeIndex, assert_all_valid_date_type, _parsed_string_to_bounds, _parse_iso8601_with_reso) @@ -62,8 +62,7 @@ def netcdftime_date_types(): DatetimeNoLeap, DatetimeJulian, DatetimeAllLeap, DatetimeGregorian, DatetimeProlepticGregorian, Datetime360Day) return [DatetimeNoLeap, DatetimeJulian, DatetimeAllLeap, - DatetimeGregorian, DatetimeProlepticGregorian, Datetime360Day, - datetime] + DatetimeGregorian, DatetimeProlepticGregorian, Datetime360Day] @pytest.fixture(params=netcdftime_date_types()) diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index 8186ce400fc..da366eac6a5 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -4,6 +4,7 @@ import pandas as pd import pytest +from datetime import datetime from xarray.coding.netcdftimeindex import NetCDFTimeIndex from xarray.core import duck_array_ops, utils from xarray.core.options import set_options @@ -60,6 +61,19 @@ def test_safe_cast_to_index_netcdftimeindex(enable_netcdftimeindex): assert isinstance(actual, pd.Index) +# Test that datetime.datetime objects are never used in a NetCDFTimeIndex +@pytest.mark.skipif(not has_netcdftime, reason='netcdftime not installed') +@pytest.mark.parametrize('enable_netcdftimeindex', [False, True]) +def test_safe_cast_to_index_datetime_datetime(enable_netcdftimeindex): + dates = [datetime(1, 1, day) for day in range(1, 20)] + + expected = pd.Index(dates) + with set_options(enable_netcdftimeindex=enable_netcdftimeindex): + actual = utils.safe_cast_to_index(np.array(dates)) + assert_array_equal(expected, actual) + assert isinstance(actual, pd.Index) + + def test_multiindex_from_product_levels(): result = utils.multiindex_from_product_levels([['b', 'a'], [1, 3, 2]]) np.testing.assert_array_equal( From f678714fdd1ffddfd1728fd2cddd10915c638993 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Thu, 12 Apr 2018 11:11:01 -0400 Subject: [PATCH 32/58] Documentation edits --- doc/time-series.rst | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) 
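As a concrete restatement of what the revert above guarantees (mirroring the new ``test_safe_cast_to_index_datetime_datetime`` test; a sketch, not part of the patch):

.. code-block:: python

   from datetime import datetime

   import numpy as np
   import pandas as pd
   from xarray.coding.netcdftimeindex import NetCDFTimeIndex
   from xarray.core import utils
   from xarray.core.options import set_options

   dates = np.array([datetime(1, 1, day) for day in range(1, 4)])

   # datetime.datetime values are never promoted to a NetCDFTimeIndex,
   # even with the option enabled; they stay in a plain pandas.Index.
   with set_options(enable_netcdftimeindex=True):
       index = utils.safe_cast_to_index(dates)

   assert isinstance(index, pd.Index)
   assert not isinstance(index, NetCDFTimeIndex)
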
diff --git a/doc/time-series.rst b/doc/time-series.rst index e2623fa0335..51eac89be9e 100644 --- a/doc/time-series.rst +++ b/doc/time-series.rst @@ -225,7 +225,8 @@ calendars or dates using a standard calendar, but outside the behavior has not yet been turned on by default; to take advantage of this functionality, you must have the ``enable_netcdftimeindex`` option set to ``True`` within your context (see :py:func:`~xarray.set_options` for more -information). +information). It is expected that this will become the default behavior in +xarray version 0.11. For instance, you can create a DataArray indexed by a time coordinate with a no-leap calendar within a context manager setting the @@ -257,17 +258,19 @@ coordinate with a no-leap calendar within a context manager setting the regardless of whether or not they can be represented using ``np.datetime64[ns]`` objects. -For data indexed by a ``NetCDFTimeIndex`` xarray currently supports `partial -datetime string indexing`_ using strictly `ISO 8601-format`_ partial datetime -strings: +For data indexed by a ``NetCDFTimeIndex`` xarray currently supports: +- `Partial datetime string indexing`_ using strictly `ISO 8601-format`_ partial + datetime strings: + .. ipython:: python da.sel(time='0001') da.sel(time=slice('0001-05', '0002-02')) -access of basic datetime components via the ``dt`` accessor (in this case just -"year", "month", "day", "hour", "minute", "second", "microsecond", and "season"): +- Access of basic datetime components via the ``dt`` accessor (in this case + just "year", "month", "day", "hour", "minute", "second", "microsecond", and + "season"): .. ipython:: python @@ -275,14 +278,14 @@ access of basic datetime components via the ``dt`` accessor (in this case just da.time.dt.month da.time.dt.season -group-by operations based on datetime accessor attributes (e.g. by month of the -year): +- Group-by operations based on datetime accessor attributes (e.g. by month of + the year): .. ipython:: python da.groupby('time.month').sum() -and serialization: +- And serialization: .. ipython:: python From b03e38e41a6a735329600fad8f28d7f5e619b8ee Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Fri, 13 Apr 2018 09:40:45 -0400 Subject: [PATCH 33/58] Ensure proper enable_netcdftimeindex option is used under lazy decoding Prior to this, opening a dataset with enable_netcdftimeindex set to True and then accessing one of its variables outside the context manager would lead to it being decoded with the default enable_netcdftimeindex (which is False). This makes sure that lazy decoding takes into account the context under which it was called. 
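To make the failure mode described in that commit message concrete, a sketch (hypothetical file name; behavior prior to this patch):

.. code-block:: python

   import xarray as xr

   with xr.set_options(enable_netcdftimeindex=True):
       # Time decoding is lazy, so no dates are materialized here.
       ds = xr.open_dataset('non_standard_calendar.nc')  # hypothetical

   # Prior to this patch the deferred decode consulted the option at
   # *access* time -- by now it has reverted to False -- so the dates
   # were decoded with the default pathway rather than the one requested
   # when the dataset was opened.
   ds.time.values

   # With this patch, the option value in effect at open time is bound
   # into the lazy transform, so both accesses agree.
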
--- xarray/coding/times.py | 22 +++++--- xarray/tests/test_coding_times.py | 94 +++++++++++++++++++------------ 2 files changed, 73 insertions(+), 43 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 9ee7e0df854..94b9d02c27c 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -100,7 +100,8 @@ def _unpack_netcdf_time_units(units): return delta_units, ref_date -def _decode_datetime_with_netcdftime(num_dates, units, calendar): +def _decode_datetime_with_netcdftime(num_dates, units, calendar, + enable_netcdftimeindex): nctime = _import_netcdftime() dates = np.asarray(nctime.num2date(num_dates, units, calendar)) @@ -112,7 +113,7 @@ def _decode_datetime_with_netcdftime(num_dates, units, calendar): 'netCDF4.datetime objects instead, reason: dates out ' 'of range', SerializationWarning, stacklevel=3) else: - if OPTIONS['enable_netcdftimeindex']: + if enable_netcdftimeindex: if calendar in _STANDARD_CALENDARS: dates = nctime_to_nptime(dates) else: @@ -136,7 +137,7 @@ def _decode_datetime_with_netcdftime(num_dates, units, calendar): return dates -def _decode_cf_datetime_dtype(data, units, calendar): +def _decode_cf_datetime_dtype(data, units, calendar, enable_netcdftimeindex): # Verify that at least the first and last date can be decoded # successfully. Otherwise, tracebacks end up swallowed by # Dataset.__repr__ when users try to view their lazily decoded array. @@ -146,7 +147,8 @@ def _decode_cf_datetime_dtype(data, units, calendar): last_item(values) or [0]]) try: - result = decode_cf_datetime(example_value, units, calendar) + result = decode_cf_datetime(example_value, units, calendar, + enable_netcdftimeindex) except Exception: calendar_msg = ('the default calendar' if calendar is None else 'calendar %r' % calendar) @@ -162,7 +164,8 @@ def _decode_cf_datetime_dtype(data, units, calendar): return dtype -def decode_cf_datetime(num_dates, units, calendar=None): +def decode_cf_datetime(num_dates, units, calendar=None, + enable_netcdftimeindex=False): """Given an array of numeric dates in netCDF format, convert it into a numpy array of date time objects. 
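An illustrative call with the new keyword (a sketch assuming ``netcdftime`` is installed; ``'noleap'`` is one of the non-standard calendars that exercises the new pathway):

.. code-block:: python

   import numpy as np
   from xarray.coding.times import decode_cf_datetime

   num_dates = np.array([0., 1., 2.])
   dates = decode_cf_datetime(num_dates, 'days since 0001-01-01',
                              calendar='noleap',
                              enable_netcdftimeindex=True)
   # dates is an object-dtype array of netcdftime datetime objects,
   # rather than a datetime64[ns] array, since these dates fall outside
   # the Timestamp-valid range and use a non-standard calendar.
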
@@ -213,7 +216,8 @@ def decode_cf_datetime(num_dates, units, calendar=None): except (OutOfBoundsDatetime, OverflowError): dates = _decode_datetime_with_netcdftime( - flat_num_dates.astype(np.float), units, calendar) + flat_num_dates.astype(np.float), units, calendar, + enable_netcdftimeindex) return dates.reshape(num_dates.shape) @@ -414,9 +418,11 @@ def decode(self, variable, name=None): if 'units' in attrs and 'since' in attrs['units']: units = pop_to(attrs, encoding, 'units') calendar = pop_to(attrs, encoding, 'calendar') - dtype = _decode_cf_datetime_dtype(data, units, calendar) + dtype = _decode_cf_datetime_dtype( + data, units, calendar, OPTIONS['enable_netcdftimeindex']) transform = partial( - decode_cf_datetime, units=units, calendar=calendar) + decode_cf_datetime, units=units, calendar=calendar, + enable_netcdftimeindex=OPTIONS['enable_netcdftimeindex']) data = lazy_elemwise_func(data, transform, dtype) return Variable(dims, data, attrs, encoding) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 87dca4bca71..2bc0c388afa 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -7,7 +7,7 @@ import pandas as pd import pytest -from xarray import Variable, coding, set_options +from xarray import Variable, coding, set_options, DataArray, decode_cf from xarray.coding.times import _import_netcdftime from xarray.coding.variables import SerializationWarning @@ -169,9 +169,9 @@ def test_decode_standard_calendar_inside_timestamp_range( with warnings.catch_warnings(): warnings.filterwarnings('ignore', 'Unable to decode time axis') - with set_options(enable_netcdftimeindex=enable_netcdftimeindex): - actual = coding.times.decode_cf_datetime(noleap_time, units, - calendar=calendar) + actual = coding.times.decode_cf_datetime( + noleap_time, units, calendar=calendar, + enable_netcdftimeindex=enable_netcdftimeindex) assert actual.dtype == expected_dtype abs_diff = abs(actual - expected) # once we no longer support versions of netCDF4 older than 1.1.5, @@ -201,9 +201,9 @@ def test_decode_non_standard_calendar_inside_timestamp_range( with warnings.catch_warnings(): warnings.filterwarnings('ignore', 'Unable to decode time axis') - with set_options(enable_netcdftimeindex=enable_netcdftimeindex): - actual = coding.times.decode_cf_datetime(noleap_time, units, - calendar=calendar) + actual = coding.times.decode_cf_datetime( + noleap_time, units, calendar=calendar, + enable_netcdftimeindex=enable_netcdftimeindex) assert actual.dtype == expected_dtype abs_diff = abs(actual - expected) # once we no longer support versions of netCDF4 older than 1.1.5, @@ -229,9 +229,9 @@ def test_decode_dates_outside_timestamp_range( with warnings.catch_warnings(): warnings.filterwarnings('ignore', 'Unable to decode time axis') - with set_options(enable_netcdftimeindex=enable_netcdftimeindex): - actual = coding.times.decode_cf_datetime(noleap_time, units, - calendar=calendar) + actual = coding.times.decode_cf_datetime( + noleap_time, units, calendar=calendar, + enable_netcdftimeindex=enable_netcdftimeindex) assert all(isinstance(value, expected_date_type) for value in actual) abs_diff = abs(actual - expected) # once we no longer support versions of netCDF4 older than 1.1.5, @@ -251,9 +251,9 @@ def test_decode_standard_calendar_single_element_inside_timestamp_range( with warnings.catch_warnings(): warnings.filterwarnings('ignore', 'Unable to decode time axis') - with set_options(enable_netcdftimeindex=enable_netcdftimeindex): - actual = 
coding.times.decode_cf_datetime(num_time, units, - calendar=calendar) + actual = coding.times.decode_cf_datetime( + num_time, units, calendar=calendar, + enable_netcdftimeindex=enable_netcdftimeindex) assert actual.dtype == np.dtype('M8[ns]') @@ -268,9 +268,9 @@ def test_decode_non_standard_calendar_single_element_inside_timestamp_range( with warnings.catch_warnings(): warnings.filterwarnings('ignore', 'Unable to decode time axis') - with set_options(enable_netcdftimeindex=enable_netcdftimeindex): - actual = coding.times.decode_cf_datetime(num_time, units, - calendar=calendar) + actual = coding.times.decode_cf_datetime( + num_time, units, calendar=calendar, + enable_netcdftimeindex=enable_netcdftimeindex) if enable_netcdftimeindex: assert actual.dtype == np.dtype('O') else: @@ -290,10 +290,9 @@ def test_decode_single_element_outside_timestamp_range( with warnings.catch_warnings(): warnings.filterwarnings('ignore', 'Unable to decode time axis') - with set_options( - enable_netcdftimeindex=enable_netcdftimeindex): - actual = coding.times.decode_cf_datetime( - num_time, units, calendar=calendar) + actual = coding.times.decode_cf_datetime( + num_time, units, calendar=calendar, + enable_netcdftimeindex=enable_netcdftimeindex) expected = nctime.num2date(days, units, calendar) assert isinstance(actual.item(), type(expected)) @@ -320,9 +319,9 @@ def test_decode_standard_calendar_multidim_time_inside_timestamp_range( expected1 = times1.values expected2 = times2.values - with set_options(enable_netcdftimeindex=enable_netcdftimeindex): - actual = coding.times.decode_cf_datetime(mdim_time, units, - calendar=calendar) + actual = coding.times.decode_cf_datetime( + mdim_time, units, calendar=calendar, + enable_netcdftimeindex=enable_netcdftimeindex) assert actual.dtype == np.dtype('M8[ns]') abs_diff1 = abs(actual[:, 0] - expected1) @@ -362,9 +361,10 @@ def test_decode_nonstandard_calendar_multidim_time_inside_timestamp_range( expected2 = times2.values expected_dtype = np.dtype('M8[ns]') - with set_options(enable_netcdftimeindex=enable_netcdftimeindex): - actual = coding.times.decode_cf_datetime(mdim_time, units, - calendar=calendar) + actual = coding.times.decode_cf_datetime( + mdim_time, units, calendar=calendar, + enable_netcdftimeindex=enable_netcdftimeindex) + assert actual.dtype == expected_dtype abs_diff1 = abs(actual[:, 0] - expected1) abs_diff2 = abs(actual[:, 1] - expected2) @@ -398,9 +398,10 @@ def test_decode_multidim_time_outside_timestamp_range( with warnings.catch_warnings(): warnings.filterwarnings('ignore', 'Unable to decode time axis') - with set_options(enable_netcdftimeindex=enable_netcdftimeindex): - actual = coding.times.decode_cf_datetime(mdim_time, units, - calendar=calendar) + actual = coding.times.decode_cf_datetime( + mdim_time, units, calendar=calendar, + enable_netcdftimeindex=enable_netcdftimeindex) + assert actual.dtype == np.dtype('O') abs_diff1 = abs(actual[:, 0] - expected1) @@ -430,9 +431,9 @@ def test_decode_non_standard_calendar_single_element_fallback( num_time = nctime.date2num(dt, units, calendar) with pytest.warns(SerializationWarning, match='Unable to decode time axis'): - with set_options(enable_netcdftimeindex=enable_netcdftimeindex): - actual = coding.times.decode_cf_datetime(num_time, units, - calendar=calendar) + actual = coding.times.decode_cf_datetime( + num_time, units, calendar=calendar, + enable_netcdftimeindex=enable_netcdftimeindex) expected = np.asarray(nctime.num2date(num_time, units, calendar)) assert actual.dtype == np.dtype('O') assert expected == 
actual @@ -453,9 +454,9 @@ def test_decode_non_standard_calendar_fallback( with warnings.catch_warnings(record=True) as w: warnings.simplefilter('always') - with set_options(enable_netcdftimeindex=enable_netcdftimeindex): - actual = coding.times.decode_cf_datetime(num_times, units, - calendar=calendar) + actual = coding.times.decode_cf_datetime( + num_times, units, calendar=calendar, + enable_netcdftimeindex=enable_netcdftimeindex) assert len(w) == 1 assert 'Unable to decode time axis' in str(w[0].message) @@ -602,3 +603,26 @@ def test_format_netcdftime_datetime(date_args, expected): for date_type in date_types.values(): result = coding.times.format_netcdftime_datetime(date_type(*date_args)) assert result == expected + + +@pytest.mark.skipif(not has_netcdftime, reason='netcdftime not installed') +@pytest.mark.parametrize( + ['calendar', 'enable_netcdftimeindex'], + product(_ALL_CALENDARS, [False, True])) +def test_decode_cf_enable_netcdftimeindex(calendar, enable_netcdftimeindex): + days = [1., 2., 3.] + da = DataArray(days, coords=[days], dims=['time'], name='test') + ds = da.to_dataset() + + for v in ['test', 'time']: + ds[v].attrs['units'] = 'days since 2000-01-01' + ds[v].attrs['calendar'] = calendar + + with set_options(enable_netcdftimeindex=enable_netcdftimeindex): + ds = decode_cf(ds) + + if (enable_netcdftimeindex and + calendar not in coding.times._STANDARD_CALENDARS): + assert ds.test.dtype == np.dtype('O') + else: + assert ds.test.dtype == np.dtype('M8[ns]') From 890dde0574e3e9d87bcea7d436c2f2fb7e9d806f Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Fri, 13 Apr 2018 15:14:33 -0400 Subject: [PATCH 34/58] Add fix and test for concatenating variables with a NetCDFTimeIndex Previously when concatenating variables indexed by a NetCDFTimeIndex the index would be wrongly converted to a generic pd.Index --- xarray/core/utils.py | 3 +-- xarray/tests/test_netcdftimeindex.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index a94e96f5747..482d996f000 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -71,8 +71,7 @@ def safe_cast_to_index(array): if hasattr(array, 'dtype') and array.dtype.kind == 'O': kwargs['dtype'] = object index = pd.Index(np.asarray(array), **kwargs) - index = _maybe_cast_to_netcdftimeindex(index) - return index + return _maybe_cast_to_netcdftimeindex(index) def multiindex_from_product_levels(levels, names=None): diff --git a/xarray/tests/test_netcdftimeindex.py b/xarray/tests/test_netcdftimeindex.py index 5ff80896d31..b54bcd40eb6 100644 --- a/xarray/tests/test_netcdftimeindex.py +++ b/xarray/tests/test_netcdftimeindex.py @@ -506,3 +506,21 @@ def test_indexing_in_dataframe_iloc(df, index): expected = pd.DataFrame([1, 2], index=index[:2]) result = df.iloc[:2] assert result.equals(expected) + + +@pytest.mark.parametrize('enable_netcdftimeindex', [False, True]) +def test_concat_netcdftimeindex(date_type, enable_netcdftimeindex): + with xr.set_options(enable_netcdftimeindex=enable_netcdftimeindex): + da1 = xr.DataArray( + [1., 2.], coords=[[date_type(1, 1, 1), date_type(1, 2, 1)]], + dims=['time']) + da2 = xr.DataArray( + [3., 4.], coords=[[date_type(1, 3, 1), date_type(1, 4, 1)]], + dims=['time']) + da = xr.concat([da1, da2], dim='time') + + if enable_netcdftimeindex: + assert isinstance(da.indexes['time'], NetCDFTimeIndex) + else: + assert isinstance(da.indexes['time'], pd.Index) + assert not isinstance(da.indexes['time'], NetCDFTimeIndex) From 
From 13c83582f17f56429f8798bd1ec7a35eab0e767c Mon Sep 17 00:00:00 2001
From: Spencer Clark
Date: Mon, 16 Apr 2018 08:55:17 -0400
Subject: [PATCH 35/58] Further namespace changes due to netcdftime/cftime
 renaming

---
 xarray/backends/zarr.py              |  4 ++--
 xarray/coding/netcdftimeindex.py     | 16 +++++++++-------
 xarray/coding/times.py               | 21 ++++++++-------------
 xarray/core/common.py                | 14 +++++++-------
 xarray/core/utils.py                 |  6 ++++--
 xarray/tests/test_backends.py        |  8 ++++----
 xarray/tests/test_netcdftimeindex.py | 28 +++++++++++++++++-----------
 xarray/tests/test_utils.py           | 10 +++++-----
 8 files changed, 56 insertions(+), 51 deletions(-)

diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index 8e5cb8b6089..16fd3ff59f8 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -8,7 +8,7 @@
 from .. import Variable, coding, conventions
 from ..core import indexing
-from ..core.common import contains_netcdftime_datetimes
+from ..core.common import contains_cftime_datetimes
 from ..core.pycompat import OrderedDict, integer_types, iteritems
 from ..core.utils import FrozenOrderedDict, HiddenKeyDict
 from .common import AbstractWritableDataStore, ArrayWriter, BackendArray
@@ -222,7 +222,7 @@ def encode_zarr_variable(var, needs_copy=True, name=None):
         A variable which has been encoded as described above.
     """
-    if var.dtype.kind == 'O' and not contains_netcdftime_datetimes(var):
+    if var.dtype.kind == 'O' and not contains_cftime_datetimes(var):
         raise NotImplementedError("Variable `%s` is an object. Zarr "
                                   "store can't yet encode objects." % name)

diff --git a/xarray/coding/netcdftimeindex.py b/xarray/coding/netcdftimeindex.py
index c695d8a2c16..d6c532642b1 100644
--- a/xarray/coding/netcdftimeindex.py
+++ b/xarray/coding/netcdftimeindex.py
@@ -120,19 +120,21 @@ def get_date_type(self):


 def assert_all_valid_date_type(data):
-    from netcdftime import (
-        DatetimeJulian, DatetimeNoLeap, DatetimeAllLeap,
-        DatetimeGregorian, DatetimeProlepticGregorian, Datetime360Day)
+    try:
+        import cftime
+    except ImportError:
+        import netcdftime as cftime

-    valid_types = (DatetimeJulian, DatetimeNoLeap, DatetimeAllLeap,
-                   DatetimeGregorian, DatetimeProlepticGregorian,
-                   Datetime360Day)
+    valid_types = (cftime.DatetimeJulian, cftime.DatetimeNoLeap,
+                   cftime.DatetimeAllLeap, cftime.DatetimeGregorian,
+                   cftime.DatetimeProlepticGregorian,
+                   cftime.Datetime360Day)
     sample = data[0]
     date_type = type(sample)
     if not isinstance(sample, valid_types):
         raise TypeError(
-            'NetCDFTimeIndex requires netcdftime._netcdftime.datetime '
+            'NetCDFTimeIndex requires cftime.datetime '
             'or datetime.datetime objects. '
             'Got object of {}.'.format(date_type))
     if not all(isinstance(value, date_type) for value in data):
diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index c8fc022e653..54a9671d215 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -9,7 +9,7 @@
 import numpy as np
 import pandas as pd

-from ..core.common import contains_netcdftime_datetimes
+from ..core.common import contains_cftime_datetimes
 from ..core import indexing
 from ..core.formatting import first_n_items, format_timestamp, last_item
 from ..core.options import OPTIONS
@@ -26,7 +26,7 @@
     from pandas.tslib import OutOfBoundsDatetime


-# standard calendars recognized by netcdftime
+# standard calendars recognized by cftime
 _STANDARD_CALENDARS = set(['standard', 'gregorian', 'proleptic_gregorian'])

 _NS_PER_TIME_DELTA = {'us': int(1e3),
@@ -56,24 +56,19 @@ def _import_cftime():
     return cftime


-def _import_netcdftime_datetime():
-    """Helper function to handle importing netcdftime.datetime across the
-    transition between the version of netcdftime packaged with netCDF4 and the
+def _import_cftime_datetime():
+    """Helper function to handle importing cftime.datetime across the
+    transition between the version of cftime packaged with netCDF4 and the
     standalone version"""
     try:
-        # Will raise an ImportError if not using standalone netcdftime
-        from netcdftime import num2date  # noqa: F401
-
-        # Generic netcdftime datetime is exposed in the public API in the
-        # standalone version of netcdftime
-        from netcdftime import datetime
+        from cftime import datetime
     except ImportError:
         # Need to use private API to import generic netcdftime datetime in
         # older versions. See https://github.com/Unidata/netcdftime/issues/8
         try:
             from netcdftime._netcdftime import datetime
         except ImportError:
-            raise ImportError("Failed to import netcdftime")
+            raise ImportError("Failed to import cftime.datetime")
     return datetime
@@ -398,7 +393,7 @@ class CFDatetimeCoder(VariableCoder):
     def encode(self, variable, name=None):
         dims, data, attrs, encoding = unpack_for_encoding(variable)
         if (np.issubdtype(data.dtype, np.datetime64) or
-                contains_netcdftime_datetimes(variable)):
+                contains_cftime_datetimes(variable)):
             (data, units, calendar) = encode_cf_datetime(
                 data,
                 encoding.pop('units', None),
diff --git a/xarray/core/common.py b/xarray/core/common.py
index 3169f8385b8..061ad732530 100644
--- a/xarray/core/common.py
+++ b/xarray/core/common.py
@@ -877,19 +877,19 @@ def is_np_datetime_like(dtype):
             np.issubdtype(dtype, np.timedelta64))


-def contains_netcdftime_datetimes(var):
-    """Check if a variable contains netcdftime datetime objects"""
-    from ..coding.times import _import_netcdftime_datetime
+def contains_cftime_datetimes(var):
+    """Check if a variable contains cftime datetime objects"""
+    from ..coding.times import _import_cftime_datetime
     try:
-        netcdftime_datetime = _import_netcdftime_datetime()
+        cftime_datetime = _import_cftime_datetime()
     except ImportError:
         return False
     else:
-        return isinstance(var.data.flatten()[0], netcdftime_datetime)
+        return isinstance(var.data.flatten()[0], cftime_datetime)


 def _contains_datetime_like_objects(var):
     """Check if a variable contains datetime like objects (either
-    np.datetime64, np.timedelta64, or netcdftime._netcdftime.datetime)"""
-    return is_np_datetime_like(var.dtype) or contains_netcdftime_datetimes(var)
+    np.datetime64, np.timedelta64, or cftime.datetime)"""
+    return is_np_datetime_like(var.dtype) or contains_cftime_datetimes(var)
diff --git a/xarray/core/utils.py b/xarray/core/utils.py
index 482d996f000..6fe0dbdcf71 100644
--- a/xarray/core/utils.py
+++ b/xarray/core/utils.py
@@ -38,17 +38,19 @@ def wrapper(*args, **kwargs):


 def _maybe_cast_to_netcdftimeindex(index):
+    from ..coding.times import _import_cftime_datetime
+
     if not OPTIONS['enable_netcdftimeindex']:
         return index
     else:
         try:
-            from netcdftime._netcdftime import datetime as ncdatetime
+            cftime_datetime = _import_cftime_datetime()
             from ..coding.netcdftimeindex import NetCDFTimeIndex
         except ImportError:
             return index
         else:
             if len(index):
-                if isinstance(index[0], ncdatetime):
+                if isinstance(index[0], cftime_datetime):
                     index = NetCDFTimeIndex(index)
     return index

diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 3a84c2541e1..4004e38437d 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -350,9 +350,9 @@ def test_roundtrip_numpy_datetime_data(self):
         assert actual.t0.encoding['units'] == 'days since 1950-01-01'

     def test_roundtrip_netcdftime_datetime_data(self):
-        from .test_coding_times import _all_netcdftime_date_types
+        from .test_coding_times import _all_cftime_date_types

-        date_types = _all_netcdftime_date_types()
+        date_types = _all_cftime_date_types()
         for date_type in date_types.values():
             times = [date_type(1, 1, 1), date_type(1, 1, 2)]
             expected = Dataset({'t': ('t', times), 't0': times[0]})
@@ -1925,9 +1925,9 @@ def test_roundtrip_numpy_datetime_data(self):
     def test_roundtrip_netcdftime_datetime_data(self):
         # Override method in DatasetIOTestCases - remove not applicable
         # save_kwds
-        from .test_coding_times import _all_netcdftime_date_types
+        from .test_coding_times import _all_cftime_date_types

-        date_types = _all_netcdftime_date_types()
+        date_types = _all_cftime_date_types()
         for date_type in date_types.values():
             times = [date_type(1, 1, 1), date_type(1, 1, 2)]
             expected = Dataset({'t': ('t', times), 't0': times[0]})
diff --git a/xarray/tests/test_netcdftimeindex.py b/xarray/tests/test_netcdftimeindex.py
index b54bcd40eb6..4f886b27934 100644
--- a/xarray/tests/test_netcdftimeindex.py
+++ b/xarray/tests/test_netcdftimeindex.py
@@ -11,9 +11,17 @@
     _parsed_string_to_bounds, _parse_iso8601_with_reso)
 from xarray.tests import assert_array_equal, assert_identical

+from . import has_cftime_or_netCDF4
+
 # Putting this at the module level for now, though technically we
 # don't need netcdftime to test the string parser.
-pytest.importorskip('netcdftime')
+pytestmark = pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed')
+
+
+try:
+    import cftime
+except ImportError:
+    import netcdftime as cftime


 def date_dict(year=None, month=None, day=None,
@@ -57,15 +65,13 @@ def test_parse_iso8601(string, expected):
         parse_iso8601(string + '.3')


-def netcdftime_date_types():
-    from netcdftime import (
-        DatetimeNoLeap, DatetimeJulian, DatetimeAllLeap,
-        DatetimeGregorian, DatetimeProlepticGregorian, Datetime360Day)
-    return [DatetimeNoLeap, DatetimeJulian, DatetimeAllLeap,
-            DatetimeGregorian, DatetimeProlepticGregorian, Datetime360Day]
+def cftime_date_types():
+    return [cftime.DatetimeNoLeap, cftime.DatetimeJulian,
+            cftime.DatetimeAllLeap, cftime.DatetimeGregorian,
+            cftime.DatetimeProlepticGregorian, cftime.Datetime360Day]


-@pytest.fixture(params=netcdftime_date_types())
+@pytest.fixture(params=cftime_date_types())
 def date_type(request):
     return request.param
@@ -102,7 +108,7 @@ def df(index):

 @pytest.fixture
 def feb_days(date_type):
-    from netcdftime import DatetimeAllLeap, Datetime360Day
+    from cftime import DatetimeAllLeap, Datetime360Day
     if date_type is DatetimeAllLeap:
         return 29
     elif date_type is Datetime360Day:
@@ -113,7 +119,7 @@ def feb_days(date_type):

 @pytest.fixture
 def dec_days(date_type):
-    from netcdftime import Datetime360Day
+    from cftime import Datetime360Day
     if date_type is Datetime360Day:
         return 30
     else:
@@ -121,7 +127,7 @@ def dec_days(date_type):


 def test_assert_all_valid_date_type(date_type, index):
-    from netcdftime import DatetimeNoLeap, DatetimeAllLeap
+    from cftime import DatetimeNoLeap, DatetimeAllLeap

     if date_type is DatetimeNoLeap:
         mixed_date_types = [date_type(1, 1, 1), DatetimeAllLeap(1, 2, 1)]
diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py
index da366eac6a5..6e934191ba8 100644
--- a/xarray/tests/test_utils.py
+++ b/xarray/tests/test_utils.py
@@ -9,9 +9,9 @@
 from xarray.core import duck_array_ops, utils
 from xarray.core.options import set_options
 from xarray.core.pycompat import OrderedDict
-from .test_coding_times import _all_netcdftime_date_types
+from .test_coding_times import _all_cftime_date_types
 from . import (TestCase, requires_dask, assert_array_equal,
-               has_netcdftime)
+               has_cftime_or_netCDF4)


 class TestAlias(TestCase):
@@ -39,10 +39,10 @@ def test_safe_cast_to_index():
     assert expected.dtype == actual.dtype


-@pytest.mark.skipif(not has_netcdftime, reason='netcdftime not installed')
+@pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed')
 @pytest.mark.parametrize('enable_netcdftimeindex', [False, True])
 def test_safe_cast_to_index_netcdftimeindex(enable_netcdftimeindex):
-    date_types = _all_netcdftime_date_types()
+    date_types = _all_cftime_date_types()
     for date_type in date_types.values():
         dates = [date_type(1, 1, day) for day in range(1, 20)]
         if enable_netcdftimeindex:
@@ -62,7 +62,7 @@ def test_safe_cast_to_index_netcdftimeindex(enable_netcdftimeindex):


 # Test that datetime.datetime objects are never used in a NetCDFTimeIndex
-@pytest.mark.skipif(not has_netcdftime, reason='netcdftime not installed')
+@pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed')
 @pytest.mark.parametrize('enable_netcdftimeindex', [False, True])
 def test_safe_cast_to_index_datetime_datetime(enable_netcdftimeindex):
     dates = [datetime(1, 1, day) for day in range(1, 20)]
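The try/except import above is the crux of supporting both package layouts
during the netcdftime-to-cftime transition. As a self-contained sketch
(assuming at least one of the two packages is importable):

    # Prefer the standalone cftime package; fall back to the netcdftime
    # module that shipped with older versions of netCDF4. Both expose the
    # same calendar-specific datetime classes.
    try:
        import cftime
    except ImportError:
        import netcdftime as cftime

    date = cftime.DatetimeNoLeap(2001, 2, 28)
    print(type(date).__name__)  # 'DatetimeNoLeap' from either package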
From ab46798098d288bec587f9ba260a7d49e582dd18 Mon Sep 17 00:00:00 2001
From: Spencer Clark
Date: Mon, 16 Apr 2018 09:22:10 -0400
Subject: [PATCH 36/58] NetCDFTimeIndex -> CFTimeIndex

---
 .../{netcdftimeindex.py => cftimeindex.py}    | 12 +--
 xarray/coding/times.py                        | 44 +++++-----
 xarray/core/accessors.py                      | 18 ++--
 xarray/core/options.py                        |  4 +-
 xarray/core/utils.py                          | 10 +--
 xarray/tests/test_backends.py                 |  4 +-
 ...netcdftimeindex.py => test_cftimeindex.py} | 22 ++---
 xarray/tests/test_coding_times.py             | 88 +++++++++----------
 xarray/tests/test_utils.py                    | 22 ++---
 9 files changed, 112 insertions(+), 112 deletions(-)
 rename xarray/coding/{netcdftimeindex.py => cftimeindex.py} (96%)
 rename xarray/tests/{test_netcdftimeindex.py => test_cftimeindex.py} (96%)

diff --git a/xarray/coding/netcdftimeindex.py b/xarray/coding/cftimeindex.py
similarity index 96%
rename from xarray/coding/netcdftimeindex.py
rename to xarray/coding/cftimeindex.py
index d6c532642b1..ac9cee4cb44 100644
--- a/xarray/coding/netcdftimeindex.py
+++ b/xarray/coding/cftimeindex.py
@@ -69,7 +69,7 @@ def _parse_iso8601_with_reso(date_type, timestr):
 def _parsed_string_to_bounds(date_type, resolution, parsed):
     """Generalization of
     pandas.tseries.index.DatetimeIndex._parsed_string_to_bounds
-    for use with non-standard calendars and netcdftime._netcdftime.datetime
+    for use with non-standard calendars and cftime.datetime
     objects.
     """
     if resolution == 'year':
@@ -134,16 +134,16 @@ def assert_all_valid_date_type(data):
     date_type = type(sample)
     if not isinstance(sample, valid_types):
         raise TypeError(
-            'NetCDFTimeIndex requires cftime.datetime '
+            'CFTimeIndex requires cftime.datetime '
             'or datetime.datetime objects. '
             'Got object of {}.'.format(date_type))
     if not all(isinstance(value, date_type) for value in data):
         raise TypeError(
-            'NetCDFTimeIndex requires using datetime '
+            'CFTimeIndex requires using datetime '
             'objects of all the same type.  Got\n{}.'.format(data))


-class NetCDFTimeIndex(pd.Index):
+class CFTimeIndex(pd.Index):
     year = _field_accessor('year', 'The year of the datetime')
     month = _field_accessor('month', 'The month of the datetime')
     day = _field_accessor('day', 'The days of the datetime')
@@ -164,14 +164,14 @@ def _partial_date_slice(self, resolution, parsed):
         """Adapted from
         pandas.tseries.index.DatetimeIndex._partial_date_slice

-        Note that when using a NetCDFTimeIndex, if a partial-date selection
+        Note that when using a CFTimeIndex, if a partial-date selection
         returns a single element, it will never be converted to a scalar
         coordinate; this is in slight contrast to the behavior when using
         a DatetimeIndex, which sometimes will return a DataArray with a scalar
         coordinate depending on the resolution of the datetimes used in
         defining the index.  For example:

-        >>> from netcdftime import DatetimeNoLeap
+        >>> from cftime import DatetimeNoLeap
         >>> import pandas as pd
         >>> import xarray as xr
         >>> da = xr.DataArray([1, 2],
diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index 54a9671d215..a8bbc655e7b 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -63,8 +63,8 @@ def _import_cftime_datetime():
     try:
         from cftime import datetime
     except ImportError:
-        # Need to use private API to import generic netcdftime datetime in
-        # older versions. See https://github.com/Unidata/netcdftime/issues/8
+        # Need to use private API to import generic cftime datetime in
+        # older versions. See https://github.com/Unidata/cftime/issues/8
         try:
             from netcdftime._netcdftime import datetime
         except ImportError:
@@ -92,7 +92,7 @@ def _unpack_netcdf_time_units(units):


 def _decode_datetime_with_cftime(num_dates, units, calendar,
-                                 enable_netcdftimeindex):
+                                 enable_cftimeindex):
     cftime = _import_cftime()

     dates = np.asarray(cftime.num2date(num_dates, units, calendar))
@@ -104,7 +104,7 @@ def _decode_datetime_with_cftime(num_dates, units, calendar,
                 'cftime.datetime objects instead, reason: dates out '
                 'of range', SerializationWarning, stacklevel=3)
     else:
-        if enable_netcdftimeindex:
+        if enable_cftimeindex:
             if calendar in _STANDARD_CALENDARS:
                 dates = cftime_to_nptime(dates)
         else:
@@ -113,7 +113,7 @@ def _decode_datetime_with_cftime(num_dates, units, calendar,
                     'objects, because dates are encoded using a '
                     'non-standard calendar ({}). Using cftime.datetime '
                     'objects instead. Time indexing will be done using a '
-                    'NetCDFTimeIndex rather than '
+                    'CFTimeIndex rather than '
                     'a DatetimeIndex'.format(calendar),
                     SerializationWarning, stacklevel=3)
         else:
@@ -128,7 +128,7 @@ def _decode_datetime_with_cftime(num_dates, units, calendar,
     return dates


-def _decode_cf_datetime_dtype(data, units, calendar, enable_netcdftimeindex):
+def _decode_cf_datetime_dtype(data, units, calendar, enable_cftimeindex):
     # Verify that at least the first and last date can be decoded
     # successfully. Otherwise, tracebacks end up swallowed by
     # Dataset.__repr__ when users try to view their lazily decoded array.
@@ -139,7 +139,7 @@ def _decode_cf_datetime_dtype(data, units, calendar, enable_cftimeindex):
     try:
         result = decode_cf_datetime(example_value, units, calendar,
-                                    enable_netcdftimeindex)
+                                    enable_cftimeindex)
     except Exception:
         calendar_msg = ('the default calendar' if calendar is None
                         else 'calendar %r' % calendar)
@@ -156,12 +156,12 @@ def _decode_cf_datetime_dtype(data, units, calendar, enable_cftimeindex):


 def decode_cf_datetime(num_dates, units, calendar=None,
-                       enable_netcdftimeindex=False):
+                       enable_cftimeindex=False):
     """Given an array of numeric dates in netCDF format, convert it into a
     numpy array of date time objects.

     For standard (Gregorian) calendars, this function uses vectorized
-    operations, which makes it much faster than netcdftime.num2date. In such a
+    operations, which makes it much faster than cftime.num2date. In such a
     case, the returned array will be of type np.datetime64.

     Note that time unit in `units` must not be smaller than microseconds and
@@ -169,7 +169,7 @@ def decode_cf_datetime(num_dates, units, calendar=None,

     See also
     --------
-    netcdftime.num2date
+    cftime.num2date
     """
     num_dates = np.asarray(num_dates)
     flat_num_dates = num_dates.ravel()
@@ -187,7 +187,7 @@ def decode_cf_datetime(num_dates, units, calendar=None,
             ref_date = pd.Timestamp(ref_date)
         except ValueError:
             # ValueError is raised by pd.Timestamp for non-ISO timestamp
-            # strings, in which case we fall back to using netcdftime
+            # strings, in which case we fall back to using cftime
             raise OutOfBoundsDatetime

         # fixes: https://github.com/pydata/pandas/issues/14068
@@ -208,7 +208,7 @@ def decode_cf_datetime(num_dates, units, calendar=None,
     except (OutOfBoundsDatetime, OverflowError):
         dates = _decode_datetime_with_cftime(
             flat_num_dates.astype(np.float), units, calendar,
-            enable_netcdftimeindex)
+            enable_cftimeindex)

     return dates.reshape(num_dates.shape)

@@ -267,13 +267,13 @@ def infer_datetime_units(dates):
     dates = np.asarray(dates).ravel()
     unique_timedeltas = np.unique(pd.to_timedelta(np.diff(dates)))
     reference_date = dates[0] if len(dates) > 0 else '1970-01-01'
-    reference_date = format_netcdftime_datetime(reference_date)
+    reference_date = format_cftime_datetime(reference_date)
     units = _infer_time_units_from_diff(unique_timedeltas)
     return '%s since %s' % (units, reference_date)


-def format_netcdftime_datetime(date):
-    """Converts a netcdftime.datetime object to a string with the format:
+def format_cftime_datetime(date):
+    """Converts a cftime.datetime object to a string with the format:
     YYYY-MM-DD HH:MM:SS.UUUUUU
     """
     return '{:04d}-{:02d}-{:02d} {:02d}:{:02d}:{:02d}.{:06d}'.format(
@@ -313,8 +313,8 @@ def _cleanup_netcdf_time_units(units):
     return units


-def _encode_datetime_with_netcdftime(dates, units, calendar):
-    """Fallback method for encoding dates using netcdftime.
+def _encode_datetime_with_cftime(dates, units, calendar):
+    """Fallback method for encoding dates using cftime.

     This method is more flexible than xarray's parsing using datetime64[ns]
     arrays but also slower because it loops over each element.
@@ -346,7 +346,7 @@ def encode_cf_datetime(dates, units=None, calendar=None):

     See also
     --------
-    netcdftime.date2num
+    cftime.date2num
     """
     dates = np.asarray(dates)
@@ -361,7 +361,7 @@ def encode_cf_datetime(dates, units=None, calendar=None):
     delta, ref_date = _unpack_netcdf_time_units(units)
     try:
         if calendar not in _STANDARD_CALENDARS or dates.dtype.kind == 'O':
-            # parse with netcdftime instead
+            # parse with cftime instead
             raise OutOfBoundsDatetime
         assert dates.dtype == 'datetime64[ns]'

@@ -371,7 +371,7 @@ def encode_cf_datetime(dates, units=None, calendar=None):
         num = (dates - ref_date) / time_delta

     except (OutOfBoundsDatetime, OverflowError):
-        num = _encode_datetime_with_netcdftime(dates, units, calendar)
+        num = _encode_datetime_with_cftime(dates, units, calendar)

     num = cast_to_int_if_safe(num)
     return (num, units, calendar)
@@ -410,10 +410,10 @@ def decode(self, variable, name=None):
             units = pop_to(attrs, encoding, 'units')
             calendar = pop_to(attrs, encoding, 'calendar')
             dtype = _decode_cf_datetime_dtype(
-                data, units, calendar, OPTIONS['enable_netcdftimeindex'])
+                data, units, calendar, OPTIONS['enable_cftimeindex'])
             transform = partial(
                 decode_cf_datetime, units=units, calendar=calendar,
-                enable_netcdftimeindex=OPTIONS['enable_netcdftimeindex'])
+                enable_cftimeindex=OPTIONS['enable_cftimeindex'])
             data = lazy_elemwise_func(data, transform, dtype)

         return Variable(dims, data, attrs, encoding)
diff --git a/xarray/core/accessors.py b/xarray/core/accessors.py
index f26fc4c3d3f..76628cf3121 100644
--- a/xarray/core/accessors.py
+++ b/xarray/core/accessors.py
@@ -16,17 +16,17 @@ def _season_from_months(months):
     return seasons[(months // 3) % 4]


-def _access_through_netcdftimeindex(values, name):
-    """Coerce an array of datetime-like values to a NetCDFTimeIndex
+def _access_through_cftimeindex(values, name):
+    """Coerce an array of datetime-like values to a CFTimeIndex
     and access requested datetime component
     """
-    from ..coding.netcdftimeindex import NetCDFTimeIndex
-    values_as_netcdftimeindex = NetCDFTimeIndex(values)
+    from ..coding.cftimeindex import CFTimeIndex
+    values_as_cftimeindex = CFTimeIndex(values)
     if name == 'season':
-        months = values_as_netcdftimeindex.month
+        months = values_as_cftimeindex.month
         field_values = _season_from_months(months)
     else:
-        field_values = getattr(values_as_netcdftimeindex, name)
+        field_values = getattr(values_as_cftimeindex, name)
     return field_values.reshape(values.shape)

@@ -65,7 +65,7 @@ def _get_date_field(values, name, dtype):
     if is_np_datetime_like(values.dtype):
         access_method = _access_through_series
     else:
-        access_method = _access_through_netcdftimeindex
+        access_method = _access_through_cftimeindex
     if isinstance(values, dask_array_type):
         from dask.array import map_blocks
@@ -130,7 +130,7 @@ class DatetimeAccessor(object):
     All of the pandas fields are accessible here. Note that these fields are
     not calendar-aware; if your datetimes are encoded with a non-Gregorian
-    calendar (e.g. a 360-day calendar) using netcdftime, then some fields like
+    calendar (e.g. a 360-day calendar) using cftime, then some fields like
     `dayofyear` may not be accurate.
     """

@@ -139,7 +139,7 @@ def __init__(self, xarray_obj):
         if not _contains_datetime_like_objects(xarray_obj):
             raise TypeError("'dt' accessor only available for "
                             "DataArray with datetime64 timedelta64 dtype or "
-                            "for arrays containing netcdftime datetime "
+                            "for arrays containing cftime datetime "
                             "objects.")
         self._obj = xarray_obj
diff --git a/xarray/core/options.py b/xarray/core/options.py
index 29a1dbe7f01..48d4567fc99 100644
--- a/xarray/core/options.py
+++ b/xarray/core/options.py
@@ -3,7 +3,7 @@
 OPTIONS = {
     'display_width': 80,
     'arithmetic_join': 'inner',
-    'enable_netcdftimeindex': False
+    'enable_cftimeindex': False
 }

@@ -16,7 +16,7 @@ class set_options(object):
       Default: ``80``.
     - ``arithmetic_join``: DataArray/Dataset alignment in binary operations.
       Default: ``'inner'``.
-    - ``enable_netcdftimeindex``: flag to enable using a ``NetCDFTimeIndex``
+    - ``enable_cftimeindex``: flag to enable using a ``CFTimeIndex``
       for time indexes with non-standard calendars or dates outside the
       Timestamp-valid range. Default: ``False``.
diff --git a/xarray/core/utils.py b/xarray/core/utils.py
index 6fe0dbdcf71..82452271bb9 100644
--- a/xarray/core/utils.py
+++ b/xarray/core/utils.py
@@ -37,21 +37,21 @@ def wrapper(*args, **kwargs):
     return wrapper


-def _maybe_cast_to_netcdftimeindex(index):
+def _maybe_cast_to_cftimeindex(index):
     from ..coding.times import _import_cftime_datetime

-    if not OPTIONS['enable_netcdftimeindex']:
+    if not OPTIONS['enable_cftimeindex']:
         return index
     else:
         try:
             cftime_datetime = _import_cftime_datetime()
-            from ..coding.netcdftimeindex import NetCDFTimeIndex
+            from ..coding.cftimeindex import CFTimeIndex
         except ImportError:
             return index
         else:
             if len(index):
                 if isinstance(index[0], cftime_datetime):
-                    index = NetCDFTimeIndex(index)
+                    index = CFTimeIndex(index)
     return index

@@ -73,7 +73,7 @@ def safe_cast_to_index(array):
         if hasattr(array, 'dtype') and array.dtype.kind == 'O':
             kwargs['dtype'] = object
         index = pd.Index(np.asarray(array), **kwargs)
-    return _maybe_cast_to_netcdftimeindex(index)
+    return _maybe_cast_to_cftimeindex(index)


 def multiindex_from_product_levels(levels, names=None):
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 4004e38437d..81685a810d8 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -349,7 +349,7 @@ def test_roundtrip_numpy_datetime_data(self):
             assert_identical(expected, actual)
         assert actual.t0.encoding['units'] == 'days since 1950-01-01'

-    def test_roundtrip_netcdftime_datetime_data(self):
+    def test_roundtrip_cftime_datetime_data(self):
         from .test_coding_times import _all_cftime_date_types

         date_types = _all_cftime_date_types()
@@ -1922,7 +1922,7 @@ def test_roundtrip_numpy_datetime_data(self):
         with self.roundtrip(expected) as actual:
             assert_identical(expected, actual)

-    def test_roundtrip_netcdftime_datetime_data(self):
+    def test_roundtrip_cftime_datetime_data(self):
         # Override method in DatasetIOTestCases - remove not applicable
         # save_kwds
         from .test_coding_times import _all_cftime_date_types
diff --git a/xarray/tests/test_netcdftimeindex.py b/xarray/tests/test_cftimeindex.py
similarity index 96%
rename from xarray/tests/test_netcdftimeindex.py
rename to xarray/tests/test_cftimeindex.py
index 4f886b27934..0e269578670 100644
--- a/xarray/tests/test_netcdftimeindex.py
+++ b/xarray/tests/test_cftimeindex.py
@@ -6,8 +6,8 @@
 import xarray as xr

 from datetime import timedelta
-from xarray.coding.netcdftimeindex import (
-    parse_iso8601, NetCDFTimeIndex, assert_all_valid_date_type,
+from xarray.coding.cftimeindex import (
+    parse_iso8601, CFTimeIndex, assert_all_valid_date_type,
     _parsed_string_to_bounds, _parse_iso8601_with_reso)
 from xarray.tests import assert_array_equal, assert_identical
@@ -80,14 +80,14 @@ def date_type(request):
 def index(date_type):
     dates = [date_type(1, 1, 1), date_type(1, 2, 1),
              date_type(2, 1, 1), date_type(2, 2, 1)]
-    return NetCDFTimeIndex(dates)
+    return CFTimeIndex(dates)


 @pytest.fixture
 def monotonic_decreasing_index(date_type):
     dates = [date_type(2, 2, 1), date_type(2, 1, 1),
              date_type(1, 2, 1), date_type(1, 1, 1)]
-    return NetCDFTimeIndex(dates)
+    return CFTimeIndex(dates)


 @pytest.fixture
@@ -150,7 +150,7 @@ def test_assert_all_valid_date_type(date_type, index):
      ('minute', [0, 0, 0, 0]),
      ('second', [0, 0, 0, 0]),
      ('microsecond', [0, 0, 0, 0])])
-def test_netcdftimeindex_field_accessors(index, field, expected):
+def test_cftimeindex_field_accessors(index, field, expected):
     result = getattr(index, field)
     assert_array_equal(result, expected)
@@ -514,9 +514,9 @@ def test_indexing_in_dataframe_iloc(df, index):
     assert result.equals(expected)


-@pytest.mark.parametrize('enable_netcdftimeindex', [False, True])
-def test_concat_netcdftimeindex(date_type, enable_netcdftimeindex):
-    with xr.set_options(enable_netcdftimeindex=enable_netcdftimeindex):
+@pytest.mark.parametrize('enable_cftimeindex', [False, True])
+def test_concat_cftimeindex(date_type, enable_cftimeindex):
+    with xr.set_options(enable_cftimeindex=enable_cftimeindex):
         da1 = xr.DataArray(
             [1., 2.], coords=[[date_type(1, 1, 1), date_type(1, 2, 1)]],
             dims=['time'])
@@ -525,8 +525,8 @@ def test_concat_cftimeindex(date_type, enable_cftimeindex):
             dims=['time'])
         da = xr.concat([da1, da2], dim='time')

-    if enable_netcdftimeindex:
-        assert isinstance(da.indexes['time'], NetCDFTimeIndex)
+    if enable_cftimeindex:
+        assert isinstance(da.indexes['time'], CFTimeIndex)
     else:
         assert isinstance(da.indexes['time'], pd.Index)
-        assert not isinstance(da.indexes['time'], NetCDFTimeIndex)
+        assert not isinstance(da.indexes['time'], CFTimeIndex)
diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
index 7727b1ed375..2da4bec4e60 100644
--- a/xarray/tests/test_coding_times.py
+++ b/xarray/tests/test_coding_times.py
@@ -135,7 +135,7 @@ def test_decode_cf_datetime_overflow():
 def test_decode_cf_datetime_non_standard_units():
     expected = pd.date_range(periods=100, start='1970-01-01', freq='h')
     # netCDFs from madis.noaa.gov use this format for their time units
-    # they cannot be parsed by netcdftime, but pd.Timestamp works
+    # they cannot be parsed by cftime, but pd.Timestamp works
     units = 'hours since 1-1-1970'
     actual = coding.times.decode_cf_datetime(np.arange(100), units)
     assert_array_equal(actual, expected)
@@ -156,10 +156,10 @@ def test_decode_cf_datetime_non_iso_strings():

 @pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed')
 @pytest.mark.parametrize(
-    ['calendar', 'enable_netcdftimeindex'],
+    ['calendar', 'enable_cftimeindex'],
     product(coding.times._STANDARD_CALENDARS, [False, True]))
 def test_decode_standard_calendar_inside_timestamp_range(
-        calendar, enable_netcdftimeindex):
+        calendar, enable_cftimeindex):
     cftime = _import_cftime()
     units = 'days since 0001-01-01'
     times = pd.date_range('2001-04-01-00', end='2001-04-30-23',
@@ -173,7 +173,7 @@ def test_decode_standard_calendar_inside_timestamp_range(
         warnings.filterwarnings('ignore', 'Unable to decode time axis')
         actual = coding.times.decode_cf_datetime(
             noleap_time, units, calendar=calendar,
-            enable_netcdftimeindex=enable_netcdftimeindex)
+            enable_cftimeindex=enable_cftimeindex)
     assert actual.dtype == expected_dtype
     abs_diff = abs(actual - expected)
     # once we no longer support versions of netCDF4 older than 1.1.5,
@@ -184,17 +184,17 @@ def test_decode_standard_calendar_inside_timestamp_range(

 @pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed')
 @pytest.mark.parametrize(
-    ['calendar', 'enable_netcdftimeindex'],
+    ['calendar', 'enable_cftimeindex'],
     product(_NON_STANDARD_CALENDARS, [False, True]))
 def test_decode_non_standard_calendar_inside_timestamp_range(
-        calendar, enable_netcdftimeindex):
+        calendar, enable_cftimeindex):
     cftime = _import_cftime()
     units = 'days since 0001-01-01'
     times = pd.date_range('2001-04-01-00', end='2001-04-30-23',
                           freq='H')
     noleap_time = cftime.date2num(times.to_pydatetime(), units,
                                   calendar=calendar)
-    if enable_netcdftimeindex:
+    if enable_cftimeindex:
         expected = cftime.num2date(noleap_time, units, calendar=calendar)
         expected_dtype = np.dtype('O')
     else:
@@ -205,7 +205,7 @@ def test_decode_non_standard_calendar_inside_timestamp_range(
         warnings.filterwarnings('ignore', 'Unable to decode time axis')
         actual = coding.times.decode_cf_datetime(
             noleap_time, units, calendar=calendar,
-            enable_netcdftimeindex=enable_netcdftimeindex)
+            enable_cftimeindex=enable_cftimeindex)
     assert actual.dtype == expected_dtype
     abs_diff = abs(actual - expected)
     # once we no longer support versions of netCDF4 older than 1.1.5,
@@ -216,10 +216,10 @@ def test_decode_non_standard_calendar_inside_timestamp_range(

 @pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed')
 @pytest.mark.parametrize(
-    ['calendar', 'enable_netcdftimeindex'],
+    ['calendar', 'enable_cftimeindex'],
     product(_ALL_CALENDARS, [False, True]))
 def test_decode_dates_outside_timestamp_range(
-        calendar, enable_netcdftimeindex):
+        calendar, enable_cftimeindex):
     from datetime import datetime
     cftime = _import_cftime()
@@ -233,7 +233,7 @@ def test_decode_dates_outside_timestamp_range(
         warnings.filterwarnings('ignore', 'Unable to decode time axis')
         actual = coding.times.decode_cf_datetime(
             noleap_time, units, calendar=calendar,
-            enable_netcdftimeindex=enable_netcdftimeindex)
+            enable_cftimeindex=enable_cftimeindex)
     assert all(isinstance(value, expected_date_type) for value in actual)
     abs_diff = abs(actual - expected)
     # once we no longer support versions of netCDF4 older than 1.1.5,
@@ -244,10 +244,10 @@ def test_decode_dates_outside_timestamp_range(

 @pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed')
 @pytest.mark.parametrize(
-    ['calendar', 'enable_netcdftimeindex'],
+    ['calendar', 'enable_cftimeindex'],
     product(coding.times._STANDARD_CALENDARS, [False, True]))
 def test_decode_standard_calendar_single_element_inside_timestamp_range(
-        calendar, enable_netcdftimeindex):
+        calendar, enable_cftimeindex):
     units = 'days since 0001-01-01'
     for num_time in [735368, [735368], [[735368]]]:
         with warnings.catch_warnings():
@@ -255,16 +255,16 @@ def test_decode_standard_calendar_single_element_inside_timestamp_range(
                                     'Unable to decode time axis')
             actual = coding.times.decode_cf_datetime(
                 num_time, units, calendar=calendar,
-                enable_netcdftimeindex=enable_netcdftimeindex)
+                enable_cftimeindex=enable_cftimeindex)
         assert actual.dtype == np.dtype('M8[ns]')


 @pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed')
 @pytest.mark.parametrize(
-    ['calendar', 'enable_netcdftimeindex'],
+    ['calendar', 'enable_cftimeindex'],
     product(_NON_STANDARD_CALENDARS, [False, True]))
 def test_decode_non_standard_calendar_single_element_inside_timestamp_range(
-        calendar, enable_netcdftimeindex):
+        calendar, enable_cftimeindex):
     units = 'days since 0001-01-01'
     for num_time in [735368, [735368], [[735368]]]:
         with warnings.catch_warnings():
@@ -272,8 +272,8 @@ def test_decode_non_standard_calendar_single_element_inside_timestamp_range(
                                     'Unable to decode time axis')
             actual = coding.times.decode_cf_datetime(
                 num_time, units, calendar=calendar,
-                enable_netcdftimeindex=enable_netcdftimeindex)
-        if enable_netcdftimeindex:
+                enable_cftimeindex=enable_cftimeindex)
+        if enable_cftimeindex:
             assert actual.dtype == np.dtype('O')
         else:
             assert actual.dtype == np.dtype('M8[ns]')
@@ -281,10 +281,10 @@ def test_decode_non_standard_calendar_single_element_inside_timestamp_range(

 @pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed')
 @pytest.mark.parametrize(
-    ['calendar', 'enable_netcdftimeindex'],
+    ['calendar', 'enable_cftimeindex'],
     product(_NON_STANDARD_CALENDARS, [False, True]))
 def test_decode_single_element_outside_timestamp_range(
-        calendar, enable_netcdftimeindex):
+        calendar, enable_cftimeindex):
     cftime = _import_cftime()
     units = 'days since 0001-01-01'
     for days in [1, 1470376]:
@@ -294,17 +294,17 @@ def test_decode_single_element_outside_timestamp_range(
                                     'Unable to decode time axis')
             actual = coding.times.decode_cf_datetime(
                 num_time, units, calendar=calendar,
-                enable_netcdftimeindex=enable_netcdftimeindex)
+                enable_cftimeindex=enable_cftimeindex)
         expected = cftime.num2date(days, units, calendar)
         assert isinstance(actual.item(), type(expected))


 @pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed')
 @pytest.mark.parametrize(
-    ['calendar', 'enable_netcdftimeindex'],
+    ['calendar', 'enable_cftimeindex'],
     product(coding.times._STANDARD_CALENDARS, [False, True]))
 def test_decode_standard_calendar_multidim_time_inside_timestamp_range(
-        calendar, enable_netcdftimeindex):
+        calendar, enable_cftimeindex):
     cftime = _import_cftime()

     units = 'days since 0001-01-01'
@@ -323,7 +323,7 @@ def test_decode_standard_calendar_multidim_time_inside_timestamp_range(

     actual = coding.times.decode_cf_datetime(
         mdim_time, units, calendar=calendar,
-        enable_netcdftimeindex=enable_netcdftimeindex)
+        enable_cftimeindex=enable_cftimeindex)
     assert actual.dtype == np.dtype('M8[ns]')

     abs_diff1 = abs(actual[:, 0] - expected1)
@@ -337,10 +337,10 @@ def test_decode_standard_calendar_multidim_time_inside_timestamp_range(

 @pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed')
 @pytest.mark.parametrize(
-    ['calendar', 'enable_netcdftimeindex'],
+    ['calendar', 'enable_cftimeindex'],
     product(_NON_STANDARD_CALENDARS, [False, True]))
 def test_decode_nonstandard_calendar_multidim_time_inside_timestamp_range(
-        calendar, enable_netcdftimeindex):
+        calendar, enable_cftimeindex):
     cftime = _import_cftime()

     units = 'days since 0001-01-01'
@@ -354,7 +354,7 @@ def test_decode_nonstandard_calendar_multidim_time_inside_timestamp_range(
     mdim_time[:, 0] = noleap_time1
     mdim_time[:, 1] = noleap_time2

-    if enable_netcdftimeindex:
+    if enable_cftimeindex:
         expected1 = cftime.num2date(noleap_time1, units, calendar)
         expected2 = cftime.num2date(noleap_time2, units, calendar)
         expected_dtype = np.dtype('O')
@@ -365,7 +365,7 @@ def test_decode_nonstandard_calendar_multidim_time_inside_timestamp_range(

     actual = coding.times.decode_cf_datetime(
         mdim_time, units, calendar=calendar,
-        enable_netcdftimeindex=enable_netcdftimeindex)
+        enable_cftimeindex=enable_cftimeindex)
     assert actual.dtype == expected_dtype

     abs_diff1 = abs(actual[:, 0] - expected1)
@@ -379,10 +379,10 @@ def test_decode_nonstandard_calendar_multidim_time_inside_timestamp_range(

 @pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed')
 @pytest.mark.parametrize(
-    ['calendar', 'enable_netcdftimeindex'],
+    ['calendar', 'enable_cftimeindex'],
     product(_ALL_CALENDARS, [False, True]))
 def test_decode_multidim_time_outside_timestamp_range(
-        calendar, enable_netcdftimeindex):
+        calendar, enable_cftimeindex):
     from datetime import datetime
     cftime = _import_cftime()
@@ -402,7 +402,7 @@ def test_decode_multidim_time_outside_timestamp_range(
         warnings.filterwarnings('ignore', 'Unable to decode time axis')
         actual = coding.times.decode_cf_datetime(
             mdim_time, units, calendar=calendar,
-            enable_netcdftimeindex=enable_netcdftimeindex)
+            enable_cftimeindex=enable_cftimeindex)

     assert actual.dtype == np.dtype('O')
@@ -417,10 +417,10 @@ def test_decode_multidim_time_outside_timestamp_range(

 @pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed')
 @pytest.mark.parametrize(
-    ['calendar', 'enable_netcdftimeindex'],
+    ['calendar', 'enable_cftimeindex'],
     product(['360_day', 'all_leap', '366_day'], [False, True]))
 def test_decode_non_standard_calendar_single_element_fallback(
-        calendar, enable_netcdftimeindex):
+        calendar, enable_cftimeindex):
     cftime = _import_cftime()

     units = 'days since 0001-01-01'
@@ -435,7 +435,7 @@ def test_decode_non_standard_calendar_single_element_fallback(
                           match='Unable to decode time axis'):
         actual = coding.times.decode_cf_datetime(
             num_time, units, calendar=calendar,
-            enable_netcdftimeindex=enable_netcdftimeindex)
+            enable_cftimeindex=enable_cftimeindex)
     expected = np.asarray(cftime.num2date(num_time, units, calendar))
     assert actual.dtype == np.dtype('O')
     assert expected == actual
@@ -443,10 +443,10 @@ def test_decode_non_standard_calendar_single_element_fallback(

 @pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed')
 @pytest.mark.parametrize(
-    ['calendar', 'enable_netcdftimeindex'],
+    ['calendar', 'enable_cftimeindex'],
     product(['360_day'], [False, True]))
 def test_decode_non_standard_calendar_fallback(
-        calendar, enable_netcdftimeindex):
+        calendar, enable_cftimeindex):
     cftime = _import_cftime()
     # ensure leap year doesn't matter
     for year in [2010, 2011, 2012, 2013, 2014]:
@@ -458,7 +458,7 @@ def test_decode_non_standard_calendar_fallback(
             warnings.simplefilter('always')
             actual = coding.times.decode_cf_datetime(
                 num_times, units, calendar=calendar,
-                enable_netcdftimeindex=enable_netcdftimeindex)
+                enable_cftimeindex=enable_cftimeindex)
             assert len(w) == 1
             assert 'Unable to decode time axis' in str(w[0].message)
@@ -515,7 +515,7 @@ def test_infer_datetime_units(dates, expected):

 @requires_cftime_or_netCDF4
-def test_infer_netcdftime_datetime_units():
+def test_infer_cftime_datetime_units():
     date_types = _all_cftime_date_types()
     for date_type in date_types.values():
         for dates, expected in [
@@ -599,18 +599,18 @@ def test_infer_timedelta_units(deltas, expected):
              '0100-02-03 04:05:06.000000'),
             ((1000, 2, 3, 4, 5, 6),
              '1000-02-03 04:05:06.000000')])
-def test_format_netcdftime_datetime(date_args, expected):
+def test_format_cftime_datetime(date_args, expected):
     date_types = _all_cftime_date_types()
     for date_type in date_types.values():
-        result = coding.times.format_netcdftime_datetime(date_type(*date_args))
+        result = coding.times.format_cftime_datetime(date_type(*date_args))
         assert result == expected


 @pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed')
 @pytest.mark.parametrize(
-    ['calendar', 'enable_netcdftimeindex'],
+    ['calendar', 'enable_cftimeindex'],
     product(_ALL_CALENDARS, [False, True]))
-def test_decode_cf_enable_netcdftimeindex(calendar, enable_netcdftimeindex):
+def test_decode_cf_enable_cftimeindex(calendar, enable_cftimeindex):
     days = [1., 2., 3.]
     da = DataArray(days, coords=[days], dims=['time'], name='test')
     ds = da.to_dataset()
@@ -619,10 +619,10 @@ def test_decode_cf_enable_cftimeindex(calendar, enable_cftimeindex):
         ds[v].attrs['units'] = 'days since 2000-01-01'
         ds[v].attrs['calendar'] = calendar

-    with set_options(enable_netcdftimeindex=enable_netcdftimeindex):
+    with set_options(enable_cftimeindex=enable_cftimeindex):
         ds = decode_cf(ds)

-    if (enable_netcdftimeindex and
+    if (enable_cftimeindex and
             calendar not in coding.times._STANDARD_CALENDARS):
         assert ds.test.dtype == np.dtype('O')
     else:
diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py
index 6e934191ba8..66d2250e6b8 100644
--- a/xarray/tests/test_utils.py
+++ b/xarray/tests/test_utils.py
@@ -5,7 +5,7 @@
 import pytest

 from datetime import datetime
-from xarray.coding.netcdftimeindex import NetCDFTimeIndex
+from xarray.coding.cftimeindex import CFTimeIndex
 from xarray.core import duck_array_ops, utils
 from xarray.core.options import set_options
 from xarray.core.pycompat import OrderedDict
@@ -40,35 +40,35 @@ def test_safe_cast_to_index():

 @pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed')
-@pytest.mark.parametrize('enable_netcdftimeindex', [False, True])
-def test_safe_cast_to_index_netcdftimeindex(enable_netcdftimeindex):
+@pytest.mark.parametrize('enable_cftimeindex', [False, True])
+def test_safe_cast_to_index_cftimeindex(enable_cftimeindex):
     date_types = _all_cftime_date_types()
     for date_type in date_types.values():
         dates = [date_type(1, 1, day) for day in range(1, 20)]
-        if enable_netcdftimeindex:
-            expected = NetCDFTimeIndex(dates)
+        if enable_cftimeindex:
+            expected = CFTimeIndex(dates)
         else:
             expected = pd.Index(dates)

-        with set_options(enable_netcdftimeindex=enable_netcdftimeindex):
+        with set_options(enable_cftimeindex=enable_cftimeindex):
             actual = utils.safe_cast_to_index(np.array(dates))
         assert_array_equal(expected, actual)
         assert expected.dtype == actual.dtype

-        if enable_netcdftimeindex:
-            assert isinstance(actual, NetCDFTimeIndex)
+        if enable_cftimeindex:
+            assert isinstance(actual, CFTimeIndex)
         else:
             assert isinstance(actual, pd.Index)


 # Test that datetime.datetime objects are never used in a NetCDFTimeIndex
 @pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed')
-@pytest.mark.parametrize('enable_netcdftimeindex', [False, True])
-def test_safe_cast_to_index_datetime_datetime(enable_netcdftimeindex):
+@pytest.mark.parametrize('enable_cftimeindex', [False, True])
+def test_safe_cast_to_index_datetime_datetime(enable_cftimeindex):
     dates = [datetime(1, 1, day) for day in range(1, 20)]
     expected = pd.Index(dates)

-    with set_options(enable_netcdftimeindex=enable_netcdftimeindex):
+    with set_options(enable_cftimeindex=enable_cftimeindex):
         actual = utils.safe_cast_to_index(np.array(dates))
     assert_array_equal(expected, actual)
     assert isinstance(actual, pd.Index)
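With the rename applied, the user-facing surface looks like the following
sketch (it assumes the standalone cftime package and this branch of xarray):

    import numpy as np
    import xarray as xr
    from cftime import DatetimeNoLeap

    dates = [DatetimeNoLeap(year, 1, 1) for year in range(1, 4)]
    with xr.set_options(enable_cftimeindex=True):
        da = xr.DataArray(np.arange(3), coords=[dates], dims=['time'])

    # The time coordinate is now backed by the renamed index class.
    print(type(da.indexes['time']).__name__)  # expected: 'CFTimeIndex'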
From 67fd3356b7904a3235344976bb3587144c84159b Mon Sep 17 00:00:00 2001
From: Spencer Clark
Date: Mon, 16 Apr 2018 09:30:38 -0400
Subject: [PATCH 37/58] Documentation updates

---
 doc/time-series.rst | 28 ++++++++++++++--------------
 doc/whats-new.rst   | 19 ++++++++++---------
 2 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/doc/time-series.rst b/doc/time-series.rst
index 07805fdbb26..b3c69fc3f55 100644
--- a/doc/time-series.rst
+++ b/doc/time-series.rst
@@ -70,10 +70,10 @@ You can manual decode arrays in this form by passing a dataset to
 One unfortunate limitation of using ``datetime64[ns]`` is that it limits the
 native representation of dates to those that fall between the years 1678 and
 2262. When a netCDF file contains dates outside of these bounds, dates will be
-returned as arrays of ``netcdftime.datetime`` objects and a ``NetCDFTimeIndex``
-can be used for indexing. The ``NetCDFTimeIndex`` enables only a subset of
+returned as arrays of ``cftime.datetime`` objects and a ``CFTimeIndex``
+can be used for indexing. The ``CFTimeIndex`` enables only a subset of
 the indexing functionality of a ``pandas.DatetimeIndex``. See
-:ref:`NetCDFTimeIndex` for more information.
+:ref:`CFTimeIndex` for more information.

 Datetime indexing
 -----------------
@@ -212,41 +212,41 @@ For more examples of using grouped operations on a time dimension, see
 :ref:`toy weather data`.


-.. _NetCDFTimeIndex:
+.. _CFTimeIndex:

 Non-standard calendars and dates outside the Timestamp-valid range
 -------------------------------------------------------------------

-Through the optional ``netcdftime`` library and a custom subclass of
+Through the optional ``cftime`` library and a custom subclass of
 ``pandas.Index``, xarray supports a subset of the indexing functionality
 enabled through the standard ``pandas.DatetimeIndex`` for dates from
 non-standard calendars or dates using a standard calendar, but outside the
 `Timestamp-valid range`_ (approximately between years 1678 and 2262). This
 behavior has not yet been turned on by default; to take advantage of this
-functionality, you must have the ``enable_netcdftimeindex`` option set to
+functionality, you must have the ``enable_cftimeindex`` option set to
 ``True`` within your context (see :py:func:`~xarray.set_options` for more
 information). It is expected that this will become the default behavior in
 xarray version 0.11.

 For instance, you can create a DataArray indexed by a time
 coordinate with a no-leap calendar within a context manager setting the
-``enable_netcdftimeindex`` option, and the time index will be cast to a
-``NetCDFTimeIndex``:
+``enable_cftimeindex`` option, and the time index will be cast to a
+``CFTimeIndex``:

 .. ipython:: python

     from itertools import product
-    from netcdftime import DatetimeNoLeap
+    from cftime import DatetimeNoLeap
     dates = [DatetimeNoLeap(year, month, 1) for year, month in
              product(range(1, 3), range(1, 13))]
-    with xr.set_options(enable_netcdftimeindex=True):
+    with xr.set_options(enable_cftimeindex=True):
         da = xr.DataArray(np.arange(24), coords=[dates], dims=['time'],
                           name='foo')

 .. note::

-   With the ``enable_netcdftimeindex`` option activated, a ``NetCDFTimeIndex``
+   With the ``enable_cftimeindex`` option activated, a ``CFTimeIndex``
    will be used for time indexing if any of the following are true:

    - The dates are from a non-standard calendar
    - Any dates are outside the Timestamp-valid range

    Otherwise a ``pandas.DatetimeIndex`` will be used. In addition, if any
   variable (not just an index variable) is encoded using a non-standard
-   calendar, its times will be decoded into ``netcdftime.datetime`` objects,
+   calendar, its times will be decoded into ``cftime.datetime`` objects,
    regardless of whether or not they can be represented using
    ``np.datetime64[ns]`` objects.

-For data indexed by a ``NetCDFTimeIndex`` xarray currently supports:
+For data indexed by a ``CFTimeIndex`` xarray currently supports:

 - `Partial datetime string indexing`_ using strictly `ISO 8601-format`_
   partial datetime strings:
@@ -295,7 +295,7 @@ For data indexed by a ``CFTimeIndex`` xarray currently supports:
 .. note::

    Currently resampling along the time dimension for data indexed by a
-   ``NetCDFTimeIndex`` is not supported.
+   ``CFTimeIndex`` is not supported.

 .. _Timestamp-valid range: https://pandas.pydata.org/pandas-docs/stable/timeseries.html#timestamp-limitations
 .. _ISO 8601-format: https://en.wikipedia.org/wiki/ISO_8601
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 96a1a36e766..4a469e92788 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -34,6 +34,16 @@ v0.10.4 (unreleased)
 Enhancements
 ~~~~~~~~~~~~

+- Add an option for using a ``CFTimeIndex`` for indexing times with
+  non-standard calendars and/or outside the Timestamp-valid range; this index
+  enables a subset of the functionality of a standard
+  ``pandas.DatetimeIndex`` (:issue:`789`, :issue:`1084`, :issue:`1252`).
+  By `Spencer Clark `_ with help from
+  `Stephan Hoyer `_.
+- Allow for serialization of ``cftime.datetime`` objects (:issue:`789`,
+  :issue:`1084`, :issue:`2008`, :issue:`1252`). By `Spencer Clark
+  `_.
+
 Bug fixes
 ~~~~~~~~~
@@ -48,15 +58,6 @@ The minor release includes a number of bug-fixes and backwards compatible enhanc
 Enhancements
 ~~~~~~~~~~~~

-- Add an option for using a ``NetCDFTimeIndex`` for indexing times with
-  non-standard calendars and/or outside the Timestamp-valid range; this index
-  enables a subset of the functionality of a standard
-  ``pandas.DatetimeIndex`` (:issue:`789`, :issue:`1084`, :issue:`1252`).
-  By `Spencer Clark `_ with help from
-  `Stephan Hoyer `_.
-- Allow for serialization of ``netcdftime.datetime`` objects (:issue:`789`,
-  :issue:`1084`, :issue:`1252`). By `Spencer Clark
-  `_.
 - Some speed improvement to construct :py:class:`~xarray.DataArrayRolling`
   object (:issue:`1993`)
   By `Keisuke Fujii `_.
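The documentation above advertises partial datetime-string indexing; a short
sketch of what that enables, reusing the no-leap `da` built in the ipython
example (its years are 1 and 2, so the ISO strings use four-digit years):

    # Select everything from year 1 with an ISO 8601 partial string;
    # a CFTimeIndex parses '0001' the way a DatetimeIndex parses '2001'.
    da.sel(time='0001')

    # Partial-string slices are supported as well:
    da.sel(time=slice('0001-01', '0001-06'))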
From 7041a8dc555b1f0c900bbaa448fd0816a46cf579 Mon Sep 17 00:00:00 2001
From: Spencer Clark
Date: Mon, 16 Apr 2018 13:36:50 -0400
Subject: [PATCH 38/58] Only allow use of CFTimeIndex when using the
 standalone cftime

Also only allow for serialization of cftime.datetime objects when using
the standalone cftime package.

---
 doc/time-series.rst               |  7 ++--
 doc/whats-new.rst                 |  3 +-
 xarray/coding/times.py            | 23 +++++++++++--
 xarray/core/utils.py              |  4 ++-
 xarray/tests/test_backends.py     | 37 ++++++++++++---------
 xarray/tests/test_cftimeindex.py  | 54 +++++++++++++++++--------------
 xarray/tests/test_coding_times.py | 29 +++++++++++------
 xarray/tests/test_utils.py        | 36 +++++++++++++--------
 8 files changed, 124 insertions(+), 69 deletions(-)

diff --git a/doc/time-series.rst b/doc/time-series.rst
index b3c69fc3f55..5b857789629 100644
--- a/doc/time-series.rst
+++ b/doc/time-series.rst
@@ -72,8 +72,9 @@ native representation of dates to those that fall between the years 1678 and
 2262. When a netCDF file contains dates outside of these bounds, dates will be
 returned as arrays of ``cftime.datetime`` objects and a ``CFTimeIndex``
 can be used for indexing. The ``CFTimeIndex`` enables only a subset of
-the indexing functionality of a ``pandas.DatetimeIndex``. See
-:ref:`CFTimeIndex` for more information.
+the indexing functionality of a ``pandas.DatetimeIndex`` and is only enabled
+when using the standalone version of ``cftime`` (not the version packaged with
+earlier versions of ``netCDF4``). See :ref:`CFTimeIndex` for more information.

 Datetime indexing
 -----------------
@@ -217,7 +218,7 @@ For more examples of using grouped operations on a time dimension, see
 Non-standard calendars and dates outside the Timestamp-valid range
 -------------------------------------------------------------------

-Through the optional ``cftime`` library and a custom subclass of
+Through the standalone ``cftime`` library and a custom subclass of
 ``pandas.Index``, xarray supports a subset of the indexing functionality
 enabled through the standard ``pandas.DatetimeIndex`` for dates from
 non-standard calendars or dates using a standard calendar, but outside the
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 4a469e92788..9caa2b251ba 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -41,7 +41,8 @@ Enhancements
   By `Spencer Clark `_ with help from
   `Stephan Hoyer `_.
 - Allow for serialization of ``cftime.datetime`` objects (:issue:`789`,
-  :issue:`1084`, :issue:`2008`, :issue:`1252`). By `Spencer Clark
+  :issue:`1084`, :issue:`2008`, :issue:`1252`) using the standalone ``cftime``
+  library. By `Spencer Clark
   `_.

 Bug fixes
diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index a8bbc655e7b..19035246d18 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -72,6 +72,18 @@ def _import_cftime_datetime():
     return datetime


+def _require_standalone_cftime(message=None):
+    """Raises an ImportError if the standalone cftime is not found"""
+    try:
+        import cftime
+    except ImportError:
+        if message:
+            raise ImportError(message)
+        else:
+            raise ImportError('Using a CFTimeIndex requires the standalone '
+                              'version of the cftime library.')
+
+
 def _netcdf_to_numpy_timeunit(units):
     units = units.lower()
     if not units.endswith('s'):
@@ -264,6 +276,9 @@ def infer_datetime_units(dates):
         reference_date = dates[0] if len(dates) > 0 else '1970-01-01'
         reference_date = pd.Timestamp(reference_date)
     else:
+        _require_standalone_cftime(
+            'Serializing dates of type cftime.datetime '
+            'requires the standalone cftime library.')
         dates = np.asarray(dates).ravel()
         unique_timedeltas = np.unique(pd.to_timedelta(np.diff(dates)))
         reference_date = dates[0] if len(dates) > 0 else '1970-01-01'
@@ -406,14 +421,18 @@ def encode(self, variable, name=None):

     def decode(self, variable, name=None):
         dims, data, attrs, encoding = unpack_for_decoding(variable)

+        enable_cftimeindex = OPTIONS['enable_cftimeindex']
+        if enable_cftimeindex:
+            _require_standalone_cftime()
+
         if 'units' in attrs and 'since' in attrs['units']:
             units = pop_to(attrs, encoding, 'units')
             calendar = pop_to(attrs, encoding, 'calendar')
             dtype = _decode_cf_datetime_dtype(
-                data, units, calendar, OPTIONS['enable_cftimeindex'])
+                data, units, calendar, enable_cftimeindex)
             transform = partial(
                 decode_cf_datetime, units=units, calendar=calendar,
-                enable_cftimeindex=OPTIONS['enable_cftimeindex'])
+                enable_cftimeindex=enable_cftimeindex)
             data = lazy_elemwise_func(data, transform, dtype)

         return Variable(dims, data, attrs, encoding)
diff --git a/xarray/core/utils.py b/xarray/core/utils.py
index 82452271bb9..c77cb7b5705 100644
--- a/xarray/core/utils.py
+++ b/xarray/core/utils.py
@@ -38,11 +38,13 @@ def wrapper(*args, **kwargs):


 def _maybe_cast_to_cftimeindex(index):
-    from ..coding.times import _import_cftime_datetime
+    from ..coding.times import (_import_cftime_datetime,
+                                _require_standalone_cftime)

     if not OPTIONS['enable_cftimeindex']:
         return index
     else:
+        _require_standalone_cftime()
         try:
             cftime_datetime = _import_cftime_datetime()
             from ..coding.cftimeindex import CFTimeIndex
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 81685a810d8..15e5ced6086 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -32,7 +32,7 @@
     assert_identical, flaky, has_netCDF4, has_scipy, network, raises_regex,
     requires_dask, requires_h5netcdf, requires_netCDF4, requires_pathlib,
     requires_pydap, requires_pynio, requires_rasterio, requires_scipy,
-    requires_scipy_or_netCDF4, requires_zarr)
+    requires_scipy_or_netCDF4, requires_zarr, has_cftime)
 from .test_dataset import create_test_data

 try:
@@ -361,21 +361,26 @@ def test_roundtrip_cftime_datetime_data(self):
         expected_decoded_t0 = np.array([date_type(1, 1, 1)])
         expected_calendar = times[0].calendar

-        with self.roundtrip(expected, save_kwargs=kwds) as actual:
-            abs_diff = abs(actual.t.values - expected_decoded_t)
-            self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all())
-            self.assertEquals(actual.t.encoding['units'],
-                              'days since 0001-01-01 00:00:00.000000')
-            self.assertEquals(actual.t.encoding['calendar'],
-                              expected_calendar)
-
-            abs_diff = abs(actual.t0.values - expected_decoded_t0)
-            self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all())
-
-            self.assertEquals(actual.t0.encoding['units'],
-                              'days since 0001-01-01')
-            self.assertEquals(actual.t.encoding['calendar'],
-                              expected_calendar)
+        if has_cftime:
+            with self.roundtrip(expected, save_kwargs=kwds) as actual:
+                abs_diff = abs(actual.t.values - expected_decoded_t)
+                self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all())
+                self.assertEquals(actual.t.encoding['units'],
+                                  'days since 0001-01-01 00:00:00.000000')
+                self.assertEquals(actual.t.encoding['calendar'],
+                                  expected_calendar)
+
+                abs_diff = abs(actual.t0.values - expected_decoded_t0)
+                self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all())
+
+                self.assertEquals(actual.t0.encoding['units'],
+                                  'days since 0001-01-01')
+                self.assertEquals(actual.t.encoding['calendar'],
+                                  expected_calendar)
+        else:
+            with pytest.raises(ImportError):
+                with self.roundtrip(expected, save_kwargs=kwds) as actual:
+                    pass

     def test_roundtrip_timedelta_data(self):
         time_deltas = pd.to_timedelta(['1h', '2h', 'NaT'])
diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py
index 0e269578670..d4b80254932 100644
--- a/xarray/tests/test_cftimeindex.py
+++ b/xarray/tests/test_cftimeindex.py
@@ -11,7 +11,7 @@
     _parsed_string_to_bounds, _parse_iso8601_with_reso)
 from xarray.tests import assert_array_equal, assert_identical

-from . import has_cftime_or_netCDF4
+from . import has_cftime_or_netCDF4, has_cftime

 # Putting this at the module level for now, though technically we
 # don't need netcdftime to test the string parser.
@@ -108,10 +108,9 @@ def df(index):

 @pytest.fixture
 def feb_days(date_type):
-    from cftime import DatetimeAllLeap, Datetime360Day
-    if date_type is DatetimeAllLeap:
+    if date_type is cftime.DatetimeAllLeap:
         return 29
-    elif date_type is Datetime360Day:
+    elif date_type is cftime.Datetime360Day:
         return 30
     else:
         return 28
@@ -119,20 +118,19 @@ def feb_days(date_type):

 @pytest.fixture
 def dec_days(date_type):
-    from cftime import Datetime360Day
-    if date_type is Datetime360Day:
+    if date_type is cftime.Datetime360Day:
         return 30
     else:
         return 31


 def test_assert_all_valid_date_type(date_type, index):
-    from cftime import DatetimeNoLeap, DatetimeAllLeap
-
-    if date_type is DatetimeNoLeap:
-        mixed_date_types = [date_type(1, 1, 1), DatetimeAllLeap(1, 2, 1)]
+    if date_type is cftime.DatetimeNoLeap:
+        mixed_date_types = [date_type(1, 1, 1),
+                            cftime.DatetimeAllLeap(1, 2, 1)]
     else:
-        mixed_date_types = [date_type(1, 1, 1), DatetimeNoLeap(1, 2, 1)]
+        mixed_date_types = [date_type(1, 1, 1),
+                            cftime.DatetimeNoLeap(1, 2, 1)]
     with pytest.raises(TypeError):
         assert_all_valid_date_type(mixed_date_types)
@@ -516,17 +514,25 @@ def test_indexing_in_dataframe_iloc(df, index):

 @pytest.mark.parametrize('enable_cftimeindex', [False, True])
 def test_concat_cftimeindex(date_type, enable_cftimeindex):
-    with xr.set_options(enable_cftimeindex=enable_cftimeindex):
-        da1 = xr.DataArray(
-            [1., 2.], coords=[[date_type(1, 1, 1), date_type(1, 2, 1)]],
-            dims=['time'])
-        da2 = xr.DataArray(
-            [3., 4.], coords=[[date_type(1, 3, 1), date_type(1, 4, 1)]],
-            dims=['time'])
-        da = xr.concat([da1, da2], dim='time')
-
-    if enable_cftimeindex:
-        assert isinstance(da.indexes['time'], CFTimeIndex)
+    if not has_cftime and enable_cftimeindex:
+        with pytest.raises(ImportError):
+            with xr.set_options(enable_cftimeindex=enable_cftimeindex):
+                da1 = xr.DataArray(
+                    [1., 2.],
+                    coords=[[date_type(1, 1, 1), date_type(1, 2, 1)]],
+                    dims=['time'])
     else:
-        assert isinstance(da.indexes['time'], pd.Index)
-        assert not isinstance(da.indexes['time'], CFTimeIndex)
+        with xr.set_options(enable_cftimeindex=enable_cftimeindex):
+            da1 = xr.DataArray(
+                [1., 2.], coords=[[date_type(1, 1, 1), date_type(1, 2, 1)]],
+                dims=['time'])
+            da2 = xr.DataArray(
+                [3., 4.], coords=[[date_type(1, 3, 1), date_type(1, 4, 1)]],
+                dims=['time'])
+            da = xr.concat([da1, da2], dim='time')
+
+        if enable_cftimeindex:
+            assert isinstance(da.indexes['time'], CFTimeIndex)
+        else:
+            assert isinstance(da.indexes['time'], pd.Index)
+            assert not isinstance(da.indexes['time'], CFTimeIndex)
diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
index 2da4bec4e60..cb786df7d9c 100644
--- a/xarray/tests/test_coding_times.py
+++ b/xarray/tests/test_coding_times.py
@@ -12,7 +12,7 @@
 from xarray.coding.variables import SerializationWarning

 from . import (assert_array_equal, has_cftime_or_netCDF4,
-               requires_cftime_or_netCDF4)
+               requires_cftime_or_netCDF4, has_cftime)


 _NON_STANDARD_CALENDARS = {'noleap', '365_day', '360_day',
@@ -213,6 +213,7 @@ def test_decode_non_standard_calendar_inside_timestamp_range(
         # https://github.com/Unidata/netcdf4-python/issues/355
         assert (abs_diff <= np.timedelta64(1, 's')).all()

+
 @pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed')
 @pytest.mark.parametrize(
@@ -334,6 +335,7 @@ def test_decode_standard_calendar_multidim_time_inside_timestamp_range(
     assert (abs_diff1 <= np.timedelta64(1, 's')).all()
     assert (abs_diff2 <= np.timedelta64(1, 's')).all()

+
 @pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed')
 @pytest.mark.parametrize(
@@ -532,7 +534,11 @@ def test_infer_cftime_datetime_units():
             ([date_type(1900, 1, 1),
               date_type(1900, 1, 2, 0, 0, 0, 5)],
              'days since 1900-01-01 00:00:00.000000')]:
-            assert expected == coding.times.infer_datetime_units(dates)
+            if has_cftime:
+                assert expected == coding.times.infer_datetime_units(dates)
+            else:
+                with pytest.raises(ImportError):
+                    coding.times.infer_datetime_units(dates)


 @pytest.mark.parametrize(
@@ -619,11 +625,16 @@ def test_decode_cf_enable_cftimeindex(calendar, enable_cftimeindex):
         ds[v].attrs['units'] = 'days since 2000-01-01'
         ds[v].attrs['calendar'] = calendar

-    with set_options(enable_cftimeindex=enable_cftimeindex):
-        ds = decode_cf(ds)
-
-    if (enable_cftimeindex and
-            calendar not in coding.times._STANDARD_CALENDARS):
-        assert ds.test.dtype == np.dtype('O')
+    if not has_cftime and enable_cftimeindex:
+        with pytest.raises(ImportError):
+            with set_options(enable_cftimeindex=enable_cftimeindex):
+                ds = decode_cf(ds)
     else:
-        assert ds.test.dtype == np.dtype('M8[ns]')
+        with set_options(enable_cftimeindex=enable_cftimeindex):
+            ds = decode_cf(ds)
+
+        if (enable_cftimeindex and
+                calendar not in coding.times._STANDARD_CALENDARS):
+            assert ds.test.dtype == np.dtype('O')
+        else:
+            assert ds.test.dtype == np.dtype('M8[ns]')
diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py
index 66d2250e6b8..2862f17147e 100644
--- a/xarray/tests/test_utils.py
+++ b/xarray/tests/test_utils.py
@@ -11,7 +11,7 @@
 from xarray.core.pycompat import OrderedDict
 from .test_coding_times import _all_cftime_date_types
 from . import (TestCase, requires_dask, assert_array_equal,
-               has_cftime_or_netCDF4)
+               has_cftime_or_netCDF4, has_cftime)


 class TestAlias(TestCase):
@@ -50,15 +50,20 @@ def test_safe_cast_to_index_cftimeindex(enable_cftimeindex):
         else:
             expected = pd.Index(dates)

-        with set_options(enable_cftimeindex=enable_cftimeindex):
-            actual = utils.safe_cast_to_index(np.array(dates))
-        assert_array_equal(expected, actual)
-        assert expected.dtype == actual.dtype
-
-        if enable_cftimeindex:
-            assert isinstance(actual, CFTimeIndex)
+        if not has_cftime and enable_cftimeindex:
+            with pytest.raises(ImportError):
+                with set_options(enable_cftimeindex=enable_cftimeindex):
+                    actual = utils.safe_cast_to_index(np.array(dates))
         else:
-            assert isinstance(actual, pd.Index)
+            with set_options(enable_cftimeindex=enable_cftimeindex):
+                actual = utils.safe_cast_to_index(np.array(dates))
+            assert_array_equal(expected, actual)
+            assert expected.dtype == actual.dtype
+
+            if enable_cftimeindex:
+                assert isinstance(actual, CFTimeIndex)
+            else:
+                assert isinstance(actual, pd.Index)


 # Test that datetime.datetime objects are never used in a NetCDFTimeIndex
@@ -68,10 +73,15 @@ def test_safe_cast_to_index_datetime_datetime(enable_cftimeindex):
     dates = [datetime(1, 1, day) for day in range(1, 20)]
     expected = pd.Index(dates)

-    with set_options(enable_cftimeindex=enable_cftimeindex):
-        actual = utils.safe_cast_to_index(np.array(dates))
-    assert_array_equal(expected, actual)
-    assert isinstance(actual, pd.Index)
+    if not has_cftime and enable_cftimeindex:
+        with pytest.raises(ImportError):
+            with set_options(enable_cftimeindex=enable_cftimeindex):
+                actual = utils.safe_cast_to_index(np.array(dates))
+    else:
+        with set_options(enable_cftimeindex=enable_cftimeindex):
+            actual = utils.safe_cast_to_index(np.array(dates))
+        assert_array_equal(expected, actual)
+        assert isinstance(actual, pd.Index)


 def test_multiindex_from_product_levels():
import (TestCase, requires_dask, assert_array_equal, - has_cftime_or_netCDF4) + has_cftime_or_netCDF4, has_cftime) class TestAlias(TestCase): @@ -50,15 +50,20 @@ def test_safe_cast_to_index_cftimeindex(enable_cftimeindex): else: expected = pd.Index(dates) - with set_options(enable_cftimeindex=enable_cftimeindex): - actual = utils.safe_cast_to_index(np.array(dates)) - assert_array_equal(expected, actual) - assert expected.dtype == actual.dtype - - if enable_cftimeindex: - assert isinstance(actual, CFTimeIndex) + if not has_cftime and enable_cftimeindex: + with pytest.raises(ImportError): + with set_options(enable_cftimeindex=enable_cftimeindex): + actual = utils.safe_cast_to_index(np.array(dates)) else: - assert isinstance(actual, pd.Index) + with set_options(enable_cftimeindex=enable_cftimeindex): + actual = utils.safe_cast_to_index(np.array(dates)) + assert_array_equal(expected, actual) + assert expected.dtype == actual.dtype + + if enable_cftimeindex: + assert isinstance(actual, CFTimeIndex) + else: + assert isinstance(actual, pd.Index) # Test that datetime.datetime objects are never used in a NetCDFTimeIndex @@ -68,10 +73,15 @@ def test_safe_cast_to_index_datetime_datetime(enable_cftimeindex): dates = [datetime(1, 1, day) for day in range(1, 20)] expected = pd.Index(dates) - with set_options(enable_cftimeindex=enable_cftimeindex): - actual = utils.safe_cast_to_index(np.array(dates)) - assert_array_equal(expected, actual) - assert isinstance(actual, pd.Index) + if not has_cftime and enable_cftimeindex: + with pytest.raises(ImportError): + with set_options(enable_cftimeindex=enable_cftimeindex): + actual = utils.safe_cast_to_index(np.array(dates)) + else: + with set_options(enable_cftimeindex=enable_cftimeindex): + actual = utils.safe_cast_to_index(np.array(dates)) + assert_array_equal(expected, actual) + assert isinstance(actual, pd.Index) def test_multiindex_from_product_levels(): From 9df4e11954dde858fef0ed9bf6815e203d35d58a Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Mon, 16 Apr 2018 13:41:21 -0400 Subject: [PATCH 39/58] Fix errant what's new changes --- doc/whats-new.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9caa2b251ba..d2cb1813eb4 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -59,9 +59,6 @@ The minor release includes a number of bug-fixes and backwards compatible enhanc Enhancements ~~~~~~~~~~~~ - - Some speed improvement to construct :py:class:`~xarray.DataArrayRolling` - object (:issue:`1993`) - By `Keisuke Fujii `_. - :py:meth:`~xarray.DataArray.isin` and :py:meth:`~xarray.Dataset.isin` methods, which test each value in the array for whether it is contained in the supplied list, returning a bool array. 
See :ref:`selecting values with isin` From 9391463eea5c7124d081550235fd16d0a5fb0f30 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Mon, 16 Apr 2018 13:45:42 -0400 Subject: [PATCH 40/58] flake8 --- xarray/coding/times.py | 2 +- xarray/tests/test_coding_times.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 19035246d18..7bf663e23e7 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -75,7 +75,7 @@ def _import_cftime_datetime(): def _require_standalone_cftime(message=None): """Raises an ImportError if the standalone cftime is not found""" try: - import cftime + import cftime # noqa: F401 except ImportError: if message: raise ImportError(message) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index cb786df7d9c..16935693d06 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -213,7 +213,6 @@ def test_decode_non_standard_calendar_inside_timestamp_range( # https://github.com/Unidata/netcdf4-python/issues/355 assert (abs_diff <= np.timedelta64(1, 's')).all() - @pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed') @pytest.mark.parametrize( @@ -335,7 +334,6 @@ def test_decode_standard_calendar_multidim_time_inside_timestamp_range( assert (abs_diff1 <= np.timedelta64(1, 's')).all() assert (abs_diff2 <= np.timedelta64(1, 's')).all() - @pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed') @pytest.mark.parametrize( From da12ecdc1b66afcfca167d7eef203ea9ec759efd Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Mon, 16 Apr 2018 14:28:26 -0400 Subject: [PATCH 41/58] Fix skip logic in test_cftimeindex.py --- xarray/tests/test_cftimeindex.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index d4b80254932..d20950e3e40 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -13,15 +13,19 @@ from . import has_cftime_or_netCDF4, has_cftime -# Putting this at the module level for now, though technically we -# don't need netcdftime to test the string parser. -pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed') +if not has_cftime_or_netCDF4: + pytest.importorskip('cftime') -try: - import cftime -except ImportError: - import netcdftime as cftime + +def _import_cftime(): + """Import cftime or netcdftime depending on what's available. 
+ Note this is different than coding.times._import_cftime.""" + try: + import cftime + except ImportError: + import netcdftime as cftime + return cftime def date_dict(year=None, month=None, day=None, @@ -66,6 +70,7 @@ def test_parse_iso8601(string, expected): def cftime_date_types(): + cftime = _import_cftime() return [cftime.DatetimeNoLeap, cftime.DatetimeJulian, cftime.DatetimeAllLeap, cftime.DatetimeGregorian, cftime.DatetimeProlepticGregorian, cftime.Datetime360Day] @@ -108,6 +113,7 @@ def df(index): @pytest.fixture def feb_days(date_type): + cftime = _import_cftime() if date_type is cftime.DatetimeAllLeap: return 29 elif date_type is cftime.Datetime360Day: @@ -118,6 +124,7 @@ def feb_days(date_type): @pytest.fixture def dec_days(date_type): + cftime = _import_cftime() if date_type is cftime.Datetime360Day: return 30 else: @@ -125,6 +132,7 @@ def dec_days(date_type): def test_assert_all_valid_date_type(date_type, index): + cftime = _import_cftime() if date_type is cftime.DatetimeNoLeap: mixed_date_types = [date_type(1, 1, 1), cftime.DatetimeAllLeap(1, 2, 1)] From a6997ec8b9fc33a05cc88fd678c4049055fa5a31 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Thu, 26 Apr 2018 15:38:12 -0400 Subject: [PATCH 42/58] Use only_use_cftime_datetimes option in num2date --- xarray/coding/times.py | 23 ++++---- xarray/tests/test_backends.py | 92 ++++++++++++++++++++++--------- xarray/tests/test_coding_times.py | 66 +++++++++++++++++++--- 3 files changed, 137 insertions(+), 44 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 7bf663e23e7..9a5ad691e81 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -72,16 +72,13 @@ def _import_cftime_datetime(): return datetime -def _require_standalone_cftime(message=None): +def _require_standalone_cftime(): """Raises an ImportError if the standalone cftime is not found""" try: import cftime # noqa: F401 except ImportError: - if message: - raise ImportError(message) - else: - raise ImportError('Using a CFTimeIndex requires the standalone ' - 'version of the cftime library.') + raise ImportError('Using a CFTimeIndex requires the standalone ' + 'version of the cftime library.') def _netcdf_to_numpy_timeunit(units): @@ -106,7 +103,11 @@ def _unpack_netcdf_time_units(units): def _decode_datetime_with_cftime(num_dates, units, calendar, enable_cftimeindex): cftime = _import_cftime() - dates = np.asarray(cftime.num2date(num_dates, units, calendar)) + if enable_cftimeindex: + dates = np.asarray(cftime.num2date(num_dates, units, calendar, + only_use_cftime_datetimes=True)) + else: + dates = np.asarray(cftime.num2date(num_dates, units, calendar)) if (dates[np.nanargmin(num_dates)].year < 1678 or dates[np.nanargmax(num_dates)].year >= 2262): @@ -276,9 +277,11 @@ def infer_datetime_units(dates): reference_date = dates[0] if len(dates) > 0 else '1970-01-01' reference_date = pd.Timestamp(reference_date) else: - _require_standalone_cftime( - 'Serializing dates of type cftime.datetime ' - 'requires the standalone cftime library.') + if not OPTIONS['enable_cftimeindex']: + raise ValueError('Serializing dates of type cftime.datetime ' + 'requires setting enable_cftimeindex to True and ' + 'using the standalone cftime library to enable ' + 'accurate roundtripping of date types.') dates = np.asarray(dates).ravel() unique_timedeltas = np.unique(pd.to_timedelta(np.diff(dates))) reference_date = dates[0] if len(dates) > 0 else '1970-01-01' diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 15e5ced6086..284c205af68 
100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -32,7 +32,8 @@ assert_identical, flaky, has_netCDF4, has_scipy, network, raises_regex, requires_dask, requires_h5netcdf, requires_netCDF4, requires_pathlib, requires_pydap, requires_pynio, requires_rasterio, requires_scipy, - requires_scipy_or_netCDF4, requires_zarr, has_cftime) + requires_scipy_or_netCDF4, requires_zarr, has_cftime, + requires_cftime_or_netCDF4) from .test_dataset import create_test_data try: @@ -349,9 +350,10 @@ def test_roundtrip_numpy_datetime_data(self): assert_identical(expected, actual) assert actual.t0.encoding['units'] == 'days since 1950-01-01' - def test_roundtrip_cftime_datetime_data(self): + @requires_cftime_or_netCDF4 + def test_roundtrip_cftime_datetime_data_enable_cftimeindex(self): from .test_coding_times import _all_cftime_date_types - + date_types = _all_cftime_date_types() for date_type in date_types.values(): times = [date_type(1, 1, 1), date_type(1, 1, 2)] @@ -362,24 +364,43 @@ def test_roundtrip_cftime_datetime_data(self): expected_calendar = times[0].calendar if has_cftime: - with self.roundtrip(expected, save_kwargs=kwds) as actual: - abs_diff = abs(actual.t.values - expected_decoded_t) - self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) - self.assertEquals(actual.t.encoding['units'], - 'days since 0001-01-01 00:00:00.000000') - self.assertEquals(actual.t.encoding['calendar'], - expected_calendar) - - abs_diff = abs(actual.t0.values - expected_decoded_t0) - self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) - - self.assertEquals(actual.t0.encoding['units'], - 'days since 0001-01-01') - self.assertEquals(actual.t.encoding['calendar'], - expected_calendar) + with xr.set_options(enable_cftimeindex=True): + with self.roundtrip(expected, save_kwargs=kwds) as actual: + abs_diff = abs(actual.t.values - expected_decoded_t) + assert (abs_diff <= np.timedelta64(1, 's')).all() + assert (actual.t.encoding['units'] == + 'days since 0001-01-01 00:00:00.000000') + assert (actual.t.encoding['calendar'] == + expected_calendar) + + abs_diff = abs(actual.t0.values - expected_decoded_t0) + assert (abs_diff <= np.timedelta64(1, 's')).all() + assert (actual.t0.encoding['units'] == + 'days since 0001-01-01') + assert (actual.t.encoding['calendar'] == + expected_calendar) else: with pytest.raises(ImportError): - with self.roundtrip(expected, save_kwargs=kwds) as actual: + with xr.set_options(enable_cftimeindex=True): + with self.roundtrip(expected, + save_kwargs=kwds) as actual: + pass + + @requires_cftime_or_netCDF4 + def test_roundtrip_cftime_datetime_data_disable_cftimeindex(self): + from .test_coding_times import _all_cftime_date_types + + date_types = _all_cftime_date_types() + for date_type in date_types.values(): + times = [date_type(1, 1, 1), date_type(1, 1, 2)] + expected = Dataset({'t': ('t', times), 't0': times[0]}) + kwds = {'encoding': {'t0': {'units': 'days since 0001-01-01'}}} + + with pytest.raises(ValueError): + with xr.set_options(enable_cftimeindex=False): + with self.roundtrip( + expected, + save_kwargs=kwds) as actual: # noqa: F841 pass def test_roundtrip_timedelta_data(self): @@ -1927,7 +1948,7 @@ def test_roundtrip_numpy_datetime_data(self): with self.roundtrip(expected) as actual: assert_identical(expected, actual) - def test_roundtrip_cftime_datetime_data(self): + def test_roundtrip_cftime_datetime_data_enable_cftimeindex(self): # Override method in DatasetIOTestCases - remove not applicable # save_kwds from .test_coding_times import 
_all_cftime_date_types @@ -1939,12 +1960,33 @@ def test_roundtrip_cftime_datetime_data(self): expected_decoded_t = np.array(times) expected_decoded_t0 = np.array([date_type(1, 1, 1)]) - with self.roundtrip(expected) as actual: - abs_diff = abs(actual.t.values - expected_decoded_t) - self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) + with xr.set_options(enable_cftimeindex=True): + with self.roundtrip(expected) as actual: + abs_diff = abs(actual.t.values - expected_decoded_t) + self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) + + abs_diff = abs(actual.t0.values - expected_decoded_t0) + self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) + + def test_roundtrip_cftime_datetime_data_disable_cftimeindex(self): + # Override method in DatasetIOTestCases - remove not applicable + # save_kwds + from .test_coding_times import _all_cftime_date_types + + date_types = _all_cftime_date_types() + for date_type in date_types.values(): + times = [date_type(1, 1, 1), date_type(1, 1, 2)] + expected = Dataset({'t': ('t', times), 't0': times[0]}) + expected_decoded_t = np.array(times) + expected_decoded_t0 = np.array([date_type(1, 1, 1)]) - abs_diff = abs(actual.t0.values - expected_decoded_t0) - self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) + with xr.set_options(enable_cftimeindex=False): + with self.roundtrip(expected) as actual: + abs_diff = abs(actual.t.values - expected_decoded_t) + self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) + + abs_diff = abs(actual.t0.values - expected_decoded_t0) + self.assertTrue((abs_diff <= np.timedelta64(1, 's')).all()) def test_write_store(self): # Override method in DatasetIOTestCases - not applicable to dask diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 16935693d06..019472c83bc 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -160,6 +160,9 @@ def test_decode_cf_datetime_non_iso_strings(): product(coding.times._STANDARD_CALENDARS, [False, True])) def test_decode_standard_calendar_inside_timestamp_range( calendar, enable_cftimeindex): + if enable_cftimeindex: + pytest.importorskip('cftime') + cftime = _import_cftime() units = 'days since 0001-01-01' times = pd.date_range('2001-04-01-00', end='2001-04-30-23', @@ -188,6 +191,9 @@ def test_decode_standard_calendar_inside_timestamp_range( product(_NON_STANDARD_CALENDARS, [False, True])) def test_decode_non_standard_calendar_inside_timestamp_range( calendar, enable_cftimeindex): + if enable_cftimeindex: + pytest.importorskip('cftime') + cftime = _import_cftime() units = 'days since 0001-01-01' times = pd.date_range('2001-04-01-00', end='2001-04-30-23', @@ -221,12 +227,20 @@ def test_decode_non_standard_calendar_inside_timestamp_range( def test_decode_dates_outside_timestamp_range( calendar, enable_cftimeindex): from datetime import datetime + + if enable_cftimeindex: + pytest.importorskip('cftime') + cftime = _import_cftime() units = 'days since 0001-01-01' times = [datetime(1, 4, 1, h) for h in range(1, 5)] noleap_time = cftime.date2num(times, units, calendar=calendar) - expected = cftime.num2date(noleap_time, units, calendar=calendar) + if enable_cftimeindex: + expected = cftime.num2date(noleap_time, units, calendar=calendar, + only_use_cftime_datetimes=True) + else: + expected = cftime.num2date(noleap_time, units, calendar=calendar) expected_date_type = type(expected[0]) with warnings.catch_warnings(): @@ -248,6 +262,9 @@ def test_decode_dates_outside_timestamp_range( 
product(coding.times._STANDARD_CALENDARS, [False, True])) def test_decode_standard_calendar_single_element_inside_timestamp_range( calendar, enable_cftimeindex): + if enable_cftimeindex: + pytest.importorskip('cftime') + units = 'days since 0001-01-01' for num_time in [735368, [735368], [[735368]]]: with warnings.catch_warnings(): @@ -265,6 +282,9 @@ def test_decode_standard_calendar_single_element_inside_timestamp_range( product(_NON_STANDARD_CALENDARS, [False, True])) def test_decode_non_standard_calendar_single_element_inside_timestamp_range( calendar, enable_cftimeindex): + if enable_cftimeindex: + pytest.importorskip('cftime') + units = 'days since 0001-01-01' for num_time in [735368, [735368], [[735368]]]: with warnings.catch_warnings(): @@ -285,6 +305,9 @@ def test_decode_non_standard_calendar_single_element_inside_timestamp_range( product(_NON_STANDARD_CALENDARS, [False, True])) def test_decode_single_element_outside_timestamp_range( calendar, enable_cftimeindex): + if enable_cftimeindex: + pytest.importorskip('cftime') + cftime = _import_cftime() units = 'days since 0001-01-01' for days in [1, 1470376]: @@ -305,6 +328,9 @@ def test_decode_single_element_outside_timestamp_range( product(coding.times._STANDARD_CALENDARS, [False, True])) def test_decode_standard_calendar_multidim_time_inside_timestamp_range( calendar, enable_cftimeindex): + if enable_cftimeindex: + pytest.importorskip('cftime') + cftime = _import_cftime() units = 'days since 0001-01-01' @@ -341,6 +367,9 @@ def test_decode_standard_calendar_multidim_time_inside_timestamp_range( product(_NON_STANDARD_CALENDARS, [False, True])) def test_decode_nonstandard_calendar_multidim_time_inside_timestamp_range( calendar, enable_cftimeindex): + if enable_cftimeindex: + pytest.importorskip('cftime') + cftime = _import_cftime() units = 'days since 0001-01-01' @@ -384,6 +413,10 @@ def test_decode_nonstandard_calendar_multidim_time_inside_timestamp_range( def test_decode_multidim_time_outside_timestamp_range( calendar, enable_cftimeindex): from datetime import datetime + + if enable_cftimeindex: + pytest.importorskip('cftime') + cftime = _import_cftime() units = 'days since 0001-01-01' @@ -395,8 +428,14 @@ def test_decode_multidim_time_outside_timestamp_range( mdim_time[:, 0] = noleap_time1 mdim_time[:, 1] = noleap_time2 - expected1 = cftime.num2date(noleap_time1, units, calendar) - expected2 = cftime.num2date(noleap_time2, units, calendar) + if enable_cftimeindex: + expected1 = cftime.num2date(noleap_time1, units, calendar, + only_use_cftime_datetimes=True) + expected2 = cftime.num2date(noleap_time2, units, calendar, + only_use_cftime_datetimes=True) + else: + expected1 = cftime.num2date(noleap_time1, units, calendar) + expected2 = cftime.num2date(noleap_time2, units, calendar) with warnings.catch_warnings(): warnings.filterwarnings('ignore', 'Unable to decode time axis') @@ -421,6 +460,9 @@ def test_decode_multidim_time_outside_timestamp_range( product(['360_day', 'all_leap', '366_day'], [False, True])) def test_decode_non_standard_calendar_single_element_fallback( calendar, enable_cftimeindex): + if enable_cftimeindex: + pytest.importorskip('cftime') + cftime = _import_cftime() units = 'days since 0001-01-01' @@ -447,6 +489,9 @@ def test_decode_non_standard_calendar_single_element_fallback( product(['360_day'], [False, True])) def test_decode_non_standard_calendar_fallback( calendar, enable_cftimeindex): + if enable_cftimeindex: + pytest.importorskip('cftime') + cftime = _import_cftime() # ensure leap year doesn't matter for year 
in [2010, 2011, 2012, 2013, 2014]: @@ -514,8 +559,9 @@ def test_infer_datetime_units(dates, expected): assert expected == coding.times.infer_datetime_units(dates) -@requires_cftime_or_netCDF4 -def test_infer_cftime_datetime_units(): +@pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed') +@pytest.mark.parametrize('enable_cftimeindex', [False, True]) +def test_infer_cftime_datetime_units(enable_cftimeindex): date_types = _all_cftime_date_types() for date_type in date_types.values(): for dates, expected in [ @@ -532,11 +578,13 @@ def test_infer_cftime_datetime_units(): ([date_type(1900, 1, 1), date_type(1900, 1, 2, 0, 0, 0, 5)], 'days since 1900-01-01 00:00:00.000000')]: - if has_cftime: - assert expected == coding.times.infer_datetime_units(dates) + if enable_cftimeindex: + with set_options(enable_cftimeindex=enable_cftimeindex): + assert expected == coding.times.infer_datetime_units(dates) else: - with pytest.raises(ImportError): - coding.times.infer_datetime_units(dates) + with set_options(enable_cftimeindex=enable_cftimeindex): + with pytest.raises(ValueError): + coding.times.infer_datetime_units(dates) @pytest.mark.parametrize( From 9dc5539c6f12aa0f2b9fa8e5ce80f74c2f737224 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 28 Apr 2018 08:41:37 -0400 Subject: [PATCH 43/58] Require standalone cftime library for all new functionality Add tests/fixes for dt accessor with cftime datetimes --- xarray/coding/cftimeindex.py | 11 ++-- xarray/coding/times.py | 16 ----- xarray/core/accessors.py | 2 +- xarray/core/common.py | 11 ++-- xarray/core/utils.py | 14 ++--- xarray/tests/test_accessors.py | 105 ++++++++++++++++++++++++++++++- xarray/tests/test_backends.py | 6 +- xarray/tests/test_cftimeindex.py | 19 +----- xarray/tests/test_utils.py | 9 +-- 9 files changed, 131 insertions(+), 62 deletions(-) diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index ac9cee4cb44..a3ff5326b0e 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -8,6 +8,8 @@ from xarray.core import pycompat from xarray.core.utils import is_scalar +from .times import _require_standalone_cftime + def named(name, pattern): return '(?P<' + name + '>' + pattern + ')' @@ -120,10 +122,7 @@ def get_date_type(self): def assert_all_valid_date_type(data): - try: - import cftime - except ImportError: - import netcdftime as cftime + import cftime valid_types = (cftime.DatetimeJulian, cftime.DatetimeNoLeap, cftime.DatetimeAllLeap, cftime.DatetimeGregorian, @@ -135,8 +134,7 @@ def assert_all_valid_date_type(data): if not isinstance(sample, valid_types): raise TypeError( 'CFTimeIndex requires cftime.datetime ' - 'or datetime.datetime objects. ' - 'Got object of {}.'.format(date_type)) + 'objects. 
Got object of {}.'.format(date_type)) if not all(isinstance(value, date_type) for value in data): raise TypeError( 'CFTimeIndex requires using datetime ' @@ -155,6 +153,7 @@ class CFTimeIndex(pd.Index): date_type = property(get_date_type) def __new__(cls, data): + _require_standalone_cftime() result = object.__new__(cls) assert_all_valid_date_type(data) result._data = np.array(data) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 9a5ad691e81..2c1cb2ea4b6 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -56,22 +56,6 @@ def _import_cftime(): return cftime -def _import_cftime_datetime(): - """Helper function to handle importing cftime.datetime across the - transition between the version of cftime packaged with netCDF4 and the - standalone version""" - try: - from cftime import datetime - except ImportError: - # Need to use private API to import generic cftime datetime in - # older versions. See https://github.com/Unidata/cftime/issues/8 - try: - from netcdftime._netcdftime import datetime - except ImportError: - raise ImportError("Failed to import cftime.datetime") - return datetime - - def _require_standalone_cftime(): """Raises an ImportError if the standalone cftime is not found""" try: diff --git a/xarray/core/accessors.py b/xarray/core/accessors.py index 76628cf3121..81af0532d93 100644 --- a/xarray/core/accessors.py +++ b/xarray/core/accessors.py @@ -21,7 +21,7 @@ def _access_through_cftimeindex(values, name): and access requested datetime component """ from ..coding.cftimeindex import CFTimeIndex - values_as_cftimeindex = CFTimeIndex(values) + values_as_cftimeindex = CFTimeIndex(values.ravel()) if name == 'season': months = values_as_cftimeindex.month field_values = _season_from_months(months) diff --git a/xarray/core/common.py b/xarray/core/common.py index 061ad732530..0125aa0505b 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -879,14 +879,17 @@ def is_np_datetime_like(dtype): def contains_cftime_datetimes(var): """Check if a variable contains cftime datetime objects""" - from ..coding.times import _import_cftime_datetime - try: - cftime_datetime = _import_cftime_datetime() + from cftime import datetime as cftime_datetime except ImportError: return False else: - return isinstance(var.data.flatten()[0], cftime_datetime) + sample = var.data.ravel()[0] + if isinstance(sample, dask_array_type): + sample = sample.compute() + if isinstance(sample, np.ndarray): + sample = sample.item() + return isinstance(sample, cftime_datetime) def _contains_datetime_like_objects(var): diff --git a/xarray/core/utils.py b/xarray/core/utils.py index c77cb7b5705..ec243da3fd4 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -38,22 +38,16 @@ def wrapper(*args, **kwargs): def _maybe_cast_to_cftimeindex(index): - from ..coding.times import (_import_cftime_datetime, - _require_standalone_cftime) + from ..coding.cftimeindex import CFTimeIndex + from ..coding.times import _require_standalone_cftime if not OPTIONS['enable_cftimeindex']: return index else: _require_standalone_cftime() try: - cftime_datetime = _import_cftime_datetime() - from ..coding.cftimeindex import CFTimeIndex - except ImportError: - return index - else: - if len(index): - if isinstance(index[0], cftime_datetime): - index = CFTimeIndex(index) + return CFTimeIndex(index) + except TypeError: return index diff --git a/xarray/tests/test_accessors.py b/xarray/tests/test_accessors.py index ad521546d2e..1f9fcb41e85 100644 --- a/xarray/tests/test_accessors.py +++ 
b/xarray/tests/test_accessors.py @@ -2,11 +2,13 @@ import numpy as np import pandas as pd +import pytest import xarray as xr from . import ( - TestCase, assert_array_equal, assert_equal, raises_regex, requires_dask) + TestCase, assert_array_equal, assert_equal, raises_regex, requires_dask, + has_cftime, has_dask) class TestDatetimeAccessor(TestCase): @@ -114,3 +116,104 @@ def test_rounders(self): xdates.time.dt.ceil('D').values) assert_array_equal(dates.round('D').values, xdates.time.dt.round('D').values) + + +_CFTIME_CALENDARS = ['365_day', '360_day', 'julian', 'all_leap', + '366_day', 'gregorian', 'proleptic_gregorian'] +_NT = 100 + + +@pytest.fixture(params=_CFTIME_CALENDARS) +def calendar(request): + return request.param + + +@pytest.fixture() +def times(calendar): + import cftime + + return cftime.num2date( + np.arange(_NT), units='hours since 2000-01-01', calendar=calendar, + only_use_cftime_datetimes=True) + + +@pytest.fixture() +def data(times): + data = np.random.rand(10, 10, _NT) + lons = np.linspace(0, 11, 10) + lats = np.linspace(0, 20, 10) + return xr.DataArray(data, coords=[lons, lats, times], + dims=['lon', 'lat', 'time'], name='data') + + +@pytest.fixture() +def times_3d(times): + lons = np.linspace(0, 11, 10) + lats = np.linspace(0, 20, 10) + times_arr = np.random.choice(times, size=(10, 10, _NT)) + return xr.DataArray(times_arr, coords=[lons, lats, times], + dims=['lon', 'lat', 'time'], + name='data') + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@pytest.mark.parametrize('field', ['year', 'month', 'day', 'hour']) +def test_field_access(data, field): + result = getattr(data.time.dt, field) + expected = xr.DataArray( + getattr(xr.coding.cftimeindex.CFTimeIndex(data.time.values), field), + name=field, coords=data.time.coords, dims=data.time.dims) + + assert_equal(result, expected) + + +@pytest.mark.skipif(not has_dask, reason='dask not installed') +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@pytest.mark.parametrize('field', ['year', 'month', 'day', 'hour']) +def test_dask_field_access_1d(data, field): + import dask.array as da + + expected = xr.DataArray( + getattr(xr.coding.cftimeindex.CFTimeIndex(data.time.values), field), + name=field, dims=['time']) + times = xr.DataArray(data.time.values, dims=['time']).chunk({'time': 50}) + result = getattr(times.dt, field) + assert isinstance(result.data, da.Array) + assert result.chunks == times.chunks + assert_equal(result.compute(), expected) + + +@pytest.mark.skipif(not has_dask, reason='dask not installed') +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@pytest.mark.parametrize('field', ['year', 'month', 'day', 'hour']) +def test_dask_field_access(times_3d, data, field): + import dask.array as da + + expected = xr.DataArray( + getattr(xr.coding.cftimeindex.CFTimeIndex(times_3d.values.ravel()), + field).reshape(times_3d.shape), + name=field, coords=times_3d.coords, dims=times_3d.dims) + times_3d = times_3d.chunk({'lon': 5, 'lat': 5, 'time': 50}) + result = getattr(times_3d.dt, field) + assert isinstance(result.data, da.Array) + assert result.chunks == times_3d.chunks + assert_equal(result.compute(), expected) + + +@pytest.fixture() +def cftime_date_type(calendar): + from .test_coding_times import _all_cftime_date_types + + return _all_cftime_date_types()[calendar] + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +def test_seasons(cftime_date_type): + dates = np.array([cftime_date_type(2000, month, 15) + for month in range(1, 13)]) + 
dates = xr.DataArray(dates) + seasons = ['DJF', 'DJF', 'MAM', 'MAM', 'MAM', 'JJA', + 'JJA', 'JJA', 'SON', 'SON', 'SON', 'DJF'] + seasons = xr.DataArray(seasons) + + assert_array_equal(seasons.values, dates.dt.season.values) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index b8d0957fd9b..b3fb27d4d90 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -353,7 +353,7 @@ def test_roundtrip_numpy_datetime_data(self): @requires_cftime_or_netCDF4 def test_roundtrip_cftime_datetime_data_enable_cftimeindex(self): from .test_coding_times import _all_cftime_date_types - + date_types = _all_cftime_date_types() for date_type in date_types.values(): times = [date_type(1, 1, 1), date_type(1, 1, 2)] @@ -380,7 +380,7 @@ def test_roundtrip_cftime_datetime_data_enable_cftimeindex(self): assert (actual.t.encoding['calendar'] == expected_calendar) else: - with pytest.raises(ImportError): + with pytest.raises((ValueError, NotImplementedError)): with xr.set_options(enable_cftimeindex=True): with self.roundtrip(expected, save_kwargs=kwds) as actual: @@ -396,7 +396,7 @@ def test_roundtrip_cftime_datetime_data_disable_cftimeindex(self): expected = Dataset({'t': ('t', times), 't0': times[0]}) kwds = {'encoding': {'t0': {'units': 'days since 0001-01-01'}}} - with pytest.raises(ValueError): + with pytest.raises((ValueError, NotImplementedError)): with xr.set_options(enable_cftimeindex=False): with self.roundtrip( expected, diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index d20950e3e40..06bba63b534 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -11,21 +11,10 @@ _parsed_string_to_bounds, _parse_iso8601_with_reso) from xarray.tests import assert_array_equal, assert_identical -from . import has_cftime_or_netCDF4, has_cftime +from . import has_cftime -if not has_cftime_or_netCDF4: - pytest.importorskip('cftime') - - -def _import_cftime(): - """Import cftime or netcdftime depending on what's available. 
- Note this is different than coding.times._import_cftime.""" - try: - import cftime - except ImportError: - import netcdftime as cftime - return cftime +cftime = pytest.importorskip('cftime') def date_dict(year=None, month=None, day=None, @@ -70,7 +59,6 @@ def test_parse_iso8601(string, expected): def cftime_date_types(): - cftime = _import_cftime() return [cftime.DatetimeNoLeap, cftime.DatetimeJulian, cftime.DatetimeAllLeap, cftime.DatetimeGregorian, cftime.DatetimeProlepticGregorian, cftime.Datetime360Day] @@ -113,7 +101,6 @@ def df(index): @pytest.fixture def feb_days(date_type): - cftime = _import_cftime() if date_type is cftime.DatetimeAllLeap: return 29 elif date_type is cftime.Datetime360Day: @@ -124,7 +111,6 @@ def feb_days(date_type): @pytest.fixture def dec_days(date_type): - cftime = _import_cftime() if date_type is cftime.Datetime360Day: return 30 else: @@ -132,7 +118,6 @@ def dec_days(date_type): def test_assert_all_valid_date_type(date_type, index): - cftime = _import_cftime() if date_type is cftime.DatetimeNoLeap: mixed_date_types = [date_type(1, 1, 1), cftime.DatetimeAllLeap(1, 2, 1)] diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index 2862f17147e..3b09e93bd7f 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -45,16 +45,17 @@ def test_safe_cast_to_index_cftimeindex(enable_cftimeindex): date_types = _all_cftime_date_types() for date_type in date_types.values(): dates = [date_type(1, 1, day) for day in range(1, 20)] - if enable_cftimeindex: - expected = CFTimeIndex(dates) - else: - expected = pd.Index(dates) if not has_cftime and enable_cftimeindex: with pytest.raises(ImportError): with set_options(enable_cftimeindex=enable_cftimeindex): actual = utils.safe_cast_to_index(np.array(dates)) else: + if enable_cftimeindex: + expected = CFTimeIndex(dates) + else: + expected = pd.Index(dates) + with set_options(enable_cftimeindex=enable_cftimeindex): actual = utils.safe_cast_to_index(np.array(dates)) assert_array_equal(expected, actual) From 1aa8d865d5e4246fc8daa08618f0292b0817c5f7 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 28 Apr 2018 08:55:19 -0400 Subject: [PATCH 44/58] Improve skipping logic in test_cftimeindex.py --- xarray/tests/test_cftimeindex.py | 49 ++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 9 deletions(-) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 06bba63b534..7eb55b698c3 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -12,9 +12,7 @@ from xarray.tests import assert_array_equal, assert_identical from . 
import has_cftime - - -cftime = pytest.importorskip('cftime') +from .test_coding_times import _all_cftime_date_types def date_dict(year=None, month=None, day=None, @@ -58,15 +56,13 @@ def test_parse_iso8601(string, expected): parse_iso8601(string + '.3') -def cftime_date_types(): - return [cftime.DatetimeNoLeap, cftime.DatetimeJulian, - cftime.DatetimeAllLeap, cftime.DatetimeGregorian, - cftime.DatetimeProlepticGregorian, cftime.Datetime360Day] +_CFTIME_CALENDARS = ['365_day', '360_day', 'julian', 'all_leap', + '366_day', 'gregorian', 'proleptic_gregorian'] -@pytest.fixture(params=cftime_date_types()) +@pytest.fixture(params=_CFTIME_CALENDARS) def date_type(request): - return request.param + return _all_cftime_date_types()[request.param] @pytest.fixture @@ -101,6 +97,7 @@ def df(index): @pytest.fixture def feb_days(date_type): + import cftime if date_type is cftime.DatetimeAllLeap: return 29 elif date_type is cftime.Datetime360Day: @@ -111,13 +108,16 @@ def feb_days(date_type): @pytest.fixture def dec_days(date_type): + import cftime if date_type is cftime.Datetime360Day: return 30 else: return 31 +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_assert_all_valid_date_type(date_type, index): + import cftime if date_type is cftime.DatetimeNoLeap: mixed_date_types = [date_type(1, 1, 1), cftime.DatetimeAllLeap(1, 2, 1)] @@ -133,6 +133,7 @@ def test_assert_all_valid_date_type(date_type, index): assert_all_valid_date_type([date_type(1, 1, 1), date_type(1, 2, 1)]) +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') @pytest.mark.parametrize(('field', 'expected'), [ ('year', [1, 1, 2, 2]), ('month', [1, 2, 1, 2]), @@ -146,6 +147,7 @@ def test_cftimeindex_field_accessors(index, field, expected): assert_array_equal(result, expected) +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') @pytest.mark.parametrize(('string', 'date_args', 'reso'), [ ('1999', (1999, 1, 1), 'year'), ('199902', (1999, 2, 1), 'month'), @@ -161,6 +163,7 @@ def test_parse_iso8601_with_reso(date_type, string, date_args, reso): assert result_reso == expected_reso +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_parse_string_to_bounds_year(date_type, dec_days): parsed = date_type(2, 2, 10, 6, 2, 8, 1) expected_start = date_type(2, 1, 1) @@ -171,6 +174,7 @@ def test_parse_string_to_bounds_year(date_type, dec_days): assert result_end == expected_end +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_parse_string_to_bounds_month_feb(date_type, feb_days): parsed = date_type(2, 2, 10, 6, 2, 8, 1) expected_start = date_type(2, 2, 1) @@ -181,6 +185,7 @@ def test_parse_string_to_bounds_month_feb(date_type, feb_days): assert result_end == expected_end +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_parse_string_to_bounds_month_dec(date_type, dec_days): parsed = date_type(2, 12, 1) expected_start = date_type(2, 12, 1) @@ -191,6 +196,7 @@ def test_parse_string_to_bounds_month_dec(date_type, dec_days): assert result_end == expected_end +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') @pytest.mark.parametrize(('reso', 'ex_start_args', 'ex_end_args'), [ ('day', (2, 2, 10), (2, 2, 10, 23, 59, 59, 999999)), ('hour', (2, 2, 10, 6), (2, 2, 10, 6, 59, 59, 999999)), @@ -208,11 +214,13 @@ def test_parsed_string_to_bounds_sub_monthly(date_type, reso, assert result_end == expected_end +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def 
test_parsed_string_to_bounds_raises(date_type): with pytest.raises(KeyError): _parsed_string_to_bounds(date_type, 'a', date_type(1, 1, 1)) +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_get_loc(date_type, index): result = index.get_loc('0001') expected = [0, 1] @@ -227,6 +235,7 @@ def test_get_loc(date_type, index): assert result == expected +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') @pytest.mark.parametrize('kind', ['loc', 'getitem']) def test_get_slice_bound(date_type, index, kind): result = index.get_slice_bound('0001', 'left', kind) @@ -248,6 +257,7 @@ def test_get_slice_bound(date_type, index, kind): assert result == expected +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') @pytest.mark.parametrize('kind', ['loc', 'getitem']) def test_get_slice_bound_decreasing_index( date_type, monotonic_decreasing_index, kind): @@ -270,10 +280,12 @@ def test_get_slice_bound_decreasing_index( assert result == expected +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_date_type_property(date_type, index): assert index.date_type is date_type +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_contains(date_type, index): assert '0001-01-01' in index assert '0001' in index @@ -282,12 +294,14 @@ def test_contains(date_type, index): assert date_type(3, 1, 1) not in index +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_groupby(da): result = da.groupby('time.month').sum('time') expected = xr.DataArray([4, 6], coords=[[1, 2]], dims=['month']) assert_identical(result, expected) +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_resample_error(da): with pytest.raises(TypeError): da.resample(time='Y') @@ -300,6 +314,7 @@ def test_resample_error(da): } +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') @pytest.mark.parametrize('sel_arg', list(SEL_STRING_OR_LIST_TESTS.values()), ids=list(SEL_STRING_OR_LIST_TESTS.keys())) def test_sel_string_or_list(da, index, sel_arg): @@ -308,6 +323,7 @@ def test_sel_string_or_list(da, index, sel_arg): assert_identical(result, expected) +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_sel_date_slice_or_list(da, index, date_type): expected = xr.DataArray([1, 2], coords=[index[:2]], dims=['time']) result = da.sel(time=slice(date_type(1, 1, 1), date_type(1, 12, 30))) @@ -317,12 +333,14 @@ def test_sel_date_slice_or_list(da, index, date_type): assert_identical(result, expected) +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_sel_date_scalar(da, date_type, index): expected = xr.DataArray(1).assign_coords(time=index[0]) result = da.sel(time=date_type(1, 1, 1)) assert_identical(result, expected) +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') @pytest.mark.parametrize('sel_kwargs', [ {'method': 'nearest'}, {'method': 'nearest', 'tolerance': timedelta(days=70)} @@ -337,6 +355,7 @@ def test_sel_date_scalar_nearest(da, date_type, index, sel_kwargs): assert_identical(result, expected) +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') @pytest.mark.parametrize('sel_kwargs', [ {'method': 'pad'}, {'method': 'pad', 'tolerance': timedelta(days=365)} @@ -351,6 +370,7 @@ def test_sel_date_scalar_pad(da, date_type, index, sel_kwargs): assert_identical(result, expected) +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') @pytest.mark.parametrize('sel_kwargs', [ {'method': 'backfill'}, {'method': 
'backfill', 'tolerance': timedelta(days=365)} @@ -365,6 +385,7 @@ def test_sel_date_scalar_backfill(da, date_type, index, sel_kwargs): assert_identical(result, expected) +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') @pytest.mark.parametrize('sel_kwargs', [ {'method': 'pad', 'tolerance': timedelta(days=20)}, {'method': 'backfill', 'tolerance': timedelta(days=20)}, @@ -375,6 +396,7 @@ def test_sel_date_scalar_tolerance_raises(da, date_type, sel_kwargs): da.sel(time=date_type(1, 5, 1), **sel_kwargs) +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') @pytest.mark.parametrize('sel_kwargs', [ {'method': 'nearest'}, {'method': 'nearest', 'tolerance': timedelta(days=70)} @@ -399,6 +421,7 @@ def test_sel_date_list_nearest(da, date_type, index, sel_kwargs): assert_identical(result, expected) +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') @pytest.mark.parametrize('sel_kwargs', [ {'method': 'pad'}, {'method': 'pad', 'tolerance': timedelta(days=365)} @@ -411,6 +434,7 @@ def test_sel_date_list_pad(da, date_type, index, sel_kwargs): assert_identical(result, expected) +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') @pytest.mark.parametrize('sel_kwargs', [ {'method': 'backfill'}, {'method': 'backfill', 'tolerance': timedelta(days=365)} @@ -423,6 +447,7 @@ def test_sel_date_list_backfill(da, date_type, index, sel_kwargs): assert_identical(result, expected) +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') @pytest.mark.parametrize('sel_kwargs', [ {'method': 'pad', 'tolerance': timedelta(days=20)}, {'method': 'backfill', 'tolerance': timedelta(days=20)}, @@ -433,6 +458,7 @@ def test_sel_date_list_tolerance_raises(da, date_type, sel_kwargs): da.sel(time=[date_type(1, 2, 1), date_type(1, 5, 1)], **sel_kwargs) +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_isel(da, index): expected = xr.DataArray(1).assign_coords(time=index[0]) result = da.isel(time=0) @@ -456,6 +482,7 @@ def range_args(date_type): slice(None, date_type(1, 12, 30))] +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_indexing_in_series_getitem(series, index, scalar_args, range_args): for arg in scalar_args: assert series[arg] == 1 @@ -465,6 +492,7 @@ def test_indexing_in_series_getitem(series, index, scalar_args, range_args): assert series[arg].equals(expected) +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_indexing_in_series_loc(series, index, scalar_args, range_args): for arg in scalar_args: assert series.loc[arg] == 1 @@ -474,6 +502,7 @@ def test_indexing_in_series_loc(series, index, scalar_args, range_args): assert series.loc[arg].equals(expected) +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_indexing_in_series_iloc(series, index): expected = 1 assert series.iloc[0] == expected @@ -482,6 +511,7 @@ def test_indexing_in_series_iloc(series, index): assert series.iloc[:2].equals(expected) +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_indexing_in_dataframe_loc(df, index, scalar_args, range_args): expected = pd.Series([1], name=index[0]) for arg in scalar_args: @@ -494,6 +524,7 @@ def test_indexing_in_dataframe_loc(df, index, scalar_args, range_args): assert result.equals(expected) +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_indexing_in_dataframe_iloc(df, index): expected = pd.Series([1], name=index[0]) result = df.iloc[0] From ef3f2b1087d774ab9c5dd8e0db772f3993568f8f Mon 
Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 28 Apr 2018 09:18:01 -0400 Subject: [PATCH 45/58] Fix skipping logic in test_cftimeindex.py for when cftime or netcdftime are not available. Use existing requires_cftime decorator where possible (i.e. only on tests that are not parametrized via pytest.mark.parametrize) --- xarray/tests/test_cftimeindex.py | 39 ++++++++++++++++---------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 7eb55b698c3..59222444f92 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -11,7 +11,7 @@ _parsed_string_to_bounds, _parse_iso8601_with_reso) from xarray.tests import assert_array_equal, assert_identical -from . import has_cftime +from . import has_cftime, has_cftime_or_netCDF4, requires_cftime from .test_coding_times import _all_cftime_date_types @@ -115,7 +115,7 @@ def dec_days(date_type): return 31 -@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@requires_cftime def test_assert_all_valid_date_type(date_type, index): import cftime if date_type is cftime.DatetimeNoLeap: @@ -163,7 +163,7 @@ def test_parse_iso8601_with_reso(date_type, string, date_args, reso): assert result_reso == expected_reso -@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@requires_cftime def test_parse_string_to_bounds_year(date_type, dec_days): parsed = date_type(2, 2, 10, 6, 2, 8, 1) expected_start = date_type(2, 1, 1) @@ -174,7 +174,7 @@ def test_parse_string_to_bounds_year(date_type, dec_days): assert result_end == expected_end -@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@requires_cftime def test_parse_string_to_bounds_month_feb(date_type, feb_days): parsed = date_type(2, 2, 10, 6, 2, 8, 1) expected_start = date_type(2, 2, 1) @@ -185,7 +185,7 @@ def test_parse_string_to_bounds_month_feb(date_type, feb_days): assert result_end == expected_end -@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@requires_cftime def test_parse_string_to_bounds_month_dec(date_type, dec_days): parsed = date_type(2, 12, 1) expected_start = date_type(2, 12, 1) @@ -214,13 +214,13 @@ def test_parsed_string_to_bounds_sub_monthly(date_type, reso, assert result_end == expected_end -@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@requires_cftime def test_parsed_string_to_bounds_raises(date_type): with pytest.raises(KeyError): _parsed_string_to_bounds(date_type, 'a', date_type(1, 1, 1)) -@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@requires_cftime def test_get_loc(date_type, index): result = index.get_loc('0001') expected = [0, 1] @@ -280,12 +280,12 @@ def test_get_slice_bound_decreasing_index( assert result == expected -@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@requires_cftime def test_date_type_property(date_type, index): assert index.date_type is date_type -@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@requires_cftime def test_contains(date_type, index): assert '0001-01-01' in index assert '0001' in index @@ -294,14 +294,14 @@ def test_contains(date_type, index): assert date_type(3, 1, 1) not in index -@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@requires_cftime def test_groupby(da): result = da.groupby('time.month').sum('time') expected = xr.DataArray([4, 6], coords=[[1, 2]], dims=['month']) assert_identical(result, expected) -@pytest.mark.skipif(not has_cftime, reason='cftime not 
installed') +@requires_cftime def test_resample_error(da): with pytest.raises(TypeError): da.resample(time='Y') @@ -323,7 +323,7 @@ def test_sel_string_or_list(da, index, sel_arg): assert_identical(result, expected) -@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@requires_cftime def test_sel_date_slice_or_list(da, index, date_type): expected = xr.DataArray([1, 2], coords=[index[:2]], dims=['time']) result = da.sel(time=slice(date_type(1, 1, 1), date_type(1, 12, 30))) @@ -333,7 +333,7 @@ def test_sel_date_slice_or_list(da, index, date_type): assert_identical(result, expected) -@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@requires_cftime def test_sel_date_scalar(da, date_type, index): expected = xr.DataArray(1).assign_coords(time=index[0]) result = da.sel(time=date_type(1, 1, 1)) @@ -458,7 +458,7 @@ def test_sel_date_list_tolerance_raises(da, date_type, sel_kwargs): da.sel(time=[date_type(1, 2, 1), date_type(1, 5, 1)], **sel_kwargs) -@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@requires_cftime def test_isel(da, index): expected = xr.DataArray(1).assign_coords(time=index[0]) result = da.isel(time=0) @@ -482,7 +482,7 @@ def range_args(date_type): slice(None, date_type(1, 12, 30))] -@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@requires_cftime def test_indexing_in_series_getitem(series, index, scalar_args, range_args): for arg in scalar_args: assert series[arg] == 1 @@ -492,7 +492,7 @@ def test_indexing_in_series_getitem(series, index, scalar_args, range_args): assert series[arg].equals(expected) -@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@requires_cftime def test_indexing_in_series_loc(series, index, scalar_args, range_args): for arg in scalar_args: assert series.loc[arg] == 1 @@ -502,7 +502,7 @@ def test_indexing_in_series_loc(series, index, scalar_args, range_args): assert series.loc[arg].equals(expected) -@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@requires_cftime def test_indexing_in_series_iloc(series, index): expected = 1 assert series.iloc[0] == expected @@ -511,7 +511,7 @@ def test_indexing_in_series_iloc(series, index): assert series.iloc[:2].equals(expected) -@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@requires_cftime def test_indexing_in_dataframe_loc(df, index, scalar_args, range_args): expected = pd.Series([1], name=index[0]) for arg in scalar_args: @@ -524,7 +524,7 @@ def test_indexing_in_dataframe_loc(df, index, scalar_args, range_args): assert result.equals(expected) -@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@requires_cftime def test_indexing_in_dataframe_iloc(df, index): expected = pd.Series([1], name=index[0]) result = df.iloc[0] @@ -536,6 +536,7 @@ def test_indexing_in_dataframe_iloc(df, index): assert result.equals(expected) +@pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed') @pytest.mark.parametrize('enable_cftimeindex', [False, True]) def test_concat_cftimeindex(date_type, enable_cftimeindex): if not has_cftime and enable_cftimeindex: From 4fb5a901f6473732365504e0f4a8ba6ed7fe48b3 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 28 Apr 2018 14:03:29 -0400 Subject: [PATCH 46/58] Fix skip logic in Python 3.4 build for test_cftimeindex.py --- xarray/tests/test_cftimeindex.py | 38 ++++++++++++++++---------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 
59222444f92..dc33feff55f 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -11,7 +11,7 @@ _parsed_string_to_bounds, _parse_iso8601_with_reso) from xarray.tests import assert_array_equal, assert_identical -from . import has_cftime, has_cftime_or_netCDF4, requires_cftime +from . import has_cftime, has_cftime_or_netCDF4 from .test_coding_times import _all_cftime_date_types @@ -115,7 +115,7 @@ def dec_days(date_type): return 31 -@requires_cftime +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_assert_all_valid_date_type(date_type, index): import cftime if date_type is cftime.DatetimeNoLeap: @@ -163,7 +163,7 @@ def test_parse_iso8601_with_reso(date_type, string, date_args, reso): assert result_reso == expected_reso -@requires_cftime +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_parse_string_to_bounds_year(date_type, dec_days): parsed = date_type(2, 2, 10, 6, 2, 8, 1) expected_start = date_type(2, 1, 1) @@ -174,7 +174,7 @@ def test_parse_string_to_bounds_year(date_type, dec_days): assert result_end == expected_end -@requires_cftime +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_parse_string_to_bounds_month_feb(date_type, feb_days): parsed = date_type(2, 2, 10, 6, 2, 8, 1) expected_start = date_type(2, 2, 1) @@ -185,7 +185,7 @@ def test_parse_string_to_bounds_month_feb(date_type, feb_days): assert result_end == expected_end -@requires_cftime +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_parse_string_to_bounds_month_dec(date_type, dec_days): parsed = date_type(2, 12, 1) expected_start = date_type(2, 12, 1) @@ -214,13 +214,13 @@ def test_parsed_string_to_bounds_sub_monthly(date_type, reso, assert result_end == expected_end -@requires_cftime +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_parsed_string_to_bounds_raises(date_type): with pytest.raises(KeyError): _parsed_string_to_bounds(date_type, 'a', date_type(1, 1, 1)) -@requires_cftime +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_get_loc(date_type, index): result = index.get_loc('0001') expected = [0, 1] @@ -280,12 +280,12 @@ def test_get_slice_bound_decreasing_index( assert result == expected -@requires_cftime +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_date_type_property(date_type, index): assert index.date_type is date_type -@requires_cftime +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_contains(date_type, index): assert '0001-01-01' in index assert '0001' in index @@ -294,14 +294,14 @@ def test_contains(date_type, index): assert date_type(3, 1, 1) not in index -@requires_cftime +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_groupby(da): result = da.groupby('time.month').sum('time') expected = xr.DataArray([4, 6], coords=[[1, 2]], dims=['month']) assert_identical(result, expected) -@requires_cftime +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_resample_error(da): with pytest.raises(TypeError): da.resample(time='Y') @@ -323,7 +323,7 @@ def test_sel_string_or_list(da, index, sel_arg): assert_identical(result, expected) -@requires_cftime +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_sel_date_slice_or_list(da, index, date_type): expected = xr.DataArray([1, 2], coords=[index[:2]], dims=['time']) result = da.sel(time=slice(date_type(1, 1, 1), date_type(1, 12, 30))) @@ -333,7 +333,7 
@@ def test_sel_date_slice_or_list(da, index, date_type): assert_identical(result, expected) -@requires_cftime +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_sel_date_scalar(da, date_type, index): expected = xr.DataArray(1).assign_coords(time=index[0]) result = da.sel(time=date_type(1, 1, 1)) @@ -458,7 +458,7 @@ def test_sel_date_list_tolerance_raises(da, date_type, sel_kwargs): da.sel(time=[date_type(1, 2, 1), date_type(1, 5, 1)], **sel_kwargs) -@requires_cftime +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_isel(da, index): expected = xr.DataArray(1).assign_coords(time=index[0]) result = da.isel(time=0) @@ -482,7 +482,7 @@ def range_args(date_type): slice(None, date_type(1, 12, 30))] -@requires_cftime +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_indexing_in_series_getitem(series, index, scalar_args, range_args): for arg in scalar_args: assert series[arg] == 1 @@ -492,7 +492,7 @@ def test_indexing_in_series_getitem(series, index, scalar_args, range_args): assert series[arg].equals(expected) -@requires_cftime +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_indexing_in_series_loc(series, index, scalar_args, range_args): for arg in scalar_args: assert series.loc[arg] == 1 @@ -502,7 +502,7 @@ def test_indexing_in_series_loc(series, index, scalar_args, range_args): assert series.loc[arg].equals(expected) -@requires_cftime +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_indexing_in_series_iloc(series, index): expected = 1 assert series.iloc[0] == expected @@ -511,7 +511,7 @@ def test_indexing_in_series_iloc(series, index): assert series.iloc[:2].equals(expected) -@requires_cftime +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_indexing_in_dataframe_loc(df, index, scalar_args, range_args): expected = pd.Series([1], name=index[0]) for arg in scalar_args: @@ -524,7 +524,7 @@ def test_indexing_in_dataframe_loc(df, index, scalar_args, range_args): assert result.equals(expected) -@requires_cftime +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_indexing_in_dataframe_iloc(df, index): expected = pd.Series([1], name=index[0]) result = df.iloc[0] From 1fd205ab59d06f040164378b264055854eb8332e Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 28 Apr 2018 16:26:21 -0400 Subject: [PATCH 47/58] Improve error messages for when the standalone cftime is not installed --- xarray/coding/times.py | 4 +++- xarray/core/accessors.py | 4 +++- xarray/core/common.py | 21 +++++++++++++++++++++ xarray/tests/test_accessors.py | 14 +++++++++++++- xarray/tests/test_backends.py | 6 ++++-- 5 files changed, 44 insertions(+), 5 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 2c1cb2ea4b6..e24a97a5e73 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -9,7 +9,8 @@ import numpy as np import pandas as pd -from ..core.common import contains_cftime_datetimes +from ..core.common import (contains_cftime_datetimes, + raise_if_contains_netcdftime_datetimes) from ..core import indexing from ..core.formatting import first_n_items, format_timestamp, last_item from ..core.options import OPTIONS @@ -394,6 +395,7 @@ class CFDatetimeCoder(VariableCoder): def encode(self, variable, name=None): dims, data, attrs, encoding = unpack_for_encoding(variable) + raise_if_contains_netcdftime_datetimes(variable) if (np.issubdtype(data.dtype, np.datetime64) or
contains_cftime_datetimes(variable)): (data, units, calendar) = encode_cf_datetime( diff --git a/xarray/core/accessors.py b/xarray/core/accessors.py index 81af0532d93..9de00123061 100644 --- a/xarray/core/accessors.py +++ b/xarray/core/accessors.py @@ -3,7 +3,8 @@ import numpy as np import pandas as pd -from .common import is_np_datetime_like, _contains_datetime_like_objects +from .common import (is_np_datetime_like, _contains_datetime_like_objects, + raise_if_contains_netcdftime_datetimes) from .pycompat import dask_array_type @@ -136,6 +137,7 @@ class DatetimeAccessor(object): """ def __init__(self, xarray_obj): + raise_if_contains_netcdftime_datetimes(xarray_obj) if not _contains_datetime_like_objects(xarray_obj): raise TypeError("'dt' accessor only available for " "DataArray with datetime64 timedelta64 dtype or " diff --git a/xarray/core/common.py b/xarray/core/common.py index 0125aa0505b..759617f2818 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -892,6 +892,27 @@ def contains_cftime_datetimes(var): return isinstance(sample, cftime_datetime) +def raise_if_contains_netcdftime_datetimes(var): + """Raise a TypeError if netcdftime.datetime objects are found""" + try: + from netcdftime._netcdftime import datetime as netcdftime_datetime + except ImportError: + pass + else: + sample = var.data.ravel()[0] + if isinstance(sample, dask_array_type): + sample = sample.compute() + if isinstance(sample, np.ndarray): + sample = sample.item() + if isinstance(sample, netcdftime_datetime): + raise TypeError( + 'Serialization or use of the dt accessor for dates not of type ' + 'np.datetime64 requires that the standalone ' + 'cftime library be installed rather than the version packaged ' + 'with netCDF4; got dates of {}.'.format(type(sample))) + + + def _contains_datetime_like_objects(var): """Check if a variable contains datetime like objects (either np.datetime64, np.timedelta64, or cftime.datetime)""" diff --git a/xarray/tests/test_accessors.py b/xarray/tests/test_accessors.py index 1f9fcb41e85..d1326408e7e 100644 --- a/xarray/tests/test_accessors.py +++ b/xarray/tests/test_accessors.py @@ -8,7 +8,7 @@ from . 
import ( TestCase, assert_array_equal, assert_equal, raises_regex, requires_dask, - has_cftime, has_dask) + has_cftime, has_dask, has_netCDF4) class TestDatetimeAccessor(TestCase): @@ -217,3 +217,15 @@ def test_seasons(cftime_date_type): seasons = xr.DataArray(seasons) assert_array_equal(seasons.values, dates.dt.season.values) + + +@pytest.mark.skipif(not has_netCDF4, reason='netCDF4 not installed') +def test_dt_accessor_error_netCDF4(cftime_date_type): + da = xr.DataArray( + [cftime_date_type(1, 1, 1), cftime_date_type(2, 1, 1)], + dims=['time']) + if not has_cftime: + with pytest.raises(TypeError): + da.dt.month + else: + pass diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index b3fb27d4d90..712baa597df 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -380,7 +380,8 @@ def test_roundtrip_cftime_datetime_data_enable_cftimeindex(self): assert (actual.t.encoding['calendar'] == expected_calendar) else: - with pytest.raises((ValueError, NotImplementedError)): + with pytest.raises((ValueError, NotImplementedError, + TypeError)): with xr.set_options(enable_cftimeindex=True): with self.roundtrip(expected, save_kwargs=kwds) as actual: @@ -396,7 +397,8 @@ def test_roundtrip_cftime_datetime_data_disable_cftimeindex(self): expected = Dataset({'t': ('t', times), 't0': times[0]}) kwds = {'encoding': {'t0': {'units': 'days since 0001-01-01'}}} - with pytest.raises((ValueError, NotImplementedError)): + with pytest.raises((ValueError, NotImplementedError, + TypeError)): with xr.set_options(enable_cftimeindex=False): with self.roundtrip( expected, From 58a071599fa2ed310169d5032b3914026d017b34 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 28 Apr 2018 16:29:09 -0400 Subject: [PATCH 48/58] Tweak skip logic in test_accessors.py --- xarray/tests/test_accessors.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_accessors.py b/xarray/tests/test_accessors.py index d1326408e7e..83e2884ac60 100644 --- a/xarray/tests/test_accessors.py +++ b/xarray/tests/test_accessors.py @@ -8,7 +8,7 @@ from . 
import ( TestCase, assert_array_equal, assert_equal, raises_regex, requires_dask, - has_cftime, has_dask, has_netCDF4) + has_cftime, has_dask, has_cftime_or_netCDF4) class TestDatetimeAccessor(TestCase): @@ -219,7 +219,8 @@ def test_seasons(cftime_date_type): assert_array_equal(seasons.values, dates.dt.season.values) -@pytest.mark.skipif(not has_netCDF4, reason='netCDF4 not installed') +@pytest.mark.skipif(not has_cftime_or_netCDF4, + reason='cftime or netCDF4 not installed') def test_dt_accessor_error_netCDF4(cftime_date_type): da = xr.DataArray( [cftime_date_type(1, 1, 1), cftime_date_type(2, 1, 1)], From ca4d7ddb4001e6c0615aac069471a411a4c64418 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 28 Apr 2018 16:30:37 -0400 Subject: [PATCH 49/58] flake8 --- xarray/core/common.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/core/common.py b/xarray/core/common.py index 759617f2818..f30a0bf86c7 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -912,7 +912,6 @@ def raise_if_contains_netcdftime_datetimes(var): 'with netCDF4; got dates of {}.'.format(type(sample))) - def _contains_datetime_like_objects(var): """Check if a variable contains datetime like objects (either np.datetime64, np.timedelta64, or cftime.datetime)""" From 3947aacec271339fda2ab1911dee78b044cb8097 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Mon, 30 Apr 2018 11:17:12 -0400 Subject: [PATCH 50/58] Address review comments --- xarray/coding/cftimeindex.py | 1 - xarray/coding/times.py | 45 +++++++---------------- xarray/core/accessors.py | 4 +-- xarray/core/common.py | 20 ----------- xarray/core/utils.py | 14 +++++--- xarray/tests/test_accessors.py | 2 +- xarray/tests/test_backends.py | 60 +++++++++---------------------- xarray/tests/test_cftimeindex.py | 34 +++++++----------- xarray/tests/test_coding_times.py | 35 +++++++++--------- xarray/tests/test_utils.py | 38 +++++++------------- 10 files changed, 85 insertions(+), 168 deletions(-) diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index a3ff5326b0e..b05d221d054 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -153,7 +153,6 @@ class CFTimeIndex(pd.Index): date_type = property(get_date_type) def __new__(cls, data): - _require_standalone_cftime() result = object.__new__(cls) assert_all_valid_date_type(data) result._data = np.array(data) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index e24a97a5e73..81854278dd8 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -9,8 +9,7 @@ import numpy as np import pandas as pd -from ..core.common import (contains_cftime_datetimes, - raise_if_contains_netcdftime_datetimes) +from ..core.common import contains_cftime_datetimes from ..core import indexing from ..core.formatting import first_n_items, format_timestamp, last_item from ..core.options import OPTIONS @@ -89,6 +88,7 @@ def _decode_datetime_with_cftime(num_dates, units, calendar, enable_cftimeindex): cftime = _import_cftime() if enable_cftimeindex: + _require_standalone_cftime() dates = np.asarray(cftime.num2date(num_dates, units, calendar, only_use_cftime_datetimes=True)) else: @@ -96,24 +96,16 @@ def _decode_datetime_with_cftime(num_dates, units, calendar, if (dates[np.nanargmin(num_dates)].year < 1678 or dates[np.nanargmax(num_dates)].year >= 2262): - warnings.warn( - 'Unable to decode time axis into full ' - 'numpy.datetime64 objects, continuing using dummy ' - 'cftime.datetime objects instead, reason: dates out ' - 'of range', SerializationWarning, 
stacklevel=3) + if calendar in _STANDARD_CALENDARS: + warnings.warn( + 'Unable to decode time axis into full ' + 'numpy.datetime64 objects, continuing using dummy ' + 'cftime.datetime objects instead, reason: dates out ' + 'of range', SerializationWarning, stacklevel=3) else: if enable_cftimeindex: if calendar in _STANDARD_CALENDARS: dates = cftime_to_nptime(dates) - else: - warnings.warn( - 'Unable to decode time axis into full numpy.datetime64 ' - 'objects, because dates are encoded using a ' - 'non-standard calendar ({}). Using cftime.datetime ' - 'objects instead. Time indexing will be done using a ' - 'CFTimeIndex rather than ' - 'a DatetimeIndex'.format(calendar), - SerializationWarning, stacklevel=3) else: try: dates = cftime_to_nptime(dates) @@ -243,10 +235,7 @@ def infer_calendar_name(dates): if np.asarray(dates).dtype == 'datetime64[ns]': return 'proleptic_gregorian' else: - try: - return np.asarray(dates)[0].calendar - except IndexError: - return np.asarray(dates).item().calendar + return np.asarray(dates).ravel()[0].calendar def infer_datetime_units(dates): @@ -255,22 +244,16 @@ def infer_datetime_units(dates): 'hours', 'minutes' or 'seconds' (the first one that can evenly divide all unique time deltas in `dates`) """ + dates = np.asarray(dates).ravel() if np.asarray(dates).dtype == 'datetime64[ns]': - dates = pd.to_datetime(np.asarray(dates).ravel(), box=False) + dates = pd.to_datetime(dates, box=False) dates = dates[pd.notnull(dates)] - unique_timedeltas = np.unique(np.diff(dates)) reference_date = dates[0] if len(dates) > 0 else '1970-01-01' reference_date = pd.Timestamp(reference_date) else: - if not OPTIONS['enable_cftimeindex']: - raise ValueError('Serializing dates of type cftime.datetime ' - 'requires setting enable_cftimeindex to True and ' - 'using the standalone cftime library to enable ' - 'accurate roundtripping of date types.') - dates = np.asarray(dates).ravel() - unique_timedeltas = np.unique(pd.to_timedelta(np.diff(dates))) reference_date = dates[0] if len(dates) > 0 else '1970-01-01' reference_date = format_cftime_datetime(reference_date) + unique_timedeltas = np.unique(np.diff(dates)).astype('timedelta64[ns]') units = _infer_time_units_from_diff(unique_timedeltas) return '%s since %s' % (units, reference_date) @@ -395,7 +378,6 @@ class CFDatetimeCoder(VariableCoder): def encode(self, variable, name=None): dims, data, attrs, encoding = unpack_for_encoding(variable) - raise_if_contains_netcdftime_datetimes(variable) if (np.issubdtype(data.dtype, np.datetime64) or contains_cftime_datetimes(variable)): (data, units, calendar) = encode_cf_datetime( @@ -411,9 +393,6 @@ def decode(self, variable, name=None): dims, data, attrs, encoding = unpack_for_decoding(variable) enable_cftimeindex = OPTIONS['enable_cftimeindex'] - if enable_cftimeindex: - _require_standalone_cftime() - if 'units' in attrs and 'since' in attrs['units']: units = pop_to(attrs, encoding, 'units') calendar = pop_to(attrs, encoding, 'calendar') diff --git a/xarray/core/accessors.py b/xarray/core/accessors.py index 9de00123061..81af0532d93 100644 --- a/xarray/core/accessors.py +++ b/xarray/core/accessors.py @@ -3,8 +3,7 @@ import numpy as np import pandas as pd -from .common import (is_np_datetime_like, _contains_datetime_like_objects, - raise_if_contains_netcdftime_datetimes) +from .common import is_np_datetime_like, _contains_datetime_like_objects from .pycompat import dask_array_type @@ -137,7 +136,6 @@ class DatetimeAccessor(object): """ def __init__(self, xarray_obj): - 
raise_if_contains_netcdftime_datetimes(xarray_obj) if not _contains_datetime_like_objects(xarray_obj): raise TypeError("'dt' accessor only available for " "DataArray with datetime64 timedelta64 dtype or " diff --git a/xarray/core/common.py b/xarray/core/common.py index f30a0bf86c7..62cd0ac0765 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -890,26 +890,6 @@ def contains_cftime_datetimes(var): if isinstance(sample, np.ndarray): sample = sample.item() return isinstance(sample, cftime_datetime) - - -def raise_if_contains_netcdftime_datetimes(var): - """Raise a TypeError if netcdftime.datetime objects are found""" - try: - from netcdftime._netcdftime import datetime as netcdftime_datetime - except ImportError: - pass - else: - sample = var.data.ravel()[0] - if isinstance(sample, dask_array_type): - sample = sample.compute() - if isinstance(sample, np.ndarray): - sample = sample.item() - if isinstance(sample, netcdftime_datetime): - raise TypeError( - 'Serialization or use of the dt accessor for dates not of type ' - 'np.datetime64 requires that the standalone ' - 'cftime library be installed rather than the version packaged ' - 'with netCDF4; got dates of {}.'.format(type(sample))) def _contains_datetime_like_objects(var): diff --git a/xarray/core/utils.py b/xarray/core/utils.py index ec243da3fd4..2259098857b 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -39,16 +39,22 @@ def wrapper(*args, **kwargs): def _maybe_cast_to_cftimeindex(index): from ..coding.cftimeindex import CFTimeIndex - from ..coding.times import _require_standalone_cftime if not OPTIONS['enable_cftimeindex']: return index else: - _require_standalone_cftime() try: - return CFTimeIndex(index) - except TypeError: + import cftime # noqa: F401 + except ImportError: return index + else: + if index.dtype == 'O': + try: + return CFTimeIndex(index) + except TypeError: + return index + else: + return index def safe_cast_to_index(array): diff --git a/xarray/tests/test_accessors.py b/xarray/tests/test_accessors.py index 83e2884ac60..e1b3a95b942 100644 --- a/xarray/tests/test_accessors.py +++ b/xarray/tests/test_accessors.py @@ -229,4 +229,4 @@ def test_dt_accessor_error_netCDF4(cftime_date_type): with pytest.raises(TypeError): da.dt.month else: - pass + da.dt.month diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 712baa597df..1ac94419190 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -32,8 +32,8 @@ assert_identical, has_dask, has_netCDF4, has_scipy, network, raises_regex, requires_dask, requires_h5netcdf, requires_netCDF4, requires_pathlib, requires_pydap, requires_pynio, requires_rasterio, requires_scipy, - requires_scipy_or_netCDF4, requires_zarr, has_cftime, - requires_cftime_or_netCDF4) + requires_scipy_or_netCDF4, requires_zarr, + requires_cftime) from .test_dataset import create_test_data try: @@ -350,7 +350,7 @@ def test_roundtrip_numpy_datetime_data(self): assert_identical(expected, actual) assert actual.t0.encoding['units'] == 'days since 1950-01-01' - @requires_cftime_or_netCDF4 + @requires_cftime def test_roundtrip_cftime_datetime_data_enable_cftimeindex(self): from .test_coding_times import _all_cftime_date_types @@ -363,47 +363,21 @@ def test_roundtrip_cftime_datetime_data_enable_cftimeindex(self): expected_decoded_t0 = np.array([date_type(1, 1, 1)]) expected_calendar = times[0].calendar - if has_cftime: - with xr.set_options(enable_cftimeindex=True): - with self.roundtrip(expected, save_kwargs=kwds) as actual: - abs_diff = 
abs(actual.t.values - expected_decoded_t) - assert (abs_diff <= np.timedelta64(1, 's')).all() - assert (actual.t.encoding['units'] == - 'days since 0001-01-01 00:00:00.000000') - assert (actual.t.encoding['calendar'] == - expected_calendar) - - abs_diff = abs(actual.t0.values - expected_decoded_t0) - assert (abs_diff <= np.timedelta64(1, 's')).all() - assert (actual.t0.encoding['units'] == - 'days since 0001-01-01') - assert (actual.t.encoding['calendar'] == - expected_calendar) - else: - with pytest.raises((ValueError, NotImplementedError, - TypeError)): - with xr.set_options(enable_cftimeindex=True): - with self.roundtrip(expected, - save_kwargs=kwds) as actual: - pass - - @requires_cftime_or_netCDF4 - def test_roundtrip_cftime_datetime_data_disable_cftimeindex(self): - from .test_coding_times import _all_cftime_date_types - - date_types = _all_cftime_date_types() - for date_type in date_types.values(): - times = [date_type(1, 1, 1), date_type(1, 1, 2)] - expected = Dataset({'t': ('t', times), 't0': times[0]}) - kwds = {'encoding': {'t0': {'units': 'days since 0001-01-01'}}} + with xr.set_options(enable_cftimeindex=True): + with self.roundtrip(expected, save_kwargs=kwds) as actual: + abs_diff = abs(actual.t.values - expected_decoded_t) + assert (abs_diff <= np.timedelta64(1, 's')).all() + assert (actual.t.encoding['units'] == + 'days since 0001-01-01 00:00:00.000000') + assert (actual.t.encoding['calendar'] == + expected_calendar) - with pytest.raises((ValueError, NotImplementedError, - TypeError)): - with xr.set_options(enable_cftimeindex=False): - with self.roundtrip( - expected, - save_kwargs=kwds) as actual: # noqa: F841 - pass + abs_diff = abs(actual.t0.values - expected_decoded_t0) + assert (abs_diff <= np.timedelta64(1, 's')).all() + assert (actual.t0.encoding['units'] == + 'days since 0001-01-01') + assert (actual.t.encoding['calendar'] == + expected_calendar) def test_roundtrip_timedelta_data(self): time_deltas = pd.to_timedelta(['1h', '2h', 'NaT']) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index dc33feff55f..c78ac038bd5 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -539,25 +539,17 @@ def test_indexing_in_dataframe_iloc(df, index): @pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed') @pytest.mark.parametrize('enable_cftimeindex', [False, True]) def test_concat_cftimeindex(date_type, enable_cftimeindex): - if not has_cftime and enable_cftimeindex: - with pytest.raises(ImportError): - with xr.set_options(enable_cftimeindex=enable_cftimeindex): - da1 = xr.DataArray( - [1., 2.], - coords=[[date_type(1, 1, 1), date_type(1, 2, 1)]], - dims=['time']) + with xr.set_options(enable_cftimeindex=enable_cftimeindex): + da1 = xr.DataArray( + [1., 2.], coords=[[date_type(1, 1, 1), date_type(1, 2, 1)]], + dims=['time']) + da2 = xr.DataArray( + [3., 4.], coords=[[date_type(1, 3, 1), date_type(1, 4, 1)]], + dims=['time']) + da = xr.concat([da1, da2], dim='time') + + if enable_cftimeindex and has_cftime: + assert isinstance(da.indexes['time'], CFTimeIndex) else: - with xr.set_options(enable_cftimeindex=enable_cftimeindex): - da1 = xr.DataArray( - [1., 2.], coords=[[date_type(1, 1, 1), date_type(1, 2, 1)]], - dims=['time']) - da2 = xr.DataArray( - [3., 4.], coords=[[date_type(1, 3, 1), date_type(1, 4, 1)]], - dims=['time']) - da = xr.concat([da1, da2], dim='time') - - if enable_cftimeindex: - assert isinstance(da.indexes['time'], CFTimeIndex) - else: - assert isinstance(da.indexes['time'], 
pd.Index) - assert not isinstance(da.indexes['time'], CFTimeIndex) + assert isinstance(da.indexes['time'], pd.Index) + assert not isinstance(da.indexes['time'], CFTimeIndex) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 019472c83bc..1d16acaaab1 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -473,11 +473,17 @@ def test_decode_non_standard_calendar_single_element_fallback( dt = cftime.datetime(2001, 2, 29) num_time = cftime.date2num(dt, units, calendar) - with pytest.warns(SerializationWarning, - match='Unable to decode time axis'): + if enable_cftimeindex: actual = coding.times.decode_cf_datetime( num_time, units, calendar=calendar, enable_cftimeindex=enable_cftimeindex) + else: + with pytest.warns(SerializationWarning, + match='Unable to decode time axis'): + actual = coding.times.decode_cf_datetime( + num_time, units, calendar=calendar, + enable_cftimeindex=enable_cftimeindex) + expected = np.asarray(cftime.num2date(num_time, units, calendar)) assert actual.dtype == np.dtype('O') assert expected == actual @@ -504,8 +510,11 @@ def test_decode_non_standard_calendar_fallback( actual = coding.times.decode_cf_datetime( num_times, units, calendar=calendar, enable_cftimeindex=enable_cftimeindex) - assert len(w) == 1 - assert 'Unable to decode time axis' in str(w[0].message) + if enable_cftimeindex: + assert len(w) == 0 + else: + assert len(w) == 1 + assert 'Unable to decode time axis' in str(w[0].message) assert actual.dtype == np.dtype('O') assert_array_equal(actual, expected) @@ -560,8 +569,7 @@ def test_infer_datetime_units(dates, expected): @pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed') -@pytest.mark.parametrize('enable_cftimeindex', [False, True]) -def test_infer_cftime_datetime_units(enable_cftimeindex): +def test_infer_cftime_datetime_units(): date_types = _all_cftime_date_types() for date_type in date_types.values(): for dates, expected in [ @@ -578,13 +586,7 @@ def test_infer_cftime_datetime_units(enable_cftimeindex): ([date_type(1900, 1, 1), date_type(1900, 1, 2, 0, 0, 0, 5)], 'days since 1900-01-01 00:00:00.000000')]: - if enable_cftimeindex: - with set_options(enable_cftimeindex=enable_cftimeindex): - assert expected == coding.times.infer_datetime_units(dates) - else: - with set_options(enable_cftimeindex=enable_cftimeindex): - with pytest.raises(ValueError): - coding.times.infer_datetime_units(dates) + assert expected == coding.times.infer_datetime_units(dates) @pytest.mark.parametrize( @@ -668,11 +670,12 @@ def test_decode_cf_enable_cftimeindex(calendar, enable_cftimeindex): ds = da.to_dataset() for v in ['test', 'time']: - ds[v].attrs['units'] = 'days since 2000-01-01' + ds[v].attrs['units'] = 'days since 2001-01-01' ds[v].attrs['calendar'] = calendar - if not has_cftime and enable_cftimeindex: - with pytest.raises(ImportError): + if (not has_cftime and enable_cftimeindex and + calendar not in coding.times._STANDARD_CALENDARS): + with pytest.raises(ValueError): with set_options(enable_cftimeindex=enable_cftimeindex): ds = decode_cf(ds) else: diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index 3b09e93bd7f..7f3b015d0b7 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -46,25 +46,16 @@ def test_safe_cast_to_index_cftimeindex(enable_cftimeindex): for date_type in date_types.values(): dates = [date_type(1, 1, day) for day in range(1, 20)] - if not has_cftime and enable_cftimeindex: - with pytest.raises(ImportError): - with 
set_options(enable_cftimeindex=enable_cftimeindex): - actual = utils.safe_cast_to_index(np.array(dates)) + if enable_cftimeindex and has_cftime: + expected = CFTimeIndex(dates) else: - if enable_cftimeindex: - expected = CFTimeIndex(dates) - else: - expected = pd.Index(dates) + expected = pd.Index(dates) - with set_options(enable_cftimeindex=enable_cftimeindex): - actual = utils.safe_cast_to_index(np.array(dates)) - assert_array_equal(expected, actual) - assert expected.dtype == actual.dtype - - if enable_cftimeindex: - assert isinstance(actual, CFTimeIndex) - else: - assert isinstance(actual, pd.Index) + with set_options(enable_cftimeindex=enable_cftimeindex): + actual = utils.safe_cast_to_index(np.array(dates)) + assert_array_equal(expected, actual) + assert expected.dtype == actual.dtype + assert isinstance(actual, type(expected)) # Test that datetime.datetime objects are never used in a NetCDFTimeIndex @@ -74,15 +65,10 @@ def test_safe_cast_to_index_datetime_datetime(enable_cftimeindex): dates = [datetime(1, 1, day) for day in range(1, 20)] expected = pd.Index(dates) - if not has_cftime and enable_cftimeindex: - with pytest.raises(ImportError): - with set_options(enable_cftimeindex=enable_cftimeindex): - actual = utils.safe_cast_to_index(np.array(dates)) - else: - with set_options(enable_cftimeindex=enable_cftimeindex): - actual = utils.safe_cast_to_index(np.array(dates)) - assert_array_equal(expected, actual) - assert isinstance(actual, pd.Index) + with set_options(enable_cftimeindex=enable_cftimeindex): + actual = utils.safe_cast_to_index(np.array(dates)) + assert_array_equal(expected, actual) + assert isinstance(actual, pd.Index) def test_multiindex_from_product_levels(): From a395db0f77782d6f6763978681ec31752ed9c3c5 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Mon, 30 Apr 2018 11:20:16 -0400 Subject: [PATCH 51/58] Temporarily remove cftime from py27 build environment on windows --- ci/requirements-py27-windows.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/requirements-py27-windows.yml b/ci/requirements-py27-windows.yml index 43b292100de..a39b24b887c 100644 --- a/ci/requirements-py27-windows.yml +++ b/ci/requirements-py27-windows.yml @@ -3,7 +3,6 @@ channels: - conda-forge dependencies: - python=2.7 - - cftime - dask - distributed - h5py From 1b00bde38c2cb2638a5d4737f0da36f7ea0532bb Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Mon, 30 Apr 2018 11:21:54 -0400 Subject: [PATCH 52/58] flake8 --- xarray/coding/cftimeindex.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index b05d221d054..20cb3dda159 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -8,8 +8,6 @@ from xarray.core import pycompat from xarray.core.utils import is_scalar -from .times import _require_standalone_cftime - def named(name, pattern): return '(?P<' + name + '>' + pattern + ')' From 5fdcd209785072f8405be682b19ae61ccb2cf173 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Mon, 30 Apr 2018 11:58:03 -0400 Subject: [PATCH 53/58] Install cftime via pip for Python 2.7 on Windows --- ci/requirements-py27-windows.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ci/requirements-py27-windows.yml b/ci/requirements-py27-windows.yml index a39b24b887c..7562874785b 100644 --- a/ci/requirements-py27-windows.yml +++ b/ci/requirements-py27-windows.yml @@ -20,3 +20,5 @@ dependencies: - toolz - rasterio - zarr + - pip: + - cftime From 7e9bb20b7efd9b71ec7566398c334e62267c0b37 Mon Sep 17 00:00:00 2001 From: 
Spencer Clark Date: Mon, 30 Apr 2018 13:51:43 -0400 Subject: [PATCH 54/58] flake8 --- xarray/backends/zarr.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 341ce2f97bf..36f5df48792 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -7,7 +7,6 @@ from .. import Variable, coding, conventions from ..core import indexing -from ..core.common import contains_cftime_datetimes from ..core.pycompat import OrderedDict, integer_types, iteritems from ..core.utils import FrozenOrderedDict, HiddenKeyDict from .common import AbstractWritableDataStore, ArrayWriter, BackendArray From 247c9eb3f4cb05458c08ee3d797ca0e1a5473bed Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Tue, 1 May 2018 07:55:55 -0400 Subject: [PATCH 55/58] Remove unnecessary new lines; simplify _maybe_cast_to_cftimeindex --- xarray/__init__.py | 1 + xarray/backends/zarr.py | 1 + xarray/core/utils.py | 17 ++++++----------- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/xarray/__init__.py b/xarray/__init__.py index ad8dbb28616..1a2bf3fe283 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -19,6 +19,7 @@ save_mfdataset) from .backends.rasterio_ import open_rasterio from .backends.zarr import open_zarr + from .conventions import decode_cf, SerializationWarning try: diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 36f5df48792..83dcbd9a172 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -207,6 +207,7 @@ def encode_zarr_variable(var, needs_copy=True, name=None): out : xarray.Variable A variable which has been encoded as described above. """ + var = conventions.encode_cf_variable(var, name=name) # zarr allows unicode, but not variable-length strings, so it's both diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 2259098857b..06bb3ede393 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -43,18 +43,13 @@ def _maybe_cast_to_cftimeindex(index): if not OPTIONS['enable_cftimeindex']: return index else: - try: - import cftime # noqa: F401 - except ImportError: - return index - else: - if index.dtype == 'O': - try: - return CFTimeIndex(index) - except TypeError: - return index - else: + if index.dtype == 'O': + try: + return CFTimeIndex(index) + except (ImportError, TypeError): return index + else: + return index def safe_cast_to_index(array): From e66abe922c57e0ff71d49846eedf554c47cd2d39 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Tue, 1 May 2018 08:43:37 -0400 Subject: [PATCH 56/58] Restore test case for #2002 in test_coding_times.py I must have inadvertently removed it during a merge. 
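For context, the restored case (the # GH2002 entry in the diff below) exercises decoding of a time value stored as np.int32 rather than a Python int or float. A minimal sketch of the end-to-end scenario, assuming only xarray's public decode_cf entry point; the dataset construction here is illustrative and is not the test's own code:

    import numpy as np
    import xarray as xr

    # The raw time value carries a non-default integer dtype (np.int32);
    # GH2002 concerned decoding such values.
    ds = xr.Dataset({'time': ('time',
                              np.array([788961600], dtype=np.int32),
                              {'units': 'seconds since 1981-01-01'})})
    decoded = xr.decode_cf(ds)
    # 788961600 seconds is 9131.5 days, so with the default standard
    # calendar this should decode to np.datetime64('2006-01-01T12:00:00').
    print(decoded.time.values)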
--- xarray/tests/test_coding_times.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 1d16acaaab1..da2c37df1df 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -43,7 +43,8 @@ (17093352.0, 'hours since 1-1-1 00:00:0.0'), ([0.5, 1.5], 'hours since 1900-01-01T00:00:00'), (0, 'milliseconds since 2000-01-01T00:00:00'), - (0, 'microseconds since 2000-01-01T00:00:00') + (0, 'microseconds since 2000-01-01T00:00:00'), + (np.int32(788961600), 'seconds since 1981-01-01') # GH2002 ] _CF_DATETIME_TESTS = [num_dates_units + (calendar,) for num_dates_units, calendar in product(_CF_DATETIME_NUM_DATES_UNITS, From f25b0b65270fa30ea75af6b0244a0e729d8fa468 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Tue, 1 May 2018 21:06:55 -0400 Subject: [PATCH 57/58] Tweak dates out of range warning logic slightly to preserve current default --- xarray/coding/times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 81854278dd8..61314d9cbe6 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -96,7 +96,7 @@ def _decode_datetime_with_cftime(num_dates, units, calendar, if (dates[np.nanargmin(num_dates)].year < 1678 or dates[np.nanargmax(num_dates)].year >= 2262): - if calendar in _STANDARD_CALENDARS: + if not enable_cftimeindex or calendar in _STANDARD_CALENDARS: warnings.warn( 'Unable to decode time axis into full ' 'numpy.datetime64 objects, continuing using dummy ' From c318755b51c5dab4008a6f48d0afdc80bbd6bea6 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 12 May 2018 09:24:16 -0400 Subject: [PATCH 58/58] Address review comments --- xarray/coding/cftimeindex.py | 7 +--- xarray/core/common.py | 15 ++++--- xarray/plot/plot.py | 8 +++- xarray/tests/test_coding_times.py | 69 ++++++++++++++++++++++++++++++- xarray/tests/test_dataarray.py | 19 +++++++-- xarray/tests/test_plot.py | 24 ++++++++++- xarray/tests/test_utils.py | 2 +- 7 files changed, 125 insertions(+), 19 deletions(-) diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 20cb3dda159..fb51ace5d69 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -122,14 +122,9 @@ def get_date_type(self): def assert_all_valid_date_type(data): import cftime - valid_types = (cftime.DatetimeJulian, cftime.DatetimeNoLeap, - cftime.DatetimeAllLeap, cftime.DatetimeGregorian, - cftime.DatetimeProlepticGregorian, - cftime.Datetime360Day) - sample = data[0] date_type = type(sample) - if not isinstance(sample, valid_types): + if not isinstance(sample, cftime.datetime): raise TypeError( 'CFTimeIndex requires cftime.datetime ' 'objects. 
Got object of {}.'.format(date_type)) diff --git a/xarray/core/common.py b/xarray/core/common.py index 62cd0ac0765..fa194554166 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -884,12 +884,15 @@ def contains_cftime_datetimes(var): except ImportError: return False else: - sample = var.data.ravel()[0] - if isinstance(sample, dask_array_type): - sample = sample.compute() - if isinstance(sample, np.ndarray): - sample = sample.item() - return isinstance(sample, cftime_datetime) + if var.dtype == np.dtype('O') and var.data.size > 0: + sample = var.data.ravel()[0] + if isinstance(sample, dask_array_type): + sample = sample.compute() + if isinstance(sample, np.ndarray): + sample = sample.item() + return isinstance(sample, cftime_datetime) + else: + return False def _contains_datetime_like_objects(var): diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index 94ddc8c0535..1266dda33af 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -14,6 +14,7 @@ import numpy as np import pandas as pd +from xarray.core.common import contains_cftime_datetimes from xarray.core.pycompat import basestring from .facetgrid import FacetGrid @@ -53,7 +54,8 @@ def _ensure_plottable(*args): if not (_valid_numpy_subdtype(np.array(x), numpy_types) or _valid_other_type(np.array(x), other_types)): raise TypeError('Plotting requires coordinates to be numeric ' - 'or dates.') + 'or dates of type np.datetime64 or ' + 'datetime.datetime.') def _easy_facetgrid(darray, plotfunc, x, y, row=None, col=None, @@ -120,6 +122,10 @@ def plot(darray, row=None, col=None, col_wrap=None, ax=None, rtol=0.01, """ darray = darray.squeeze() + if contains_cftime_datetimes(darray): + raise NotImplementedError('Plotting arrays of cftime.datetime objects ' + 'is currently not possible.') + plot_dims = set(darray.dims) plot_dims.discard(row) plot_dims.discard(col) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index da2c37df1df..7c1e869f772 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -10,9 +10,10 @@ from xarray import Variable, coding, set_options, DataArray, decode_cf from xarray.coding.times import _import_cftime from xarray.coding.variables import SerializationWarning +from xarray.core.common import contains_cftime_datetimes from . 
import (assert_array_equal, has_cftime_or_netCDF4, - requires_cftime_or_netCDF4, has_cftime) + requires_cftime_or_netCDF4, has_cftime, has_dask) _NON_STANDARD_CALENDARS = {'noleap', '365_day', '360_day', @@ -688,3 +689,69 @@ def test_decode_cf_enable_cftimeindex(calendar, enable_cftimeindex): assert ds.test.dtype == np.dtype('O') else: assert ds.test.dtype == np.dtype('M8[ns]') + + +@pytest.fixture(params=_ALL_CALENDARS) +def calendar(request): + return request.param + + +@pytest.fixture() +def times(calendar): + cftime = _import_cftime() + + return cftime.num2date( + np.arange(4), units='hours since 2000-01-01', calendar=calendar, + only_use_cftime_datetimes=True) + + +@pytest.fixture() +def data(times): + data = np.random.rand(2, 2, 4) + lons = np.linspace(0, 11, 2) + lats = np.linspace(0, 20, 2) + return DataArray(data, coords=[lons, lats, times], + dims=['lon', 'lat', 'time'], name='data') + + +@pytest.fixture() +def times_3d(times): + lons = np.linspace(0, 11, 2) + lats = np.linspace(0, 20, 2) + times_arr = np.random.choice(times, size=(2, 2, 4)) + return DataArray(times_arr, coords=[lons, lats, times], + dims=['lon', 'lat', 'time'], + name='data') + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +def test_contains_cftime_datetimes_1d(data): + assert contains_cftime_datetimes(data.time) + + +@pytest.mark.skipif(not has_dask, reason='dask not installed') +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +def test_contains_cftime_datetimes_dask_1d(data): + assert contains_cftime_datetimes(data.time.chunk()) + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +def test_contains_cftime_datetimes_3d(times_3d): + assert contains_cftime_datetimes(times_3d) + + +@pytest.mark.skipif(not has_dask, reason='dask not installed') +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +def test_contains_cftime_datetimes_dask_3d(times_3d): + assert contains_cftime_datetimes(times_3d.chunk()) + + +@pytest.mark.parametrize('non_cftime_data', [DataArray([]), DataArray([1, 2])]) +def test_contains_cftime_datetimes_non_cftimes(non_cftime_data): + assert not contains_cftime_datetimes(non_cftime_data) + + +@pytest.mark.skipif(not has_dask, reason='dask not installed') +@pytest.mark.parametrize('non_cftime_data', [DataArray([]), DataArray([1, 2])]) +def test_contains_cftime_datetimes_non_cftimes_dask(non_cftime_data): + assert not contains_cftime_datetimes(non_cftime_data.chunk()) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 32ab3a634cb..3d2f8cbbf4f 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -11,14 +11,14 @@ import xarray as xr from xarray import ( - DataArray, Dataset, IndexVariable, Variable, align, broadcast) -from xarray.coding.times import CFDatetimeCoder + DataArray, Dataset, IndexVariable, Variable, align, broadcast, set_options) +from xarray.coding.times import CFDatetimeCoder, _import_cftime from xarray.core.common import full_like from xarray.core.pycompat import OrderedDict, iteritems from xarray.tests import ( ReturnItem, TestCase, assert_allclose, assert_array_equal, assert_equal, assert_identical, raises_regex, requires_bottleneck, requires_dask, - requires_scipy, source_ndarray, unittest) + requires_scipy, source_ndarray, unittest, requires_cftime) class TestDataArray(TestCase): @@ -2208,6 +2208,19 @@ def test_resample(self): with raises_regex(ValueError, 'index must be monotonic'): array[[2, 0, 1]].resample(time='1D') + @requires_cftime + def 
test_resample_cftimeindex(self): + cftime = _import_cftime() + times = cftime.num2date(np.arange(12), units='hours since 0001-01-01', + calendar='noleap') + with set_options(enable_cftimeindex=True): + array = DataArray(np.arange(12), [('time', times)]) + + with raises_regex(TypeError, + 'Only valid with DatetimeIndex, ' + 'TimedeltaIndex or PeriodIndex'): + array.resample(time='6H').mean() + def test_resample_first(self): times = pd.date_range('2000-01-01', freq='6H', periods=10) array = DataArray(np.arange(10), [('time', times)]) diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 2a5eeb86bdd..4d32ea429ca 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -9,6 +9,7 @@ import xarray.plot as xplt from xarray import DataArray +from xarray.coding.times import _import_cftime from xarray.plot.plot import _infer_interval_breaks from xarray.plot.utils import ( _build_discrete_cmap, _color_palette, _determine_cmap_params, @@ -16,7 +17,7 @@ from . import ( TestCase, assert_array_equal, assert_equal, raises_regex, - requires_matplotlib, requires_seaborn) + requires_matplotlib, requires_seaborn, requires_cftime) # import mpl and change the backend before other mpl imports try: @@ -1488,3 +1489,24 @@ def test_plot_seaborn_no_import_warning(): with pytest.warns(None) as record: _color_palette('Blues', 4) assert len(record) == 0 + + +@requires_cftime +def test_plot_cftime_coordinate_error(): + cftime = _import_cftime() + time = cftime.num2date(np.arange(5), units='days since 0001-01-01', + calendar='noleap') + data = DataArray(np.arange(5), coords=[time], dims=['time']) + with raises_regex(TypeError, + 'requires coordinates to be numeric or dates'): + data.plot() + + +@requires_cftime +def test_plot_cftime_data_error(): + cftime = _import_cftime() + data = cftime.num2date(np.arange(5), units='days since 0001-01-01', + calendar='noleap') + data = DataArray(data, coords=[np.arange(5)], dims=['x']) + with raises_regex(NotImplementedError, 'cftime.datetime'): + data.plot() diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index 7f3b015d0b7..0b3b0ee7dd6 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -58,7 +58,7 @@ def test_safe_cast_to_index_cftimeindex(enable_cftimeindex): assert isinstance(actual, type(expected)) -# Test that datetime.datetime objects are never used in a NetCDFTimeIndex +# Test that datetime.datetime objects are never used in a CFTimeIndex @pytest.mark.skipif(not has_cftime_or_netCDF4, reason='cftime not installed') @pytest.mark.parametrize('enable_cftimeindex', [False, True]) def test_safe_cast_to_index_datetime_datetime(enable_cftimeindex):