diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 388d4d67340..e4b2a06a3e7 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -47,6 +47,11 @@ Bug fixes - Make illegal path-like variable names when constructing a DataTree from a Dataset (:issue:`9339`, :pull:`9378`) By `Etienne Schalk `_. +- Work around `upstream pandas issue + `_ to ensure that we can + decode times encoded with small integer dtype values (e.g. ``np.int32``) in + environments with NumPy 2.0 or greater without needing to fall back to cftime + (:pull:`9518`). By `Spencer Clark `_. - Fix bug when encoding times with missing values as floats in the case when the non-missing times could in theory be encoded with integers (:issue:`9488`, :pull:`9497`). By `Spencer Clark diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 5655bd20afc..2b9a5cf7de2 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -254,6 +254,15 @@ def _decode_datetime_with_pandas( "pandas." ) + # Work around pandas.to_timedelta issue with dtypes smaller than int64 and + # NumPy 2.0 by casting all int and uint data to int64 and uint64, + # respectively. See https://github.com/pandas-dev/pandas/issues/56996 for + # more details. + if flat_num_dates.dtype.kind == "i": + flat_num_dates = flat_num_dates.astype(np.int64) + elif flat_num_dates.dtype.kind == "u": + flat_num_dates = flat_num_dates.astype(np.uint64) + time_units, ref_date_str = _unpack_netcdf_time_units(units) time_units = _netcdf_to_numpy_timeunit(time_units) try: diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 66caf25cc73..bb0dd1dd25c 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd import pytest -from pandas.errors import OutOfBoundsDatetime +from pandas.errors import OutOfBoundsDatetime, OutOfBoundsTimedelta from xarray import ( DataArray, @@ -1136,11 +1136,16 @@ def test_should_cftime_be_used_target_not_npable(): _should_cftime_be_used(src, "noleap", False) -@pytest.mark.parametrize("dtype", [np.uint8, np.uint16, np.uint32, np.uint64]) -def test_decode_cf_datetime_uint(dtype): +@pytest.mark.parametrize( + "dtype", + [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64], +) +def test_decode_cf_datetime_varied_integer_dtypes(dtype): units = "seconds since 2018-08-22T03:23:03Z" num_dates = dtype(50) - result = decode_cf_datetime(num_dates, units) + # Set use_cftime=False to ensure we cannot mask a failure by falling back + # to cftime. + result = decode_cf_datetime(num_dates, units, use_cftime=False) expected = np.asarray(np.datetime64("2018-08-22T03:23:53", "ns")) np.testing.assert_equal(result, expected) @@ -1154,6 +1159,14 @@ def test_decode_cf_datetime_uint64_with_cftime(): np.testing.assert_equal(result, expected) +def test_decode_cf_datetime_uint64_with_pandas_overflow_error(): + units = "nanoseconds since 1970-01-01" + calendar = "standard" + num_dates = np.uint64(1_000_000 * 86_400 * 360 * 500_000) + with pytest.raises(OutOfBoundsTimedelta): + decode_cf_datetime(num_dates, units, calendar, use_cftime=False) + + @requires_cftime def test_decode_cf_datetime_uint64_with_cftime_overflow_error(): units = "microseconds since 1700-01-01" @@ -1438,10 +1451,8 @@ def test_roundtrip_float_times(fill_value, times, units, encoded_values) -> None "days since 1700-01-01", np.dtype("int32"), ), - "mixed-cftime-pandas-encoding-with-prescribed-units-and-dtype": ( - "250YS", - "days since 1700-01-01", - np.dtype("int32"), + "mixed-cftime-pandas-encoding-with-prescribed-units-and-dtype": pytest.param( + "250YS", "days since 1700-01-01", np.dtype("int32"), marks=requires_cftime ), "pandas-encoding-with-default-units-and-dtype": ("250YS", None, None), }