From 6c37e2803350777273520942f9c7d1d33c74ae5c Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Wed, 18 Sep 2024 19:47:33 -0400 Subject: [PATCH 1/2] Fix pandas datetime decoding with np.int32 values and NumPy >= 2 Thanks @langmore for noting this issue and suggesting this workaround. --- doc/whats-new.rst | 6 +++++- xarray/coding/times.py | 9 +++++++++ xarray/tests/test_coding_times.py | 27 +++++++++++++++++++-------- 3 files changed, 33 insertions(+), 9 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 387b4bdf6b5..8d4bb6143ca 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -47,7 +47,11 @@ Bug fixes - Make illegal path-like variable names when constructing a DataTree from a Dataset (:issue:`9339`, :pull:`9378`) By `Etienne Schalk `_. - +- Work around `upstream pandas issue + <https://github.com/pandas-dev/pandas/issues/56996>`_ to ensure that we can + decode times encoded with ``np.int32`` values in environments with NumPy 2.0 + or greater without needing to fall back to cftime (:pull:`9518`). By `Spencer + Clark `_. Documentation diff --git a/xarray/coding/times.py b/xarray/coding/times.py index cfdecd28a27..72fb69e1592 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -254,6 +254,15 @@ def _decode_datetime_with_pandas( "pandas." ) + # Work around pandas.to_timedelta issue with dtypes smaller than int64 and + # NumPy 2.0 by casting all int and uint data to int64 and uint64, + # respectively. See https://github.com/pandas-dev/pandas/issues/56996 for + # more details.
+ if flat_num_dates.dtype.kind == "i": + flat_num_dates = flat_num_dates.astype(np.int64) + elif flat_num_dates.dtype.kind == "u": + flat_num_dates = flat_num_dates.astype(np.uint64) + time_units, ref_date_str = _unpack_netcdf_time_units(units) time_units = _netcdf_to_numpy_timeunit(time_units) try: diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 5879e6beed8..8b90b2fad83 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd import pytest -from pandas.errors import OutOfBoundsDatetime +from pandas.errors import OutOfBoundsDatetime, OutOfBoundsTimedelta from xarray import ( DataArray, @@ -1136,11 +1136,16 @@ def test_should_cftime_be_used_target_not_npable(): _should_cftime_be_used(src, "noleap", False) -@pytest.mark.parametrize("dtype", [np.uint8, np.uint16, np.uint32, np.uint64]) -def test_decode_cf_datetime_uint(dtype): +@pytest.mark.parametrize( + "dtype", + [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64], +) +def test_decode_cf_datetime_varied_integer_dtypes(dtype): units = "seconds since 2018-08-22T03:23:03Z" num_dates = dtype(50) - result = decode_cf_datetime(num_dates, units) + # Set use_cftime=False to ensure we cannot mask a failure by falling back + # to cftime. 
+ result = decode_cf_datetime(num_dates, units, use_cftime=False) expected = np.asarray(np.datetime64("2018-08-22T03:23:53", "ns")) np.testing.assert_equal(result, expected) @@ -1154,6 +1159,14 @@ def test_decode_cf_datetime_uint64_with_cftime(): np.testing.assert_equal(result, expected) +def test_decode_cf_datetime_uint64_with_pandas_overflow_error(): + units = "nanoseconds since 1970-01-01" + calendar = "standard" + num_dates = np.uint64(1_000_000 * 86_400 * 360 * 500_000) + with pytest.raises(OutOfBoundsTimedelta): + decode_cf_datetime(num_dates, units, calendar, use_cftime=False) + + @requires_cftime def test_decode_cf_datetime_uint64_with_cftime_overflow_error(): units = "microseconds since 1700-01-01" @@ -1416,10 +1429,8 @@ def test_roundtrip_float_times() -> None: "days since 1700-01-01", np.dtype("int32"), ), - "mixed-cftime-pandas-encoding-with-prescribed-units-and-dtype": ( - "250YS", - "days since 1700-01-01", - np.dtype("int32"), + "mixed-cftime-pandas-encoding-with-prescribed-units-and-dtype": pytest.param( + "250YS", "days since 1700-01-01", np.dtype("int32"), marks=requires_cftime ), "pandas-encoding-with-default-units-and-dtype": ("250YS", None, None), } From ca0a0b388faddc30f43fedf14abf754c408e0f0f Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Wed, 18 Sep 2024 20:00:14 -0400 Subject: [PATCH 2/2] Refine what's new entry --- doc/whats-new.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 27e4fa8d088..e4b2a06a3e7 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -49,9 +49,9 @@ Bug fixes By `Etienne Schalk `_. - Work around `upstream pandas issue <https://github.com/pandas-dev/pandas/issues/56996>`_ to ensure that we can - decode times encoded with ``np.int32`` values in environments with NumPy 2.0 - or greater without needing to fall back to cftime (:pull:`9518`). By `Spencer - Clark `_. + decode times encoded with small integer dtype values (e.g.
``np.int32``) in + environments with NumPy 2.0 or greater without needing to fall back to cftime + (:pull:`9518`). By `Spencer Clark `_. - Fix bug when encoding times with missing values as floats in the case when the non-missing times could in theory be encoded with integers (:issue:`9488`, :pull:`9497`). By `Spencer Clark