From 73e417e33b6f8f57d67404ed31ffe9c15717ab9c Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 3 Jan 2024 11:55:02 -0800 Subject: [PATCH 1/5] Allow Z in datetime string parsing in non pandas compat mode --- python/cudf/cudf/core/column/datetime.py | 5 ++++- python/cudf/cudf/tests/test_datetime.py | 21 ++++++++++++++++----- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 7980b58ab8b..466ea3220c8 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. from __future__ import annotations @@ -102,6 +102,9 @@ def infer_format(element: str, **kwargs) -> str: """ Infers datetime format from a string, also takes cares for `ms` and `ns` """ + if not cudf.get_option("mode.pandas_compatible"): + # We allow "Z" but don't localize it to datetime64[ns, UTC] type (yet) + element = element.replace("Z", "") fmt = _guess_datetime_format(element, **kwargs) if fmt is not None: diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 07c8c407ab9..957870863c3 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. import datetime import operator @@ -1306,8 +1306,9 @@ def test_datetime_infer_format(data, timezone, dtype): assert_eq(expected, actual) else: - with pytest.raises(NotImplementedError): - sr.astype(dtype) + with cudf.option_context("mode.pandas_compatible", True): + with pytest.raises(NotImplementedError): + sr.astype(dtype) def test_dateoffset_instance_subclass_check(): @@ -2308,12 +2309,22 @@ def test_format_timezone_not_implemented(code): ) -@pytest.mark.parametrize("tz", ["Z", "UTC-3", "+01:00"]) -def test_no_format_timezone_not_implemented(tz): +@pytest.mark.parametrize("tz", ["UTC-3", "+01:00"]) +def test_utcoffset_not_implemented(tz): with pytest.raises(NotImplementedError): cudf.to_datetime([f"2020-01-01 00:00:00{tz}"]) +def test_Z_utcoffset(): + if cudf.get_option("mode.pandas_compatible"): + with pytest.raises(NotImplementedError): + cudf.to_datetime(["2020-01-01 00:00:00Z"]) + else: + result = cudf.to_datetime(["2020-01-01 00:00:00Z"]) + expected = cudf.to_datetime(["2020-01-01 00:00:00"]) + assert_eq(result, expected) + + @pytest.mark.parametrize("arg", [True, False]) def test_args_not_datetime_typerror(arg): with pytest.raises(TypeError): From c2d10d8e17fbd255879eed8a25e8eb95abf436f1 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 3 Jan 2024 12:20:27 -0800 Subject: [PATCH 2/5] Adjust tests --- python/cudf/cudf/tests/test_datetime.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 957870863c3..c126a893f19 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -1310,6 +1310,8 @@ def test_datetime_infer_format(data, timezone, dtype): with pytest.raises(NotImplementedError): sr.astype(dtype) + # pandas doesn't allow parsing "Z" to naive type + def test_dateoffset_instance_subclass_check(): assert not issubclass(pd.DateOffset, cudf.DateOffset) @@ -2316,13 +2318,13 @@ def test_utcoffset_not_implemented(tz): def test_Z_utcoffset(): - if cudf.get_option("mode.pandas_compatible"): + with cudf.option_context("mode.pandas_compatible", True): with pytest.raises(NotImplementedError): cudf.to_datetime(["2020-01-01 00:00:00Z"]) - else: - result = cudf.to_datetime(["2020-01-01 00:00:00Z"]) - expected = cudf.to_datetime(["2020-01-01 00:00:00"]) - assert_eq(result, expected) + + result = cudf.to_datetime(["2020-01-01 00:00:00Z"]) + expected = cudf.to_datetime(["2020-01-01 00:00:00"]) + assert_eq(result, expected) @pytest.mark.parametrize("arg", [True, False]) From 71008001e0b0c8a8df2e357360c9830bc3f55575 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 3 Jan 2024 15:35:48 -0800 Subject: [PATCH 3/5] Update python/cudf/cudf/tests/test_datetime.py Co-authored-by: Bradley Dice --- python/cudf/cudf/tests/test_datetime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index c126a893f19..e7a34565333 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -2312,7 +2312,7 @@ def test_format_timezone_not_implemented(code): @pytest.mark.parametrize("tz", ["UTC-3", "+01:00"]) -def test_utcoffset_not_implemented(tz): +def test_utc_offset_not_implemented(tz): with pytest.raises(NotImplementedError): cudf.to_datetime([f"2020-01-01 00:00:00{tz}"]) From 0462c430019269dcee28e118f5a26acf4664b0e2 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 3 Jan 2024 15:35:58 -0800 Subject: [PATCH 4/5] Update python/cudf/cudf/tests/test_datetime.py Co-authored-by: Bradley Dice --- python/cudf/cudf/tests/test_datetime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index e7a34565333..680b84696b4 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -2317,7 +2317,7 @@ def test_utc_offset_not_implemented(tz): cudf.to_datetime([f"2020-01-01 00:00:00{tz}"]) -def test_Z_utcoffset(): +def test_Z_utc_offset(): with cudf.option_context("mode.pandas_compatible", True): with pytest.raises(NotImplementedError): cudf.to_datetime(["2020-01-01 00:00:00Z"]) From 5641fb570b98f551c0d4051cb8e12999cfe6b4b9 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 3 Jan 2024 15:37:15 -0800 Subject: [PATCH 5/5] move comment --- python/cudf/cudf/tests/test_datetime.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 680b84696b4..22d452fdda5 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -1308,10 +1308,9 @@ def test_datetime_infer_format(data, timezone, dtype): else: with cudf.option_context("mode.pandas_compatible", True): with pytest.raises(NotImplementedError): + # pandas doesn't allow parsing "Z" to naive type sr.astype(dtype) - # pandas doesn't allow parsing "Z" to naive type - def test_dateoffset_instance_subclass_check(): assert not issubclass(pd.DateOffset, cudf.DateOffset)