From 05447ad5710b272b2dc985ed9cad7800650ad2a7 Mon Sep 17 00:00:00 2001 From: shaneding Date: Tue, 10 Aug 2021 19:11:58 -0400 Subject: [PATCH] Python binding for quarters (#8862) Closes #8676. This PR adds python bindings for #8779. Authors: - https://github.com/shaneding Approvers: - Michael Wang (https://github.com/isVoid) - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/8862 --- python/cudf/cudf/_lib/cpp/datetime.pxd | 1 + python/cudf/cudf/_lib/datetime.pyx | 14 +++++++++++ python/cudf/cudf/core/index.py | 27 +++++++++++++++++++- python/cudf/cudf/core/series.py | 30 ++++++++++++++++++++-- python/cudf/cudf/tests/test_datetime.py | 33 +++++++++++++++++++++++++ 5 files changed, 102 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/_lib/cpp/datetime.pxd b/python/cudf/cudf/_lib/cpp/datetime.pxd index b8cac6cd42f..26d25e3017e 100644 --- a/python/cudf/cudf/_lib/cpp/datetime.pxd +++ b/python/cudf/cudf/_lib/cpp/datetime.pxd @@ -18,4 +18,5 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: ) except + cdef unique_ptr[column] day_of_year(const column_view& column) except + cdef unique_ptr[column] is_leap_year(const column_view& column) except + + cdef unique_ptr[column] extract_quarter(const column_view& column) except + cdef unique_ptr[column] days_in_month(const column_view& column) except + diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index 3a1c3ebbf5e..51ceb7c0d8a 100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -71,6 +71,20 @@ def is_leap_year(Column col): return Column.from_unique_ptr(move(c_result)) +def extract_quarter(Column col): + """ + Returns a column which contains the corresponding quarter of the year + for every timestamp inside the input column. 
+ """ + cdef unique_ptr[column] c_result + cdef column_view col_view = col.view() + + with nogil: + c_result = move(libcudf_datetime.extract_quarter(col_view)) + + return Column.from_unique_ptr(move(c_result)) + + def days_in_month(Column col): """Extracts the number of days in the month of the date """ diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index b3ca6f7973b..9ed756547bb 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -13,7 +13,7 @@ from pandas._config import get_option import cudf -from cudf._lib.datetime import is_leap_year +from cudf._lib.datetime import extract_quarter, is_leap_year from cudf._lib.filling import sequence from cudf._lib.search import search_sorted from cudf._lib.table import Table @@ -2493,6 +2493,31 @@ def is_leap_year(self): res = is_leap_year(self._values).fillna(False) return cupy.asarray(res) + @property + def quarter(self): + """ + Integer indicator for which quarter of the year the date belongs in. + + There are 4 quarters in a year. With the first quarter being from + January - March, second quarter being April - June, third quarter + being July - September and fourth quarter being October - December. + + Returns + ------- + Int8Index + Integer indicating which quarter the date belongs to. + + Examples + -------- + >>> import cudf + >>> gIndex = cudf.DatetimeIndex(["2020-05-31 08:00:00", + ... 
"1999-12-31 18:40:00"]) + >>> gIndex.quarter + Int8Index([2, 4], dtype='int8') + """ + res = extract_quarter(self._values) + return Int8Index(res, dtype="int8") + def to_pandas(self): nanos = self._values.astype("datetime64[ns]") return pd.DatetimeIndex(nanos.to_pandas(), name=self.name) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index e7a58be62b5..dd83b69b459 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -5945,13 +5945,39 @@ def is_leap_year(self): ) @property - def is_month_start(self): + def quarter(self): """ - Boolean indicator if the date is the first day of the month. + Integer indicator for which quarter of the year the date belongs in. + + There are 4 quarters in a year. With the first quarter being from + January - March, second quarter being April - June, third quarter + being July - September and fourth quarter being October - December. Returns ------- Series + Integer indicating which quarter the date belongs to. + + Examples + -------- + >>> import cudf + >>> s = cudf.Series(["2020-05-31 08:00:00","1999-12-31 18:40:00"], + ... dtype="datetime64[ms]") + >>> s.dt.quarter + 0 2 + 1 4 + dtype: int8 + """ + res = libcudf.datetime.extract_quarter(self.series._column).astype( + np.int8 + ) + return Series._from_data( + {None: res}, index=self.series._index, name=self.series.name, + ) + + @property + def is_month_start(self): + """ Booleans indicating if dates are the first day of the month. 
""" return (self.day == 1).fillna(False) diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 0c2dfb0d268..904595ad5a5 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -1300,6 +1300,39 @@ def test_is_leap_year(): assert_eq(expect2, got2) +def test_quarter(): + data = [ + "2020-05-31 08:00:00", + "1999-12-31 18:40:00", + "2000-12-31 04:00:00", + "1900-02-28 07:00:00", + "1800-03-14 07:30:00", + "2100-03-14 07:30:00", + "1970-01-01 00:00:00", + "1969-12-31 12:59:00", + ] + dtype = "datetime64[s]" + + # Series + ps = pd.Series(data, dtype=dtype) + gs = cudf.from_pandas(ps) + + expect = ps.dt.quarter + got = gs.dt.quarter + + assert_eq(expect, got, check_dtype=False) + + # DatetimeIndex + pIndex = pd.DatetimeIndex(data) + gIndex = cudf.from_pandas(pIndex) + + expect2 = pIndex.quarter + got2 = gIndex.quarter + + assert isinstance(got2, cudf.Int8Index) + assert_eq(expect2.values, got2.values, check_dtype=False) + + @pytest.mark.parametrize("dtype", DATETIME_TYPES) def test_days_in_months(dtype): nrows = 1000