Skip to content

Commit

Permalink
Python binding for quarters (#8862)
Browse files Browse the repository at this point in the history
Closes #8676.
This PR adds Python bindings for #8779.

Authors:
  - https://github.com/shaneding

Approvers:
  - Michael Wang (https://github.com/isVoid)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #8862
  • Loading branch information
shaneding authored Aug 10, 2021
1 parent 7d892d1 commit 05447ad
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 3 deletions.
1 change: 1 addition & 0 deletions python/cudf/cudf/_lib/cpp/datetime.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,5 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil:
) except +
cdef unique_ptr[column] day_of_year(const column_view& column) except +
cdef unique_ptr[column] is_leap_year(const column_view& column) except +
cdef unique_ptr[column] extract_quarter(const column_view& column) except +
cdef unique_ptr[column] days_in_month(const column_view& column) except +
14 changes: 14 additions & 0 deletions python/cudf/cudf/_lib/datetime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,20 @@ def is_leap_year(Column col):
return Column.from_unique_ptr(move(c_result))


def extract_quarter(Column col):
    """
    Compute the quarter of the year for each timestamp in ``col``.

    The work is delegated to ``libcudf_datetime.extract_quarter``, which is
    invoked with the GIL released; the resulting libcudf column is wrapped
    in a new ``Column`` and returned.
    """
    cdef column_view input_view = col.view()
    cdef unique_ptr[column] c_quarters

    with nogil:
        c_quarters = move(libcudf_datetime.extract_quarter(input_view))

    return Column.from_unique_ptr(move(c_quarters))


def days_in_month(Column col):
"""Extracts the number of days in the month of the date
"""
Expand Down
27 changes: 26 additions & 1 deletion python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from pandas._config import get_option

import cudf
from cudf._lib.datetime import is_leap_year
from cudf._lib.datetime import extract_quarter, is_leap_year
from cudf._lib.filling import sequence
from cudf._lib.search import search_sorted
from cudf._lib.table import Table
Expand Down Expand Up @@ -2493,6 +2493,31 @@ def is_leap_year(self):
res = is_leap_year(self._values).fillna(False)
return cupy.asarray(res)

@property
def quarter(self):
    """
    Quarter of the year that each date in the index falls in.

    A year is divided into 4 quarters: January - March is the first,
    April - June the second, July - September the third and
    October - December the fourth.

    Returns
    -------
    Int8Index
        Integer indicating which quarter the date belongs to.

    Examples
    --------
    >>> import cudf
    >>> gIndex = cudf.DatetimeIndex(["2020-05-31 08:00:00",
    ...    "1999-12-31 18:40:00"])
    >>> gIndex.quarter
    Int8Index([2, 4], dtype='int8')
    """
    # Wrap the extracted quarter column directly in an int8 index.
    return Int8Index(extract_quarter(self._values), dtype="int8")

def to_pandas(self):
nanos = self._values.astype("datetime64[ns]")
return pd.DatetimeIndex(nanos.to_pandas(), name=self.name)
Expand Down
30 changes: 28 additions & 2 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -5945,13 +5945,39 @@ def is_leap_year(self):
)

@property
def quarter(self):
    """
    Integer indicator for which quarter of the year the date belongs in.

    There are 4 quarters in a year. With the first quarter being from
    January - March, second quarter being April - June, third quarter
    being July - September and fourth quarter being October - December.

    Returns
    -------
    Series
        Integer indicating which quarter the date belongs to.

    Examples
    --------
    >>> import cudf
    >>> s = cudf.Series(["2020-05-31 08:00:00","1999-12-31 18:40:00"],
    ...     dtype="datetime64[ms]")
    >>> s.dt.quarter
    0    2
    1    4
    dtype: int8
    """
    # The extracted quarter column is cast to int8 before being wrapped.
    res = libcudf.datetime.extract_quarter(self.series._column).astype(
        np.int8
    )
    # Carry over the index and name of the series being accessed.
    return Series._from_data(
        {None: res}, index=self.series._index, name=self.series.name,
    )

@property
def is_month_start(self):
    """
    Boolean flags marking the dates that are the first day of the month.

    Null entries in the comparison result are filled with ``False``.
    """
    on_first_day = self.day == 1
    return on_first_day.fillna(False)
Expand Down
33 changes: 33 additions & 0 deletions python/cudf/cudf/tests/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1300,6 +1300,39 @@ def test_is_leap_year():
assert_eq(expect2, got2)


def test_quarter():
    """Compare ``quarter`` on cudf Series and DatetimeIndex with pandas."""
    timestamps = [
        "2020-05-31 08:00:00",
        "1999-12-31 18:40:00",
        "2000-12-31 04:00:00",
        "1900-02-28 07:00:00",
        "1800-03-14 07:30:00",
        "2100-03-14 07:30:00",
        "1970-01-01 00:00:00",
        "1969-12-31 12:59:00",
    ]

    # Series accessor: build the pandas series, then mirror it on the GPU.
    pd_series = pd.Series(timestamps, dtype="datetime64[s]")
    gd_series = cudf.from_pandas(pd_series)

    series_expected = pd_series.dt.quarter
    series_actual = gd_series.dt.quarter
    assert_eq(series_expected, series_actual, check_dtype=False)

    # DatetimeIndex property: same data, default index resolution.
    pd_index = pd.DatetimeIndex(timestamps)
    gd_index = cudf.from_pandas(pd_index)

    index_expected = pd_index.quarter
    index_actual = gd_index.quarter

    # The index variant must come back as an Int8Index.
    assert isinstance(index_actual, cudf.Int8Index)
    assert_eq(index_expected.values, index_actual.values, check_dtype=False)


@pytest.mark.parametrize("dtype", DATETIME_TYPES)
def test_days_in_months(dtype):
nrows = 1000
Expand Down

0 comments on commit 05447ad

Please sign in to comment.