Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Python binding for quarters #8862

Merged
merged 23 commits into from
Aug 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions python/cudf/cudf/_lib/cpp/datetime.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,5 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil:
) except +
cdef unique_ptr[column] day_of_year(const column_view& column) except +
cdef unique_ptr[column] is_leap_year(const column_view& column) except +
cdef unique_ptr[column] extract_quarter(const column_view& column) except +
cdef unique_ptr[column] days_in_month(const column_view& column) except +
14 changes: 14 additions & 0 deletions python/cudf/cudf/_lib/datetime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,20 @@ def is_leap_year(Column col):
return Column.from_unique_ptr(move(c_result))


def extract_quarter(Column col):
    """
    Compute, for every timestamp in ``col``, the quarter of the year
    it falls in, and return the results as a new Column.
    """
    cdef column_view input_view = col.view()
    cdef unique_ptr[column] quarters

    # The libcudf call does not touch Python objects, so run it without
    # holding the GIL.
    with nogil:
        quarters = move(libcudf_datetime.extract_quarter(input_view))

    return Column.from_unique_ptr(move(quarters))


def days_in_month(Column col):
"""Extracts the number of days in the month of the date
"""
Expand Down
27 changes: 26 additions & 1 deletion python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from pandas._config import get_option

import cudf
from cudf._lib.datetime import is_leap_year
from cudf._lib.datetime import extract_quarter, is_leap_year
from cudf._lib.filling import sequence
from cudf._lib.search import search_sorted
from cudf._lib.table import Table
Expand Down Expand Up @@ -2283,6 +2283,31 @@ def is_leap_year(self):
res = is_leap_year(self._values).fillna(False)
return cupy.asarray(res)

@property
def quarter(self):
    """
    Integer indicator for which quarter of the year the date belongs in.

    There are 4 quarters in a year. With the first quarter being from
    January - March, second quarter being April - June, third quarter
    being July - September and fourth quarter being October - December.

    Returns
    -------
    Int8Index
        Integer indicating which quarter the date belongs to. The
        result carries the same name as this index.

    Examples
    --------
    >>> import cudf
    >>> gIndex = cudf.DatetimeIndex(["2020-05-31 08:00:00",
    ...    "1999-12-31 18:40:00"])
    >>> gIndex.quarter
    Int8Index([2, 4], dtype='int8')
    """
    res = extract_quarter(self._values)
    # Propagate the index name to the result so it is not silently
    # dropped; pandas' DatetimeIndex.quarter keeps the name as well.
    return Int8Index(res, dtype="int8", name=self.name)

def to_pandas(self):
    """
    Convert this index to a ``pandas.DatetimeIndex``.

    The underlying values are cast to ``datetime64[ns]`` before the
    conversion, and the index name is carried over to the result.
    """
    ns_values = self._values.astype("datetime64[ns]")
    host_values = ns_values.to_pandas()
    return pd.DatetimeIndex(host_values, name=self.name)
Expand Down
30 changes: 28 additions & 2 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -5944,13 +5944,39 @@ def is_leap_year(self):
)

@property
def quarter(self):
    """
    Integer indicator for which quarter of the year the date belongs in.

    There are 4 quarters in a year. With the first quarter being from
    January - March, second quarter being April - June, third quarter
    being July - September and fourth quarter being October - December.

    Returns
    -------
    Series
        Integer indicating which quarter the date belongs to. The
        result keeps the index and name of the original Series.

    Examples
    --------
    >>> import cudf
    >>> s = cudf.Series(["2020-05-31 08:00:00","1999-12-31 18:40:00"],
    ...     dtype="datetime64[ms]")
    >>> s.dt.quarter
    0    2
    1    4
    dtype: int8
    """
    # Cast the extracted quarters to int8 so the dtype matches the
    # documented result above.
    res = libcudf.datetime.extract_quarter(self.series._column).astype(
        np.int8
    )
    return Series._from_data(
        {None: res}, index=self.series._index, name=self.series.name,
    )

@property
def is_month_start(self):
    """
    Booleans indicating if dates are the first day of the month.

    Entries for which the day comparison yields a null are reported
    as ``False``.
    """
    first_day_mask = self.day == 1
    return first_day_mask.fillna(False)
Expand Down
33 changes: 33 additions & 0 deletions python/cudf/cudf/tests/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1300,6 +1300,39 @@ def test_is_leap_year():
assert_eq(expect2, got2)


def test_quarter():
    """
    Check that the ``quarter`` accessors of cudf Series and
    DatetimeIndex agree with their pandas counterparts.
    """
    timestamps = [
        "2020-05-31 08:00:00",
        "1999-12-31 18:40:00",
        "2000-12-31 04:00:00",
        "1900-02-28 07:00:00",
        "1800-03-14 07:30:00",
        "2100-03-14 07:30:00",
        "1970-01-01 00:00:00",
        "1969-12-31 12:59:00",
    ]

    # Series: compare the .dt accessor results, ignoring dtype width.
    pd_series = pd.Series(timestamps, dtype="datetime64[s]")
    gpu_series = cudf.from_pandas(pd_series)
    assert_eq(pd_series.dt.quarter, gpu_series.dt.quarter, check_dtype=False)

    # DatetimeIndex: the cudf result must be an Int8Index whose values
    # match pandas.
    pd_index = pd.DatetimeIndex(timestamps)
    gpu_index = cudf.from_pandas(pd_index)

    expected_quarters = pd_index.quarter
    actual_quarters = gpu_index.quarter

    assert isinstance(actual_quarters, cudf.Int8Index)
    assert_eq(
        expected_quarters.values, actual_quarters.values, check_dtype=False
    )


@pytest.mark.parametrize("dtype", DATETIME_TYPES)
def test_days_in_months(dtype):
nrows = 1000
Expand Down