Skip to content

Commit

Permalink
Implement Series.datetime.floor (#9488)
Browse files Browse the repository at this point in the history
Fixes: #7102

Authors:
  - Sheilah Kirui (https://github.com/skirui-source)
  - Mayank Anand (https://github.com/mayankanand007)

Approvers:
  - Mike Wilson (https://github.com/hyperbolic2346)
  - Ashwin Srinath (https://github.com/shwina)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #9488
  • Loading branch information
skirui-source authored Oct 29, 2021
1 parent f41e05f commit 201f750
Show file tree
Hide file tree
Showing 9 changed files with 383 additions and 6 deletions.
93 changes: 92 additions & 1 deletion cpp/include/cudf/datetime.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -378,5 +378,96 @@ std::unique_ptr<column> ceil_nanosecond(
column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
 * @brief Round each timestamp in the column down to the nearest day.
 *
 * The result keeps the timestamp resolution of the input column; only the value
 * is rounded down to a day boundary.
 *
 * @param column cudf::column_view of the input datetime values
 * @param mr Device memory resource used to allocate device memory of the returned column.
 *
 * @throw cudf::logic_error if input column datatype is not TIMESTAMP
 * @return cudf::column of the same datetime resolution as the input column
 */
std::unique_ptr<cudf::column> floor_day(
cudf::column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
 * @brief Round each timestamp in the column down to the nearest hour.
 *
 * The result keeps the timestamp resolution of the input column; only the value
 * is rounded down to an hour boundary.
 *
 * @param column cudf::column_view of the input datetime values
 * @param mr Device memory resource used to allocate device memory of the returned column.
 *
 * @throw cudf::logic_error if input column datatype is not TIMESTAMP
 * @return cudf::column of the same datetime resolution as the input column
 */
std::unique_ptr<cudf::column> floor_hour(
cudf::column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
 * @brief Round each timestamp in the column down to the nearest minute.
 *
 * The result keeps the timestamp resolution of the input column; only the value
 * is rounded down to a minute boundary.
 *
 * @param column cudf::column_view of the input datetime values
 * @param mr Device memory resource used to allocate device memory of the returned column.
 *
 * @throw cudf::logic_error if input column datatype is not TIMESTAMP
 * @return cudf::column of the same datetime resolution as the input column
 */
std::unique_ptr<cudf::column> floor_minute(
cudf::column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
 * @brief Round each timestamp in the column down to the nearest second.
 *
 * The result keeps the timestamp resolution of the input column; only the value
 * is rounded down to a second boundary.
 *
 * @param column cudf::column_view of the input datetime values
 * @param mr Device memory resource used to allocate device memory of the returned column.
 *
 * @throw cudf::logic_error if input column datatype is not TIMESTAMP
 * @return cudf::column of the same datetime resolution as the input column
 */
std::unique_ptr<cudf::column> floor_second(
cudf::column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
 * @brief Round each timestamp in the column down to the nearest millisecond.
 *
 * The result keeps the timestamp resolution of the input column; only the value
 * is rounded down to a millisecond boundary.
 *
 * @param column cudf::column_view of the input datetime values
 * @param mr Device memory resource used to allocate device memory of the returned column.
 *
 * @throw cudf::logic_error if input column datatype is not TIMESTAMP
 * @return cudf::column of the same datetime resolution as the input column
 */
std::unique_ptr<column> floor_millisecond(
column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
 * @brief Round each timestamp in the column down to the nearest microsecond.
 *
 * The result keeps the timestamp resolution of the input column; only the value
 * is rounded down to a microsecond boundary.
 *
 * @param column cudf::column_view of the input datetime values
 * @param mr Device memory resource used to allocate device memory of the returned column.
 *
 * @throw cudf::logic_error if input column datatype is not TIMESTAMP
 * @return cudf::column of the same datetime resolution as the input column
 */
std::unique_ptr<column> floor_microsecond(
column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
 * @brief Round each timestamp in the column down to the nearest nanosecond.
 *
 * The result keeps the timestamp resolution of the input column; only the value
 * is rounded down to a nanosecond boundary.
 *
 * @param column cudf::column_view of the input datetime values
 * @param mr Device memory resource used to allocate device memory of the returned column.
 *
 * @throw cudf::logic_error if input column datatype is not TIMESTAMP
 * @return cudf::column of the same datetime resolution as the input column
 */
std::unique_ptr<column> floor_nanosecond(
column_view const& column,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

} // namespace datetime
} // namespace cudf
97 changes: 95 additions & 2 deletions cpp/src/datetime/datetime_ops.cu
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,35 @@ struct ceil_timestamp {
}
};

/**
 * @brief Functor that rounds a timestamp down to the unit selected by `COMPONENT`.
 *
 * The value is floored to the requested unit and then cast back to the
 * duration type of the incoming `Timestamp`, so the result has the same
 * resolution as the input.
 *
 * @tparam COMPONENT Unit to floor to (DAY through NANOSECOND).
 */
template <datetime_component COMPONENT>
struct floor_timestamp {
  template <typename Timestamp>
  CUDA_DEVICE_CALLABLE Timestamp operator()(Timestamp const ts) const
  {
    using namespace cuda::std::chrono;
    // Every branch casts back to the resolution of the input timestamp.
    using result_duration = typename Timestamp::duration;

    // Handled units: D, H, T (minute), S, L (millisecond), U (microsecond), N (nanosecond)
    switch (COMPONENT) {
      case datetime_component::DAY: return time_point_cast<result_duration>(floor<duration_D>(ts));
      case datetime_component::HOUR: return time_point_cast<result_duration>(floor<duration_h>(ts));
      case datetime_component::MINUTE:
        return time_point_cast<result_duration>(floor<duration_m>(ts));
      case datetime_component::SECOND:
        return time_point_cast<result_duration>(floor<duration_s>(ts));
      case datetime_component::MILLISECOND:
        return time_point_cast<result_duration>(floor<duration_ms>(ts));
      case datetime_component::MICROSECOND:
        return time_point_cast<result_duration>(floor<duration_us>(ts));
      case datetime_component::NANOSECOND:
        return time_point_cast<result_duration>(floor<duration_ns>(ts));
      default: cudf_assert(false && "Unexpected resolution");
    }

    // Only reached for an unsupported component: yield a value-initialized timestamp.
    return {};
  }
};

// Number of days until month indexed by leap year and month (0-based index)
static __device__ int16_t const days_until_month[2][13] = {
{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365}, // For non leap years
Expand Down Expand Up @@ -196,7 +225,7 @@ struct is_leap_year_op {

// Specific function for applying ceil/floor date ops
template <typename TransformFunctor>
struct dispatch_ceil {
struct dispatch_ceil_or_floor {
template <typename Timestamp>
std::enable_if_t<cudf::is_timestamp<Timestamp>(), std::unique_ptr<cudf::column>> operator()(
cudf::column_view const& column,
Expand Down Expand Up @@ -403,7 +432,19 @@ std::unique_ptr<column> ceil_general(column_view const& column,
rmm::mr::device_memory_resource* mr)
{
return cudf::type_dispatcher(
column.type(), dispatch_ceil<detail::ceil_timestamp<Component>>{}, column, stream, mr);
column.type(), dispatch_ceil_or_floor<detail::ceil_timestamp<Component>>{}, column, stream, mr);
}

/**
 * @brief Floors every timestamp in `column` to the unit given by `Component`.
 *
 * Dispatches on the column's type and applies `detail::floor_timestamp<Component>`
 * through the shared ceil/floor dispatcher.
 *
 * @tparam Component Unit to floor to.
 * @param column Input column, forwarded to the dispatcher.
 * @param stream CUDA stream forwarded to the dispatcher.
 * @param mr Device memory resource forwarded to the dispatcher.
 * @return The column produced by the dispatcher.
 */
template <datetime_component Component>
std::unique_ptr<column> floor_general(column_view const& column,
                                      rmm::cuda_stream_view stream,
                                      rmm::mr::device_memory_resource* mr)
{
  using floor_dispatcher = dispatch_ceil_or_floor<detail::floor_timestamp<Component>>;
  return cudf::type_dispatcher(column.type(), floor_dispatcher{}, column, stream, mr);
}

std::unique_ptr<column> extract_year(column_view const& column,
Expand Down Expand Up @@ -560,6 +601,58 @@ std::unique_ptr<column> ceil_nanosecond(column_view const& column,
column, rmm::cuda_stream_default, mr);
}

// Public entry point: floors each timestamp in `column` to a whole day by
// forwarding to detail::floor_general with rmm::cuda_stream_default.
std::unique_ptr<column> floor_day(column_view const& column, rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  constexpr auto component = detail::datetime_component::DAY;
  return detail::floor_general<component>(column, rmm::cuda_stream_default, mr);
}

// Public entry point: floors each timestamp in `column` to a whole hour by
// forwarding to detail::floor_general with rmm::cuda_stream_default.
std::unique_ptr<column> floor_hour(column_view const& column, rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  constexpr auto component = detail::datetime_component::HOUR;
  return detail::floor_general<component>(column, rmm::cuda_stream_default, mr);
}

// Public entry point: floors each timestamp in `column` to a whole minute by
// forwarding to detail::floor_general with rmm::cuda_stream_default.
std::unique_ptr<column> floor_minute(column_view const& column, rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  constexpr auto component = detail::datetime_component::MINUTE;
  return detail::floor_general<component>(column, rmm::cuda_stream_default, mr);
}

// Public entry point: floors each timestamp in `column` to a whole second by
// forwarding to detail::floor_general with rmm::cuda_stream_default.
std::unique_ptr<column> floor_second(column_view const& column, rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  constexpr auto component = detail::datetime_component::SECOND;
  return detail::floor_general<component>(column, rmm::cuda_stream_default, mr);
}

// Public entry point: floors each timestamp in `column` to a whole millisecond by
// forwarding to detail::floor_general with rmm::cuda_stream_default.
std::unique_ptr<column> floor_millisecond(column_view const& column,
                                          rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  constexpr auto component = detail::datetime_component::MILLISECOND;
  return detail::floor_general<component>(column, rmm::cuda_stream_default, mr);
}

// Public entry point: floors each timestamp in `column` to a whole microsecond by
// forwarding to detail::floor_general with rmm::cuda_stream_default.
std::unique_ptr<column> floor_microsecond(column_view const& column,
                                          rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  constexpr auto component = detail::datetime_component::MICROSECOND;
  return detail::floor_general<component>(column, rmm::cuda_stream_default, mr);
}

// Public entry point: floors each timestamp in `column` to a whole nanosecond by
// forwarding to detail::floor_general with rmm::cuda_stream_default.
std::unique_ptr<column> floor_nanosecond(column_view const& column,
                                         rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();
  constexpr auto component = detail::datetime_component::NANOSECOND;
  return detail::floor_general<component>(column, rmm::cuda_stream_default, mr);
}

std::unique_ptr<column> extract_year(column_view const& column, rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
Expand Down
56 changes: 56 additions & 0 deletions cpp/tests/datetime/datetime_ops_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -827,4 +827,60 @@ TEST_F(BasicDatetimeOpsTest, TestQuarter)
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_quarter(timestamps_s), quarter);
}

// Verifies every public floor_* entry point (day through nanosecond) against a
// host-side reference computed with cuda::std::chrono::floor, for each timestamp
// type the typed fixture is instantiated with.
TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime)
{
  using T = TypeParam;
  using namespace cudf::test;
  using namespace cudf::datetime;
  using namespace cuda::std::chrono;

  // +/-2.5e12 ms is +/-2.5e18 ns, which still fits in a signed 64-bit count, so
  // the nanosecond reference below does not overflow.
  auto start = milliseconds(-2500000000000);  // Sat, 11 Oct 1890 19:33:20 GMT
  auto stop_ = milliseconds(2500000000000);   // Mon, 22 Mar 2049 04:26:40 GMT

  auto input = generate_timestamps<T>(this->size(), time_point_ms(start), time_point_ms(stop_));

  auto host_val                     = to_host<T>(input);
  thrust::host_vector<T> timestamps = host_val.first;

  thrust::host_vector<T> floored_day(timestamps.size());
  thrust::transform(timestamps.begin(), timestamps.end(), floored_day.begin(), [](auto i) {
    return time_point_cast<typename T::duration>(floor<days>(i));
  });
  auto expected_day = fixed_width_column_wrapper<T, typename T::duration::rep>(floored_day.begin(),
                                                                               floored_day.end());
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_day(input), expected_day);

  thrust::host_vector<T> floored_hour(timestamps.size());
  thrust::transform(timestamps.begin(), timestamps.end(), floored_hour.begin(), [](auto i) {
    return time_point_cast<typename T::duration>(floor<hours>(i));
  });
  auto expected_hour = fixed_width_column_wrapper<T, typename T::duration::rep>(
    floored_hour.begin(), floored_hour.end());
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_hour(input), expected_hour);

  std::vector<T> floored_minute(timestamps.size());
  std::transform(timestamps.begin(), timestamps.end(), floored_minute.begin(), [](auto i) {
    return time_point_cast<typename T::duration>(floor<minutes>(i));
  });
  auto expected_minute = fixed_width_column_wrapper<T, typename T::duration::rep>(
    floored_minute.begin(), floored_minute.end());
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_minute(input), expected_minute);

  std::vector<T> floored_second(timestamps.size());
  std::transform(timestamps.begin(), timestamps.end(), floored_second.begin(), [](auto i) {
    return time_point_cast<typename T::duration>(floor<seconds>(i));
  });
  auto expected_second = fixed_width_column_wrapper<T, typename T::duration::rep>(
    floored_second.begin(), floored_second.end());
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_second(input), expected_second);

  std::vector<T> floored_millisecond(timestamps.size());
  std::transform(timestamps.begin(), timestamps.end(), floored_millisecond.begin(), [](auto i) {
    return time_point_cast<typename T::duration>(floor<milliseconds>(i));
  });
  auto expected_millisecond = fixed_width_column_wrapper<T, typename T::duration::rep>(
    floored_millisecond.begin(), floored_millisecond.end());
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_millisecond(input), expected_millisecond);

  // The two finest resolutions were previously not exercised at all.
  std::vector<T> floored_microsecond(timestamps.size());
  std::transform(timestamps.begin(), timestamps.end(), floored_microsecond.begin(), [](auto i) {
    return time_point_cast<typename T::duration>(floor<microseconds>(i));
  });
  auto expected_microsecond = fixed_width_column_wrapper<T, typename T::duration::rep>(
    floored_microsecond.begin(), floored_microsecond.end());
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_microsecond(input), expected_microsecond);

  std::vector<T> floored_nanosecond(timestamps.size());
  std::transform(timestamps.begin(), timestamps.end(), floored_nanosecond.begin(), [](auto i) {
    return time_point_cast<typename T::duration>(floor<nanoseconds>(i));
  });
  auto expected_nanosecond = fixed_width_column_wrapper<T, typename T::duration::rep>(
    floored_nanosecond.begin(), floored_nanosecond.end());
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_nanosecond(input), expected_nanosecond);
}

CUDF_TEST_PROGRAM_MAIN()
2 changes: 2 additions & 0 deletions docs/cudf/source/api_docs/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,8 @@ Datetime methods

strftime
isocalendar
ceil
floor


Timedelta properties
Expand Down
17 changes: 16 additions & 1 deletion python/cudf/cudf/_lib/cpp/datetime.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,22 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil:
cdef unique_ptr[column] ceil_microsecond(
const column_view& column
) except +
cdef unique_ptr[column] ceil_nanosecond(const column_view& column) except +
cdef unique_ptr[column] ceil_nanosecond(
const column_view& column
) except +
cdef unique_ptr[column] floor_day(const column_view& column) except +
cdef unique_ptr[column] floor_hour(const column_view& column) except +
cdef unique_ptr[column] floor_minute(const column_view& column) except +
cdef unique_ptr[column] floor_second(const column_view& column) except +
cdef unique_ptr[column] floor_millisecond(
const column_view& column
) except +
cdef unique_ptr[column] floor_microsecond(
const column_view& column
) except +
cdef unique_ptr[column] floor_nanosecond(
const column_view& column
) except +
cdef unique_ptr[column] add_calendrical_months(
const column_view& timestamps,
const column_view& months
Expand Down
27 changes: 27 additions & 0 deletions python/cudf/cudf/_lib/datetime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,33 @@ def ceil_datetime(Column col, object field):
return result


def floor_datetime(Column col, object field):
    """Round each datetime in ``col`` down to the resolution named by ``field``.

    Parameters
    ----------
    col : Column
        Column whose view is passed to the libcudf ``floor_*`` function.
    field : object
        Resolution code: "D" (day), "H" (hour), "T" (minute), "S" (second),
        "L" (millisecond), "U" (microsecond) or "N" (nanosecond).

    Returns
    -------
    Column
        Column built from the libcudf result.

    Raises
    ------
    ValueError
        If ``field`` is not one of the resolution codes listed above.
    """
    cdef unique_ptr[column] c_result
    cdef column_view col_view = col.view()
    cdef int resolution

    # Comparing ``field`` against string literals and building the error
    # message are Python-object operations, so they are done here while the
    # GIL is still held. Only a plain C integer is carried into the nogil
    # section below.
    # https://pandas.pydata.org/pandas-docs/version/0.25.0/reference/api/pandas.Timedelta.resolution.html
    if field == "D":
        resolution = 0
    elif field == "H":
        resolution = 1
    elif field == "T":
        resolution = 2
    elif field == "S":
        resolution = 3
    elif field == "L":
        resolution = 4
    elif field == "U":
        resolution = 5
    elif field == "N":
        resolution = 6
    else:
        raise ValueError(f"Invalid resolution: '{field}'")

    with nogil:
        if resolution == 0:
            c_result = move(libcudf_datetime.floor_day(col_view))
        elif resolution == 1:
            c_result = move(libcudf_datetime.floor_hour(col_view))
        elif resolution == 2:
            c_result = move(libcudf_datetime.floor_minute(col_view))
        elif resolution == 3:
            c_result = move(libcudf_datetime.floor_second(col_view))
        elif resolution == 4:
            c_result = move(libcudf_datetime.floor_millisecond(col_view))
        elif resolution == 5:
            c_result = move(libcudf_datetime.floor_microsecond(col_view))
        else:
            c_result = move(libcudf_datetime.floor_nanosecond(col_view))

    result = Column.from_unique_ptr(move(c_result))
    return result


def is_leap_year(Column col):
"""Returns a boolean indicator whether the year of the date is a leap year
"""
Expand Down
3 changes: 3 additions & 0 deletions python/cudf/cudf/core/column/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,9 @@ def get_dt_field(self, field: str) -> ColumnBase:
def ceil(self, field: str) -> ColumnBase:
    """Return the result of ``libcudf.datetime.ceil_datetime`` for this column.

    ``field`` is forwarded unchanged as the resolution argument.
    NOTE(review): presumably the same resolution codes as ``floor`` apply
    ("D", "H", "T", "S", "L", "U", "N") -- confirm against ``ceil_datetime``.
    """
    return libcudf.datetime.ceil_datetime(self, field)

def floor(self, field: str) -> ColumnBase:
    """Return the result of ``libcudf.datetime.floor_datetime`` for this column.

    ``field`` is forwarded unchanged as the resolution argument. The
    ``floor_datetime`` binding accepts "D", "H", "T", "S", "L", "U" or "N"
    and raises ``ValueError`` for any other value.
    """
    return libcudf.datetime.floor_datetime(self, field)

def normalize_binop_value(self, other: DatetimeLikeScalar) -> ScalarLike:
if isinstance(other, cudf.Scalar):
return other
Expand Down
Loading

0 comments on commit 201f750

Please sign in to comment.