diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index d67984daa7c..71e5968bf07 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -378,5 +378,96 @@ std::unique_ptr ceil_nanosecond( column_view const& column, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Round down to the nearest day + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. + * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr floor_day( + cudf::column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round down to the nearest hour + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. + * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr floor_hour( + cudf::column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round down to the nearest minute + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. + * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr floor_minute( + cudf::column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round down to the nearest second + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. + * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr floor_second( + cudf::column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round down to the nearest millisecond + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. + * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr floor_millisecond( + column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round down to the nearest microsecond + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. + * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr floor_microsecond( + column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Round down to the nearest nanosecond + * + * @param column cudf::column_view of the input datetime values + * @param mr Device memory resource used to allocate device memory of the returned column. + * + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + * @return cudf::column of the same datetime resolution as the input column + */ +std::unique_ptr floor_nanosecond( + column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + } // namespace datetime } // namespace cudf diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index 6e892b3e461..7cbacbd0a14 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -116,6 +116,35 @@ struct ceil_timestamp { } }; +template +struct floor_timestamp { + template + CUDA_DEVICE_CALLABLE Timestamp operator()(Timestamp const ts) const + { + using namespace cuda::std::chrono; + // want to use this with D, H, T (minute), S, L (millisecond), U + switch (COMPONENT) { + case datetime_component::DAY: + return time_point_cast(floor(ts)); + case datetime_component::HOUR: + return time_point_cast(floor(ts)); + case datetime_component::MINUTE: + return time_point_cast(floor(ts)); + case datetime_component::SECOND: + return time_point_cast(floor(ts)); + case datetime_component::MILLISECOND: + return time_point_cast(floor(ts)); + case datetime_component::MICROSECOND: + return time_point_cast(floor(ts)); + case datetime_component::NANOSECOND: + return time_point_cast(floor(ts)); + default: cudf_assert(false && "Unexpected resolution"); + } + + return {}; + } +}; + // Number of days until month indexed by leap year and month (0-based index) static __device__ int16_t const days_until_month[2][13] = { {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365}, // For non leap years @@ -196,7 +225,7 @@ struct is_leap_year_op { // Specific function for applying ceil/floor date ops template -struct dispatch_ceil { +struct dispatch_ceil_or_floor { template std::enable_if_t(), std::unique_ptr> operator()( cudf::column_view const& column, @@ -403,7 +432,19 @@ std::unique_ptr ceil_general(column_view const& column, rmm::mr::device_memory_resource* mr) { return cudf::type_dispatcher( - column.type(), dispatch_ceil>{}, column, stream, mr); + column.type(), dispatch_ceil_or_floor>{}, column, stream, mr); +} + +template +std::unique_ptr floor_general(column_view const& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + return cudf::type_dispatcher(column.type(), + dispatch_ceil_or_floor>{}, + column, + stream, + mr); } std::unique_ptr extract_year(column_view const& column, @@ -560,6 +601,58 @@ std::unique_ptr ceil_nanosecond(column_view const& column, column, rmm::cuda_stream_default, mr); } +std::unique_ptr floor_day(column_view const& column, rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::floor_general( + column, rmm::cuda_stream_default, mr); +} + +std::unique_ptr floor_hour(column_view const& column, rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::floor_general( + column, rmm::cuda_stream_default, mr); +} + +std::unique_ptr floor_minute(column_view const& column, rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::floor_general( + column, rmm::cuda_stream_default, mr); +} + +std::unique_ptr floor_second(column_view const& column, rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::floor_general( + column, rmm::cuda_stream_default, mr); +} + +std::unique_ptr floor_millisecond(column_view const& column, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::floor_general( + column, rmm::cuda_stream_default, mr); +} + +std::unique_ptr floor_microsecond(column_view const& column, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::floor_general( + column, rmm::cuda_stream_default, mr); +} + +std::unique_ptr floor_nanosecond(column_view const& column, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::floor_general( + column, rmm::cuda_stream_default, mr); +} + std::unique_ptr extract_year(column_view const& column, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp index 1d3e87279e5..d2703b8acd0 100644 --- a/cpp/tests/datetime/datetime_ops_test.cpp +++ b/cpp/tests/datetime/datetime_ops_test.cpp @@ -827,4 +827,60 @@ TEST_F(BasicDatetimeOpsTest, TestQuarter) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_quarter(timestamps_s), quarter); } +TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime) +{ + using T = TypeParam; + using namespace cudf::test; + using namespace cudf::datetime; + using namespace cuda::std::chrono; + + auto start = milliseconds(-2500000000000); // Sat, 11 Oct 1890 19:33:20 GMT + auto stop_ = milliseconds(2500000000000); // Mon, 22 Mar 2049 04:26:40 GMT + + auto input = generate_timestamps(this->size(), time_point_ms(start), time_point_ms(stop_)); + + auto host_val = to_host(input); + thrust::host_vector timestamps = host_val.first; + + thrust::host_vector floored_day(timestamps.size()); + thrust::transform(timestamps.begin(), timestamps.end(), floored_day.begin(), [](auto i) { + return time_point_cast(floor(i)); + }); + auto expected_day = fixed_width_column_wrapper(floored_day.begin(), + floored_day.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_day(input), expected_day); + + thrust::host_vector floored_hour(timestamps.size()); + thrust::transform(timestamps.begin(), timestamps.end(), floored_hour.begin(), [](auto i) { + return time_point_cast(floor(i)); + }); + auto expected_hour = fixed_width_column_wrapper( + floored_hour.begin(), floored_hour.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_hour(input), expected_hour); + + std::vector floored_minute(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), floored_minute.begin(), [](auto i) { + return time_point_cast(floor(i)); + }); + auto expected_minute = fixed_width_column_wrapper( + floored_minute.begin(), floored_minute.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_minute(input), expected_minute); + + std::vector floored_second(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), floored_second.begin(), [](auto i) { + return time_point_cast(floor(i)); + }); + auto expected_second = fixed_width_column_wrapper( + floored_second.begin(), floored_second.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_second(input), expected_second); + + std::vector floored_millisecond(timestamps.size()); + std::transform(timestamps.begin(), timestamps.end(), floored_millisecond.begin(), [](auto i) { + return time_point_cast(floor(i)); + }); + auto expected_millisecond = fixed_width_column_wrapper( + floored_millisecond.begin(), floored_millisecond.end()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_millisecond(input), expected_millisecond); +} + CUDF_TEST_PROGRAM_MAIN() diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst index 95cf58adf0e..c32a4a22ef2 100644 --- a/docs/cudf/source/api_docs/series.rst +++ b/docs/cudf/source/api_docs/series.rst @@ -297,6 +297,8 @@ Datetime methods strftime isocalendar + ceil + floor Timedelta properties diff --git a/python/cudf/cudf/_lib/cpp/datetime.pxd b/python/cudf/cudf/_lib/cpp/datetime.pxd index c27eb324008..71064a555c9 100644 --- a/python/cudf/cudf/_lib/cpp/datetime.pxd +++ b/python/cudf/cudf/_lib/cpp/datetime.pxd @@ -22,7 +22,22 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: cdef unique_ptr[column] ceil_microsecond( const column_view& column ) except + - cdef unique_ptr[column] ceil_nanosecond(const column_view& column) except + + cdef unique_ptr[column] ceil_nanosecond( + const column_view& column + ) except + + cdef unique_ptr[column] floor_day(const column_view& column) except + + cdef unique_ptr[column] floor_hour(const column_view& column) except + + cdef unique_ptr[column] floor_minute(const column_view& column) except + + cdef unique_ptr[column] floor_second(const column_view& column) except + + cdef unique_ptr[column] floor_millisecond( + const column_view& column + ) except + + cdef unique_ptr[column] floor_microsecond( + const column_view& column + ) except + + cdef unique_ptr[column] floor_nanosecond( + const column_view& column + ) except + cdef unique_ptr[column] add_calendrical_months( const column_view& timestamps, const column_view& months diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index 4921d1b4ace..580e55a4308 100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -86,6 +86,33 @@ def ceil_datetime(Column col, object field): return result +def floor_datetime(Column col, object field): + cdef unique_ptr[column] c_result + cdef column_view col_view = col.view() + + with nogil: + # https://pandas.pydata.org/pandas-docs/version/0.25.0/reference/api/pandas.Timedelta.resolution.html + if field == "D": + c_result = move(libcudf_datetime.floor_day(col_view)) + elif field == "H": + c_result = move(libcudf_datetime.floor_hour(col_view)) + elif field == "T": + c_result = move(libcudf_datetime.floor_minute(col_view)) + elif field == "S": + c_result = move(libcudf_datetime.floor_second(col_view)) + elif field == "L": + c_result = move(libcudf_datetime.floor_millisecond(col_view)) + elif field == "U": + c_result = move(libcudf_datetime.floor_microsecond(col_view)) + elif field == "N": + c_result = move(libcudf_datetime.floor_nanosecond(col_view)) + else: + raise ValueError(f"Invalid resolution: '{field}'") + + result = Column.from_unique_ptr(move(c_result)) + return result + + def is_leap_year(Column col): """Returns a boolean indicator whether the year of the date is a leap year """ diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 68379002e6b..7492c127a67 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -225,6 +225,9 @@ def get_dt_field(self, field: str) -> ColumnBase: def ceil(self, field: str) -> ColumnBase: return libcudf.datetime.ceil_datetime(self, field) + def floor(self, field: str) -> ColumnBase: + return libcudf.datetime.floor_datetime(self, field) + def normalize_binop_value(self, other: DatetimeLikeScalar) -> ScalarLike: if isinstance(other, cudf.Scalar): return other diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index d918a005d40..77c84f63af8 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -5008,12 +5008,73 @@ def _get_dt_field(self, field): ) def ceil(self, field): + """ + Perform ceil operation on the data to the specified freq. + + Parameters + ---------- + field : str + One of ["D", "H", "T", "S", "L", "U", "N"] + See `frequency aliases `_ + for more details on these aliases. + + Returns + ------- + Series + Series with the same index for a Series. + + Examples + -------- + >>> import cudf + >>> t = cudf.Series(["2001-01-01 00:04:45", "2001-01-01 00:04:58", + ... "2001-01-01 00:05:04"], dtype="datetime64[ns]") + >>> t.dt.ceil("T") + 0 2001-01-01 00:05:00 + 1 2001-01-01 00:05:00 + 2 2001-01-01 00:06:00 + dtype: datetime64[ns] + """ out_column = self.series._column.ceil(field) return Series( data=out_column, index=self.series._index, name=self.series.name ) + def floor(self, field): + """ + Perform floor operation on the data to the specified freq. + + Parameters + ---------- + field : str + One of ["D", "H", "T", "S", "L", "U", "N"] + See `frequency aliases `_ + for more details on these aliases. + + Returns + ------- + Series + Series with the same index for a Series. + + Examples + -------- + >>> import cudf + >>> t = cudf.Series(["2001-01-01 00:04:45", "2001-01-01 00:04:58", + ... "2001-01-01 00:05:04"], dtype="datetime64[ns]") + >>> t.dt.floor("T") + 0 2001-01-01 00:04:00 + 1 2001-01-01 00:04:00 + 2 2001-01-01 00:05:00 + dtype: datetime64[ns] + """ + out_column = self.series._column.floor(field) + + return Series( + data=out_column, index=self.series._index, name=self.series.name + ) + def strftime(self, date_format, *args, **kwargs): """ Convert to Series using specified ``date_format``. diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 3bbac217283..85f1055da89 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -1625,9 +1625,38 @@ def test_error_values(): @pytest.mark.parametrize("resolution", ["D", "H", "T", "S", "L", "U", "N"]) def test_ceil(data, time_type, resolution): - ps = pd.Series(data, dtype=time_type) - gs = cudf.from_pandas(ps) + gs = cudf.Series(data, dtype=time_type) + ps = gs.to_pandas() expect = ps.dt.ceil(resolution) got = gs.dt.ceil(resolution) assert_eq(expect, got) + + +@pytest.mark.parametrize( + "data", + [ + ( + [ + "2020-05-31 08:00:00", + "1999-12-31 18:40:10", + "2000-12-31 04:00:05", + "1900-02-28 07:00:06", + "1800-03-14 07:30:20", + "2100-03-14 07:30:20", + "1970-01-01 00:00:09", + "1969-12-31 12:59:10", + ] + ) + ], +) +@pytest.mark.parametrize("time_type", DATETIME_TYPES) +@pytest.mark.parametrize("resolution", ["D", "H", "T", "S", "L", "U", "N"]) +def test_floor(data, time_type, resolution): + + gs = cudf.Series(data, dtype=time_type) + ps = gs.to_pandas() + + expect = ps.dt.floor(resolution) + got = gs.dt.floor(resolution) + assert_eq(expect, got)