diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 4c8ac4165fe..0f38138fff9 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -308,6 +308,7 @@ set(STRINGS_BENCH_SRC "${CMAKE_CURRENT_SOURCE_DIR}/string/case_benchmark.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/string/contains_benchmark.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/string/convert_durations_benchmark.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/string/copy_benchmark.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/string/find_benchmark.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/string/replace_benchmark.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/string/url_decode_benchmark.cpp") diff --git a/cpp/benchmarks/string/copy_benchmark.cpp b/cpp/benchmarks/string/copy_benchmark.cpp new file mode 100644 index 00000000000..d5c24527fe3 --- /dev/null +++ b/cpp/benchmarks/string/copy_benchmark.cpp @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +class StringCopy : public cudf::benchmark { +}; + +static void BM_copy(benchmark::State& state) +{ + cudf::size_type const n_rows{(cudf::size_type)state.range(0)}; + auto const source = create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}); + auto const target = create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}); + + // scatter indices + std::vector host_map_data(n_rows); + std::iota(host_map_data.begin(), host_map_data.end(), 0); + std::random_shuffle(host_map_data.begin(), host_map_data.end()); + cudf::test::fixed_width_column_wrapper scatter_map(host_map_data.begin(), + host_map_data.end()); + + for (auto _ : state) { + cuda_event_timer raii(state, true, 0); + cudf::scatter(source->view(), scatter_map, target->view()); + } + + state.SetBytesProcessed(state.iterations() * + cudf::strings_column_view(source->view().column(0)).chars_size()); +} + +#define SORT_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(StringCopy, name) \ + (::benchmark::State & st) { BM_copy(st); } \ + BENCHMARK_REGISTER_F(StringCopy, name) \ + ->RangeMultiplier(8) \ + ->Ranges({{1 << 12, 1 << 24}}) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +SORT_BENCHMARK_DEFINE(scatter) diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index 0918f071443..7d5c3f4d2ee 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -114,10 +114,9 @@ struct column_scatterer_impl { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const { - using strings::detail::create_string_vector_from_column; - auto const source_vector = create_string_vector_from_column(source, stream); - auto const begin = source_vector.begin(); - auto const end = begin + std::distance(scatter_map_begin, scatter_map_end); + auto d_column = column_device_view::create(source, stream); + auto const begin = d_column->begin(); + auto const end = begin + cudf::distance(scatter_map_begin, scatter_map_end); return strings::detail::scatter(begin, end, scatter_map_begin, target, stream, mr); } }; diff --git a/cpp/include/cudf/lists/detail/scatter.cuh b/cpp/include/cudf/lists/detail/scatter.cuh index 9e18798eb46..5dd3db1117c 100644 --- a/cpp/include/cudf/lists/detail/scatter.cuh +++ b/cpp/include/cudf/lists/detail/scatter.cuh @@ -431,11 +431,11 @@ struct list_child_constructor { auto const num_child_rows{ cudf::detail::get_value(list_offsets, list_offsets.size() - 1, stream)}; - auto string_views = rmm::device_vector(num_child_rows); + auto string_views = rmm::device_uvector(num_child_rows, stream); auto populate_string_views = [d_scattered_lists = list_vector.begin(), // unbound_list_view* d_list_offsets = list_offsets.template data(), - d_string_views = string_views.data().get(), + d_string_views = string_views.data(), source_lists, target_lists] __device__(auto const& row_index) { auto unbound_list_view = d_scattered_lists[row_index]; @@ -483,7 +483,7 @@ struct list_child_constructor { string_views.begin(), string_views.size(), stream, mr); auto string_chars = cudf::strings::detail::child_chars_from_string_vector( - string_views, string_offsets->view().template data(), 0, stream, mr); + string_views, string_offsets->view(), stream, mr); auto child_null_mask = source_lists_column_view.child().nullable() || target_lists_column_view.child().nullable() ? construct_child_nullmask( diff --git a/cpp/include/cudf/strings/detail/scatter.cuh b/cpp/include/cudf/strings/detail/scatter.cuh index 7e2513a7633..342afae7336 100644 --- a/cpp/include/cudf/strings/detail/scatter.cuh +++ b/cpp/include/cudf/strings/detail/scatter.cuh @@ -61,29 +61,25 @@ std::unique_ptr scatter( rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { - auto strings_count = target.size(); - if (strings_count == 0) return make_empty_strings_column(stream, mr); + if (target.is_empty()) return make_empty_strings_column(stream, mr); - // create null mask -- caller must update this - rmm::device_buffer null_mask{0, stream, mr}; - if (target.has_nulls()) null_mask = cudf::detail::copy_bitmask(target.parent(), stream, mr); + // create vector of string_view's to scatter into + rmm::device_uvector target_vector = create_string_vector_from_column(target, stream); - // create string vectors - rmm::device_vector target_vector = create_string_vector_from_column(target, stream); // do the scatter thrust::scatter(rmm::exec_policy(stream), begin, end, scatter_map, target_vector.begin()); // build offsets column auto offsets_column = child_offsets_from_string_vector(target_vector, stream, mr); // build chars column - auto chars_column = child_chars_from_string_vector( - target_vector, offsets_column->view().data(), 0, stream, mr); + auto chars_column = + child_chars_from_string_vector(target_vector, offsets_column->view(), stream, mr); - return make_strings_column(strings_count, + return make_strings_column(target.size(), std::move(offsets_column), std::move(chars_column), UNKNOWN_NULL_COUNT, - std::move(null_mask), + cudf::detail::copy_bitmask(target.parent(), stream, mr), stream, mr); } diff --git a/cpp/include/cudf/strings/detail/utilities.hpp b/cpp/include/cudf/strings/detail/utilities.hpp index 3084c55bce0..8844d2fb4b2 100644 --- a/cpp/include/cudf/strings/detail/utilities.hpp +++ b/cpp/include/cudf/strings/detail/utilities.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,9 +17,10 @@ #include #include +#include #include -#include +#include namespace cudf { namespace strings { @@ -60,36 +61,34 @@ std::unique_ptr make_empty_strings_column( * @param stream CUDA stream used for device memory operations and kernel launches. * @return Device vector of string_views */ -rmm::device_vector create_string_vector_from_column( - cudf::strings_column_view strings, rmm::cuda_stream_view stream = rmm::cuda_stream_default); +rmm::device_uvector create_string_vector_from_column( + cudf::strings_column_view const strings, rmm::cuda_stream_view stream = rmm::cuda_stream_default); /** * @brief Creates an offsets column from a string_view vector. * - * @param strings Strings column + * @param strings Strings input data * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * @return Child offsets column */ std::unique_ptr child_offsets_from_string_vector( - const rmm::device_vector& strings, + cudf::detail::device_span strings, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Creates a chars column from a string_view vector. * - * @param strings Strings vector + * @param strings Strings input data * @param d_offsets Offsets vector for placing strings into column's memory. - * @param null_count Number of null strings. * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * @return Child chars column */ std::unique_ptr child_chars_from_string_vector( - const rmm::device_vector& strings, - const int32_t* d_offsets, - cudf::size_type null_count, + cudf::detail::device_span strings, + column_view const& offsets, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/src/strings/utilities.cu b/cpp/src/strings/utilities.cu index d68cdc9f557..5b9a1374224 100644 --- a/cpp/src/strings/utilities.cu +++ b/cpp/src/strings/utilities.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,11 +21,12 @@ #include #include +#include #include #include #include -#include +#include #include #include @@ -52,19 +53,20 @@ std::unique_ptr> string_from_host new string_view(reinterpret_cast(d_str->data()), length), deleter}; } -// build a vector of string_view objects from a strings column -rmm::device_vector create_string_vector_from_column(cudf::strings_column_view strings, - rmm::cuda_stream_view stream) +/** + * @copydoc create_string_vector_from_column + */ +rmm::device_uvector create_string_vector_from_column(cudf::strings_column_view strings, + rmm::cuda_stream_view stream) { auto strings_column = column_device_view::create(strings.parent(), stream); auto d_column = *strings_column; - auto count = strings.size(); - rmm::device_vector strings_vector(count); - string_view* d_strings = strings_vector.data().get(); + rmm::device_uvector strings_vector(strings.size(), stream); + string_view* d_strings = strings_vector.data(); thrust::for_each_n(rmm::exec_policy(stream), thrust::make_counting_iterator(0), - count, + strings.size(), [d_column, d_strings] __device__(size_type idx) { if (d_column.is_null(idx)) d_strings[idx] = string_view(nullptr, 0); @@ -74,35 +76,38 @@ rmm::device_vector create_string_vector_from_column(cudf::strings_c return strings_vector; } -// build a strings offsets column from a vector of string_views +/** + * @copydoc child_offsets_from_string_vector + */ std::unique_ptr child_offsets_from_string_vector( - const rmm::device_vector& strings, + cudf::detail::device_span strings, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { return child_offsets_from_string_iterator(strings.begin(), strings.size(), stream, mr); } -// build a strings chars column from an vector of string_views +/** + * @copydoc child_chars_from_string_vector + */ std::unique_ptr child_chars_from_string_vector( - const rmm::device_vector& strings, - const int32_t* d_offsets, - cudf::size_type null_count, + cudf::detail::device_span strings, + column_view const& offsets, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - size_type count = strings.size(); - auto d_strings = strings.data().get(); - size_type bytes = thrust::device_pointer_cast(d_offsets)[count]; + auto const d_strings = strings.data(); + auto const bytes = cudf::detail::get_value(offsets, strings.size(), stream); + auto const d_offsets = offsets.data(); // create column auto chars_column = make_numeric_column(data_type{type_id::INT8}, bytes, mask_state::UNALLOCATED, stream, mr); // get it's view - auto d_chars = chars_column->mutable_view().data(); + auto d_chars = chars_column->mutable_view().data(); thrust::for_each_n(rmm::exec_policy(stream), thrust::make_counting_iterator(0), - count, + strings.size(), [d_strings, d_offsets, d_chars] __device__(size_type idx) { string_view const d_str = d_strings[idx]; memcpy(d_chars + d_offsets[idx], d_str.data(), d_str.size_bytes());