From fe27198966c7fa90dd14d0cd638c8b54d908f98b Mon Sep 17 00:00:00 2001 From: davidwendt Date: Wed, 24 Feb 2021 13:06:01 -0500 Subject: [PATCH 1/5] Add gbenchmarks for strings filter functions --- cpp/benchmarks/CMakeLists.txt | 1 + cpp/benchmarks/string/filter_benchmark.cpp | 93 ++++++++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100644 cpp/benchmarks/string/filter_benchmark.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 0f38138fff9..c9e7cd318f4 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -309,6 +309,7 @@ set(STRINGS_BENCH_SRC "${CMAKE_CURRENT_SOURCE_DIR}/string/contains_benchmark.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/string/convert_durations_benchmark.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/string/copy_benchmark.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/string/filter_benchmark.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/string/find_benchmark.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/string/replace_benchmark.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/string/url_decode_benchmark.cpp") diff --git a/cpp/benchmarks/string/filter_benchmark.cpp b/cpp/benchmarks/string/filter_benchmark.cpp new file mode 100644 index 00000000000..dfec0564ed2 --- /dev/null +++ b/cpp/benchmarks/string/filter_benchmark.cpp @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +enum FilterAPI { filter, filter_chars, strip }; + +class StringFilterChars : public cudf::benchmark { +}; + +static void BM_filter_chars(benchmark::State& state, FilterAPI api) +{ + cudf::size_type const n_rows{static_cast(state.range(0))}; + cudf::size_type const max_str_length{static_cast(state.range(1))}; + data_profile table_profile; + table_profile.set_distribution_params( + cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); + auto const table = + create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile); + cudf::strings_column_view input(table->view().column(0)); + + auto types = cudf::strings::string_character_types::SPACE; + std::vector> filter_table{ + {cudf::char_utf8{'a'}, cudf::char_utf8{'c'}}}; + + for (auto _ : state) { + cuda_event_timer raii(state, true, 0); + switch (api) { + case filter: cudf::strings::filter_characters_of_type(input, types); break; + case filter_chars: cudf::strings::filter_characters(input, filter_table); break; + case strip: cudf::strings::strip(input); break; + } + } + + state.SetBytesProcessed(state.iterations() * input.chars_size()); +} + +static void generate_bench_args(benchmark::internal::Benchmark* b) +{ + int const min_rows = 1 << 12; + int const max_rows = 1 << 24; + int const row_mult = 8; + int const min_rowlen = 1 << 5; + int const max_rowlen = 1 << 13; + int const len_mult = 4; + for (int row_count = min_rows; row_count <= max_rows; row_count *= row_mult) { + for (int rowlen = min_rowlen; rowlen <= max_rowlen; rowlen *= len_mult) { + // avoid generating combinations that exceed the cudf column limit + size_t total_chars = static_cast(row_count) * rowlen; + if (total_chars < std::numeric_limits::max()) { + b->Args({row_count, rowlen}); + } + } + } +} + +#define STRINGS_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(StringFilterChars, name) \ + (::benchmark::State & st) { BM_filter_chars(st, FilterAPI::name); } \ + BENCHMARK_REGISTER_F(StringFilterChars, name) \ + ->Apply(generate_bench_args) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +STRINGS_BENCHMARK_DEFINE(filter) +STRINGS_BENCHMARK_DEFINE(filter_chars) +STRINGS_BENCHMARK_DEFINE(strip) From 645df83954ec722dc703a21eaedbe7e22b48df5c Mon Sep 17 00:00:00 2001 From: davidwendt Date: Wed, 24 Feb 2021 13:06:57 -0500 Subject: [PATCH 2/5] refactored and simplified calls to strip and filter_chars functors --- cpp/src/strings/filter_chars.cu | 67 +++++++++----------- cpp/src/strings/strip.cu | 104 ++++++++++++-------------------- 2 files changed, 67 insertions(+), 104 deletions(-) diff --git a/cpp/src/strings/filter_chars.cu b/cpp/src/strings/filter_chars.cu index 5a0b409fcf2..3cc2a017254 100644 --- a/cpp/src/strings/filter_chars.cu +++ b/cpp/src/strings/filter_chars.cu @@ -52,7 +52,7 @@ struct filter_fn { rmm::device_vector::iterator table_begin; rmm::device_vector::iterator table_end; string_view const d_replacement; - int32_t const* d_offsets{}; + int32_t* d_offsets{}; char* d_chars{}; /** @@ -78,23 +78,28 @@ struct filter_fn { * This is also used to calculate the size of the output. * * @param idx Index of the current string to process. - * @return The size of the output for this string. */ - __device__ size_type operator()(size_type idx) + __device__ void operator()(size_type idx) { - if (d_strings.is_null(idx)) return 0; - string_view d_str = d_strings.element(idx); - size_type nbytes = d_str.size_bytes(); - auto const in_ptr = d_str.data(); - auto out_ptr = d_chars ? d_chars + d_offsets[idx] : nullptr; + if (d_strings.is_null(idx)) { + if (!d_chars) d_offsets[idx] = 0; + return; + } + auto const d_str = d_strings.element(idx); + + auto nbytes = d_str.size_bytes(); + auto out_ptr = d_chars ? d_chars + d_offsets[idx] : nullptr; for (auto itr = d_str.begin(); itr != d_str.end(); ++itr) { - auto const char_size = bytes_in_char_utf8(*itr); - string_view const d_newchar = - remove_char(*itr) ? d_replacement : string_view(in_ptr + itr.byte_offset(), char_size); - nbytes += d_newchar.size_bytes() - char_size; - if (out_ptr) out_ptr = cudf::strings::detail::copy_string(out_ptr, d_newchar); + auto const char_size = bytes_in_char_utf8(*itr); + string_view const d_newchar = remove_char(*itr) + ? d_replacement + : string_view(d_str.data() + itr.byte_offset(), char_size); + if (out_ptr) + out_ptr = cudf::strings::detail::copy_string(out_ptr, d_newchar); + else + nbytes += d_newchar.size_bytes() - char_size; } - return nbytes; + if (!out_ptr) d_offsets[idx] = nbytes; } }; @@ -125,34 +130,18 @@ std::unique_ptr filter_characters( }); rmm::device_vector table(htable); // copy filter table to device memory - auto strings_column = column_device_view::create(strings.parent(), stream); - auto d_strings = *strings_column; - - // create null mask - rmm::device_buffer null_mask = cudf::detail::copy_bitmask(strings.parent(), stream, mr); - - // create offsets column - filter_fn ffn{d_strings, keep_characters, table.begin(), table.end(), d_replacement}; - auto offsets_transformer_itr = cudf::detail::make_counting_transform_iterator(0, ffn); - auto offsets_column = make_offsets_child_column( - offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr); - ffn.d_offsets = offsets_column->view().data(); - - // build chars column - size_type bytes = cudf::detail::get_value(offsets_column->view(), strings_count, stream); - auto chars_column = strings::detail::create_chars_child_column( - strings_count, strings.null_count(), bytes, stream, mr); - ffn.d_chars = chars_column->mutable_view().data(); - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - strings_count, - ffn); + auto d_strings = column_device_view::create(strings.parent(), stream); + + // this utility calls the strip_fn to build the offsets and chars columns + filter_fn ffn{*d_strings, keep_characters, table.begin(), table.end(), d_replacement}; + auto children = cudf::strings::detail::make_strings_children( + ffn, strings.size(), strings.null_count(), stream, mr); return make_strings_column(strings_count, - std::move(offsets_column), - std::move(chars_column), + std::move(children.first), + std::move(children.second), strings.null_count(), - std::move(null_mask), + cudf::detail::copy_bitmask(strings.parent(), stream, mr), stream, mr); } diff --git a/cpp/src/strings/strip.cu b/cpp/src/strings/strip.cu index f9e8463afd7..88addec5881 100644 --- a/cpp/src/strings/strip.cu +++ b/cpp/src/strings/strip.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,16 +35,6 @@ namespace cudf { namespace strings { namespace detail { namespace { -/** - * @brief Used as template parameter to divide size calculation from - * the actual string operation within a function. - * - * Useful when most of the logic is identical for both passes. - */ -enum TwoPass { - SizeOnly = 0, ///< calculate the size only - ExecuteOp ///< run the string operation -}; /** * @brief Strip characters from the beginning and/or end of a string. @@ -53,15 +43,12 @@ enum TwoPass { * of any characters found in d_to_strip or whitespace if * d_to_strip is empty. * - * @tparam Pass Allows computing only the size of the output - * or writing the output to device memory. */ -template struct strip_fn { column_device_view const d_strings; - strip_type stype; // right, left, or both - string_view d_to_strip; - int32_t const* d_offsets{}; + strip_type const stype; // right, left, or both + string_view const d_to_strip; + int32_t* d_offsets{}; char* d_chars{}; __device__ bool is_strip_character(char_utf8 chr) @@ -73,31 +60,38 @@ struct strip_fn { }); } - __device__ size_type operator()(size_type idx) + __device__ void operator()(size_type idx) { - if (d_strings.is_null(idx)) return 0; - string_view d_str = d_strings.element(idx); - size_type length = d_str.length(); + if (d_strings.is_null(idx)) { + if (!d_chars) d_offsets[idx] = 0; + return; + } + auto const d_str = d_strings.element(idx); + size_type left_offset = 0; - auto itr = d_str.begin(); + // auto itr = d_str.begin(); if (stype == strip_type::LEFT || stype == strip_type::BOTH) { - for (; itr != d_str.end();) { - if (!is_strip_character(*itr++)) break; - left_offset = itr.byte_offset(); - } + auto const itr = thrust::find_if(thrust::seq, d_str.begin(), d_str.end(), [this](auto chr) { + return !this->is_strip_character(chr); + }); + + left_offset = itr != d_str.end() ? itr.byte_offset() : d_str.size_bytes(); } size_type right_offset = d_str.size_bytes(); if (stype == strip_type::RIGHT || stype == strip_type::BOTH) { - itr = d_str.end(); + auto const length = d_str.length(); + + auto itr = d_str.end(); for (size_type n = 0; n < length; ++n) { if (!is_strip_character(*(--itr))) break; right_offset = itr.byte_offset(); } } - size_type bytes = 0; - if (right_offset > left_offset) bytes = right_offset - left_offset; - if (Pass == ExecuteOp) memcpy(d_chars + d_offsets[idx], d_str.data() + left_offset, bytes); - return bytes; + auto const bytes = (right_offset > left_offset) ? right_offset - left_offset : 0; + if (d_chars) + memcpy(d_chars + d_offsets[idx], d_str.data() + left_offset, bytes); + else + d_offsets[idx] = bytes; } }; @@ -110,42 +104,22 @@ std::unique_ptr strip( rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { - auto strings_count = strings.size(); - if (strings_count == 0) return detail::make_empty_strings_column(stream, mr); + if (strings.is_empty()) return detail::make_empty_strings_column(stream, mr); CUDF_EXPECTS(to_strip.is_valid(), "Parameter to_strip must be valid"); - string_view d_to_strip(to_strip.data(), to_strip.size()); - - auto strings_column = column_device_view::create(strings.parent(), stream); - auto d_column = *strings_column; - size_type null_count = strings.null_count(); - - // copy null mask - rmm::device_buffer null_mask = cudf::detail::copy_bitmask(strings.parent(), stream, mr); - - // build offsets column -- calculate the size of each output string - auto offsets_transformer_itr = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), strip_fn{d_column, stype, d_to_strip}); - auto offsets_column = make_offsets_child_column( - offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr); - auto offsets_view = offsets_column->view(); - auto d_offsets = offsets_view.data(); - - // build the chars column -- convert characters based on case_flag parameter - size_type bytes = thrust::device_pointer_cast(d_offsets)[strings_count]; - auto chars_column = create_chars_child_column(strings_count, null_count, bytes, stream, mr); - auto chars_view = chars_column->mutable_view(); - auto d_chars = chars_view.data(); - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - strings_count, - strip_fn{d_column, stype, d_to_strip, d_offsets, d_chars}); - - return make_strings_column(strings_count, - std::move(offsets_column), - std::move(chars_column), - null_count, - std::move(null_mask), + string_view const d_to_strip(to_strip.data(), to_strip.size()); + + auto const d_column = column_device_view::create(strings.parent(), stream); + + // this utility calls the strip_fn to build the offsets and chars columns + auto children = cudf::strings::detail::make_strings_children( + strip_fn{*d_column, stype, d_to_strip}, strings.size(), strings.null_count(), stream, mr); + + return make_strings_column(strings.size(), + std::move(children.first), + std::move(children.second), + strings.null_count(), + cudf::detail::copy_bitmask(strings.parent(), stream, mr), stream, mr); } From 8b4e122bef76ced080d8d74e78e52f72c08d5fdb Mon Sep 17 00:00:00 2001 From: davidwendt Date: Thu, 25 Feb 2021 10:51:59 -0500 Subject: [PATCH 3/5] change device_vector to device_uvector --- cpp/src/strings/filter_chars.cu | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/cpp/src/strings/filter_chars.cu b/cpp/src/strings/filter_chars.cu index 3cc2a017254..7ed77a830ad 100644 --- a/cpp/src/strings/filter_chars.cu +++ b/cpp/src/strings/filter_chars.cu @@ -49,8 +49,8 @@ namespace { struct filter_fn { column_device_view const d_strings; filter_type keep_characters; - rmm::device_vector::iterator table_begin; - rmm::device_vector::iterator table_end; + rmm::device_uvector::iterator table_begin; + rmm::device_uvector::iterator table_end; string_view const d_replacement; int32_t* d_offsets{}; char* d_chars{}; @@ -128,7 +128,12 @@ std::unique_ptr filter_characters( characters_to_filter.begin(), characters_to_filter.end(), htable.begin(), [](auto entry) { return char_range{entry.first, entry.second}; }); - rmm::device_vector table(htable); // copy filter table to device memory + rmm::device_uvector table(table_size, stream); + CUDA_TRY(cudaMemcpyAsync(table.data(), + htable.data(), + table_size * sizeof(char_range), + cudaMemcpyHostToDevice, + stream.value())); auto d_strings = column_device_view::create(strings.parent(), stream); From b2934073b539d0d54e47c0414cee00414b5d1f05 Mon Sep 17 00:00:00 2001 From: davidwendt Date: Thu, 25 Feb 2021 11:35:59 -0500 Subject: [PATCH 4/5] add const to variable decl --- cpp/benchmarks/string/filter_benchmark.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/benchmarks/string/filter_benchmark.cpp b/cpp/benchmarks/string/filter_benchmark.cpp index dfec0564ed2..123c5597df9 100644 --- a/cpp/benchmarks/string/filter_benchmark.cpp +++ b/cpp/benchmarks/string/filter_benchmark.cpp @@ -45,7 +45,7 @@ static void BM_filter_chars(benchmark::State& state, FilterAPI api) create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile); cudf::strings_column_view input(table->view().column(0)); - auto types = cudf::strings::string_character_types::SPACE; + auto const types = cudf::strings::string_character_types::SPACE; std::vector> filter_table{ {cudf::char_utf8{'a'}, cudf::char_utf8{'c'}}}; From a44098e8132eea35b56c935c50b5fcf24cf6a033 Mon Sep 17 00:00:00 2001 From: davidwendt Date: Thu, 25 Feb 2021 11:36:25 -0500 Subject: [PATCH 5/5] remove commented out line --- cpp/src/strings/strip.cu | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/cpp/src/strings/strip.cu b/cpp/src/strings/strip.cu index 88addec5881..3ffa331ba49 100644 --- a/cpp/src/strings/strip.cu +++ b/cpp/src/strings/strip.cu @@ -51,15 +51,6 @@ struct strip_fn { int32_t* d_offsets{}; char* d_chars{}; - __device__ bool is_strip_character(char_utf8 chr) - { - return d_to_strip.empty() ? (chr <= ' ') : // whitespace check - thrust::any_of( - thrust::seq, d_to_strip.begin(), d_to_strip.end(), [chr] __device__(char_utf8 c) { - return c == chr; - }); - } - __device__ void operator()(size_type idx) { if (d_strings.is_null(idx)) { @@ -68,25 +59,31 @@ struct strip_fn { } auto const d_str = d_strings.element(idx); - size_type left_offset = 0; - // auto itr = d_str.begin(); - if (stype == strip_type::LEFT || stype == strip_type::BOTH) { - auto const itr = thrust::find_if(thrust::seq, d_str.begin(), d_str.end(), [this](auto chr) { - return !this->is_strip_character(chr); - }); + auto is_strip_character = [d_to_strip = d_to_strip] __device__(char_utf8 chr) -> bool { + return d_to_strip.empty() ? (chr <= ' ') : // whitespace check + thrust::any_of( + thrust::seq, d_to_strip.begin(), d_to_strip.end(), [chr] __device__(char_utf8 c) { + return c == chr; + }); + }; + + size_type const left_offset = [&] { + if (stype != strip_type::LEFT && stype != strip_type::BOTH) return 0; + auto const itr = + thrust::find_if_not(thrust::seq, d_str.begin(), d_str.end(), is_strip_character); + return itr != d_str.end() ? itr.byte_offset() : d_str.size_bytes(); + }(); - left_offset = itr != d_str.end() ? itr.byte_offset() : d_str.size_bytes(); - } size_type right_offset = d_str.size_bytes(); if (stype == strip_type::RIGHT || stype == strip_type::BOTH) { auto const length = d_str.length(); - - auto itr = d_str.end(); + auto itr = d_str.end(); for (size_type n = 0; n < length; ++n) { if (!is_strip_character(*(--itr))) break; right_offset = itr.byte_offset(); } } + auto const bytes = (right_offset > left_offset) ? right_offset - left_offset : 0; if (d_chars) memcpy(d_chars + d_offsets[idx], d_str.data() + left_offset, bytes);