Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add gbenchmarks for strings filter functions #7438

Merged
merged 5 commits into from
Feb 26, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,7 @@ set(STRINGS_BENCH_SRC
"${CMAKE_CURRENT_SOURCE_DIR}/string/contains_benchmark.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/string/convert_durations_benchmark.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/string/copy_benchmark.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/string/filter_benchmark.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/string/find_benchmark.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/string/replace_benchmark.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/string/url_decode_benchmark.cpp")
Expand Down
93 changes: 93 additions & 0 deletions cpp/benchmarks/string/filter_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmark/benchmark.h>
#include <benchmarks/common/generate_benchmark_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/char_types/char_types.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/strings/strip.hpp>
#include <cudf/strings/translate.hpp>
#include <cudf_test/column_wrapper.hpp>

#include <limits>
#include <vector>

/// Selects which strings API a benchmark run exercises inside the timed loop.
enum FilterAPI {
  filter,        ///< runs cudf::strings::filter_characters_of_type
  filter_chars,  ///< runs cudf::strings::filter_characters
  strip          ///< runs cudf::strings::strip
};

// Benchmark fixture for the strings filter benchmarks. It derives from cudf::benchmark and
// adds no members of its own; the macro-generated benchmarks in this file are defined and
// registered against it.
class StringFilterChars : public cudf::benchmark {
};

/**
 * @brief Shared benchmark body for the strings filter/strip APIs.
 *
 * Generates one random strings column sized by the benchmark arguments, then on every
 * benchmark iteration invokes the API selected by `api` on that column and discards the
 * result. Bytes processed is reported as iterations times the input's `chars_size()`.
 *
 * @param state Google Benchmark state; `range(0)` is the number of rows and `range(1)` is
 *              the maximum string length handed to the data generator.
 * @param api   Which strings API to run inside the timed loop.
 */
static void BM_filter_chars(benchmark::State& state, FilterAPI api)
{
  auto const num_rows       = static_cast<cudf::size_type>(state.range(0));
  auto const max_row_length = static_cast<cudf::size_type>(state.range(1));

  // String data uses a NORMAL distribution parameterised by 0 and max_row_length.
  data_profile profile;
  profile.set_distribution_params(
    cudf::type_id::STRING, distribution_id::NORMAL, 0, max_row_length);

  // NOTE(review): the literal 1 is presumably the number of columns to generate --
  // confirm against create_random_table's declaration.
  auto const random_table =
    create_random_table({cudf::type_id::STRING}, 1, row_count{num_rows}, profile);
  cudf::strings_column_view input(random_table->view().column(0));

  // Fixed arguments for the two filter APIs: the SPACE character type, and the single
  // character range 'a'..'c'.
  auto const char_types = cudf::strings::string_character_types::SPACE;
  std::vector<std::pair<cudf::char_utf8, cudf::char_utf8>> char_ranges{
    {cudf::char_utf8{'a'}, cudf::char_utf8{'c'}}};

  for (auto _ : state) {
    // NOTE(review): `true` presumably requests an L2 cache flush and `0` the default
    // stream -- confirm against cuda_event_timer's constructor.
    cuda_event_timer timer(state, true, 0);
    switch (api) {
      case FilterAPI::filter: {
        cudf::strings::filter_characters_of_type(input, char_types);
        break;
      }
      case FilterAPI::filter_chars: {
        cudf::strings::filter_characters(input, char_ranges);
        break;
      }
      case FilterAPI::strip: {
        cudf::strings::strip(input);
        break;
      }
    }
  }

  state.SetBytesProcessed(state.iterations() * input.chars_size());
}

/**
 * @brief Registers the (row count, row length) argument pairs for a benchmark.
 *
 * Row counts run from 2^12 to 2^24 in multiples of 8 and row lengths from 2^5 to 2^13 in
 * multiples of 4. A pair is registered only when rows * length is below the maximum value
 * of cudf::size_type.
 *
 * @param b Benchmark object that receives one `Args({rows, length})` call per accepted pair.
 */
static void generate_bench_args(benchmark::internal::Benchmark* b)
{
  constexpr int first_row_count  = 1 << 12;
  constexpr int last_row_count   = 1 << 24;
  constexpr int row_count_factor = 8;

  constexpr int first_row_length  = 1 << 5;
  constexpr int last_row_length   = 1 << 13;
  constexpr int row_length_factor = 4;

  // Total characters in a column must stay below this to fit the cudf column limit.
  constexpr auto column_char_limit =
    static_cast<size_t>(std::numeric_limits<cudf::size_type>::max());

  for (int rows = first_row_count; rows <= last_row_count; rows *= row_count_factor) {
    for (int length = first_row_length; length <= last_row_length;
         length *= row_length_factor) {
      // Widen before multiplying so the product cannot overflow int.
      auto const total_chars = static_cast<size_t>(rows) * static_cast<size_t>(length);
      if (total_chars >= column_char_limit) { continue; }
      b->Args({rows, length});
    }
  }
}

// Defines a StringFilterChars fixture benchmark called `name` whose body forwards to
// BM_filter_chars with FilterAPI::name, then registers it with the argument sets produced
// by generate_bench_args, manual timing, and millisecond reporting units.
// `name` must therefore be a FilterAPI enumerator. The expansion already ends in a
// semicolon, so the invocations below do not add one.
#define STRINGS_BENCHMARK_DEFINE(name) \
BENCHMARK_DEFINE_F(StringFilterChars, name) \
(::benchmark::State & st) { BM_filter_chars(st, FilterAPI::name); } \
BENCHMARK_REGISTER_F(StringFilterChars, name) \
->Apply(generate_bench_args) \
->UseManualTime() \
->Unit(benchmark::kMillisecond);

// One benchmark per FilterAPI enumerator.
STRINGS_BENCHMARK_DEFINE(filter)
STRINGS_BENCHMARK_DEFINE(filter_chars)
STRINGS_BENCHMARK_DEFINE(strip)
80 changes: 37 additions & 43 deletions cpp/src/strings/filter_chars.cu
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@ namespace {
struct filter_fn {
column_device_view const d_strings;
filter_type keep_characters;
rmm::device_vector<char_range>::iterator table_begin;
rmm::device_vector<char_range>::iterator table_end;
rmm::device_uvector<char_range>::iterator table_begin;
rmm::device_uvector<char_range>::iterator table_end;
string_view const d_replacement;
int32_t const* d_offsets{};
int32_t* d_offsets{};
char* d_chars{};

/**
Expand All @@ -78,23 +78,28 @@ struct filter_fn {
* This is also used to calculate the size of the output.
*
* @param idx Index of the current string to process.
* @return The size of the output for this string.
*/
__device__ size_type operator()(size_type idx)
__device__ void operator()(size_type idx)
{
if (d_strings.is_null(idx)) return 0;
string_view d_str = d_strings.element<string_view>(idx);
size_type nbytes = d_str.size_bytes();
auto const in_ptr = d_str.data();
auto out_ptr = d_chars ? d_chars + d_offsets[idx] : nullptr;
if (d_strings.is_null(idx)) {
if (!d_chars) d_offsets[idx] = 0;
return;
}
auto const d_str = d_strings.element<string_view>(idx);

auto nbytes = d_str.size_bytes();
auto out_ptr = d_chars ? d_chars + d_offsets[idx] : nullptr;
for (auto itr = d_str.begin(); itr != d_str.end(); ++itr) {
auto const char_size = bytes_in_char_utf8(*itr);
string_view const d_newchar =
remove_char(*itr) ? d_replacement : string_view(in_ptr + itr.byte_offset(), char_size);
nbytes += d_newchar.size_bytes() - char_size;
if (out_ptr) out_ptr = cudf::strings::detail::copy_string(out_ptr, d_newchar);
auto const char_size = bytes_in_char_utf8(*itr);
string_view const d_newchar = remove_char(*itr)
? d_replacement
: string_view(d_str.data() + itr.byte_offset(), char_size);
if (out_ptr)
out_ptr = cudf::strings::detail::copy_string(out_ptr, d_newchar);
else
nbytes += d_newchar.size_bytes() - char_size;
}
return nbytes;
if (!out_ptr) d_offsets[idx] = nbytes;
}
};

Expand Down Expand Up @@ -123,36 +128,25 @@ std::unique_ptr<column> filter_characters(
characters_to_filter.begin(), characters_to_filter.end(), htable.begin(), [](auto entry) {
return char_range{entry.first, entry.second};
});
rmm::device_vector<char_range> table(htable); // copy filter table to device memory

auto strings_column = column_device_view::create(strings.parent(), stream);
auto d_strings = *strings_column;

// create null mask
rmm::device_buffer null_mask = cudf::detail::copy_bitmask(strings.parent(), stream, mr);

// create offsets column
filter_fn ffn{d_strings, keep_characters, table.begin(), table.end(), d_replacement};
auto offsets_transformer_itr = cudf::detail::make_counting_transform_iterator(0, ffn);
auto offsets_column = make_offsets_child_column(
offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr);
ffn.d_offsets = offsets_column->view().data<int32_t>();

// build chars column
size_type bytes = cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column = strings::detail::create_chars_child_column(
strings_count, strings.null_count(), bytes, stream, mr);
ffn.d_chars = chars_column->mutable_view().data<char>();
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<cudf::size_type>(0),
strings_count,
ffn);
rmm::device_uvector<char_range> table(table_size, stream);
CUDA_TRY(cudaMemcpyAsync(table.data(),
htable.data(),
table_size * sizeof(char_range),
cudaMemcpyHostToDevice,
stream.value()));

auto d_strings = column_device_view::create(strings.parent(), stream);

// this utility calls the filter_fn to build the offsets and chars columns
filter_fn ffn{*d_strings, keep_characters, table.begin(), table.end(), d_replacement};
auto children = cudf::strings::detail::make_strings_children(
ffn, strings.size(), strings.null_count(), stream, mr);

return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column),
std::move(children.first),
std::move(children.second),
strings.null_count(),
std::move(null_mask),
cudf::detail::copy_bitmask(strings.parent(), stream, mr),
stream,
mr);
}
Expand Down
123 changes: 47 additions & 76 deletions cpp/src/strings/strip.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -35,16 +35,6 @@ namespace cudf {
namespace strings {
namespace detail {
namespace {
/**
* @brief Used as template parameter to divide size calculation from
* the actual string operation within a function.
*
* Useful when most of the logic is identical for both passes.
*/
enum TwoPass {
SizeOnly = 0, ///< calculate the size only
ExecuteOp ///< run the string operation
};

/**
* @brief Strip characters from the beginning and/or end of a string.
Expand All @@ -53,51 +43,52 @@ enum TwoPass {
* of any characters found in d_to_strip or whitespace if
* d_to_strip is empty.
*
* @tparam Pass Allows computing only the size of the output
* or writing the output to device memory.
*/
template <TwoPass Pass = SizeOnly>
struct strip_fn {
column_device_view const d_strings;
strip_type stype; // right, left, or both
string_view d_to_strip;
int32_t const* d_offsets{};
strip_type const stype; // right, left, or both
string_view const d_to_strip;
int32_t* d_offsets{};
char* d_chars{};

__device__ bool is_strip_character(char_utf8 chr)
{
return d_to_strip.empty() ? (chr <= ' ') : // whitespace check
thrust::any_of(
thrust::seq, d_to_strip.begin(), d_to_strip.end(), [chr] __device__(char_utf8 c) {
return c == chr;
});
}

__device__ size_type operator()(size_type idx)
__device__ void operator()(size_type idx)
{
if (d_strings.is_null(idx)) return 0;
string_view d_str = d_strings.element<string_view>(idx);
size_type length = d_str.length();
size_type left_offset = 0;
auto itr = d_str.begin();
if (stype == strip_type::LEFT || stype == strip_type::BOTH) {
for (; itr != d_str.end();) {
if (!is_strip_character(*itr++)) break;
left_offset = itr.byte_offset();
}
if (d_strings.is_null(idx)) {
if (!d_chars) d_offsets[idx] = 0;
return;
}
auto const d_str = d_strings.element<string_view>(idx);

auto is_strip_character = [d_to_strip = d_to_strip] __device__(char_utf8 chr) -> bool {
return d_to_strip.empty() ? (chr <= ' ') : // whitespace check
thrust::any_of(
thrust::seq, d_to_strip.begin(), d_to_strip.end(), [chr] __device__(char_utf8 c) {
return c == chr;
});
};

size_type const left_offset = [&] {
if (stype != strip_type::LEFT && stype != strip_type::BOTH) return 0;
auto const itr =
thrust::find_if_not(thrust::seq, d_str.begin(), d_str.end(), is_strip_character);
return itr != d_str.end() ? itr.byte_offset() : d_str.size_bytes();
}();

size_type right_offset = d_str.size_bytes();
if (stype == strip_type::RIGHT || stype == strip_type::BOTH) {
itr = d_str.end();
auto const length = d_str.length();
auto itr = d_str.end();
for (size_type n = 0; n < length; ++n) {
if (!is_strip_character(*(--itr))) break;
right_offset = itr.byte_offset();
}
}
size_type bytes = 0;
if (right_offset > left_offset) bytes = right_offset - left_offset;
if (Pass == ExecuteOp) memcpy(d_chars + d_offsets[idx], d_str.data() + left_offset, bytes);
return bytes;

auto const bytes = (right_offset > left_offset) ? right_offset - left_offset : 0;
if (d_chars)
memcpy(d_chars + d_offsets[idx], d_str.data() + left_offset, bytes);
else
d_offsets[idx] = bytes;
}
};

Expand All @@ -110,42 +101,22 @@ std::unique_ptr<column> strip(
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
auto strings_count = strings.size();
if (strings_count == 0) return detail::make_empty_strings_column(stream, mr);
if (strings.is_empty()) return detail::make_empty_strings_column(stream, mr);

CUDF_EXPECTS(to_strip.is_valid(), "Parameter to_strip must be valid");
string_view d_to_strip(to_strip.data(), to_strip.size());

auto strings_column = column_device_view::create(strings.parent(), stream);
auto d_column = *strings_column;
size_type null_count = strings.null_count();

// copy null mask
rmm::device_buffer null_mask = cudf::detail::copy_bitmask(strings.parent(), stream, mr);

// build offsets column -- calculate the size of each output string
auto offsets_transformer_itr = thrust::make_transform_iterator(
thrust::make_counting_iterator<size_type>(0), strip_fn<SizeOnly>{d_column, stype, d_to_strip});
auto offsets_column = make_offsets_child_column(
offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr);
auto offsets_view = offsets_column->view();
auto d_offsets = offsets_view.data<int32_t>();

// build the chars column -- convert characters based on case_flag parameter
size_type bytes = thrust::device_pointer_cast(d_offsets)[strings_count];
auto chars_column = create_chars_child_column(strings_count, null_count, bytes, stream, mr);
auto chars_view = chars_column->mutable_view();
auto d_chars = chars_view.data<char>();
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
strings_count,
strip_fn<ExecuteOp>{d_column, stype, d_to_strip, d_offsets, d_chars});

return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column),
null_count,
std::move(null_mask),
string_view const d_to_strip(to_strip.data(), to_strip.size());

auto const d_column = column_device_view::create(strings.parent(), stream);

// this utility calls the strip_fn to build the offsets and chars columns
auto children = cudf::strings::detail::make_strings_children(
strip_fn{*d_column, stype, d_to_strip}, strings.size(), strings.null_count(), stream, mr);

return make_strings_column(strings.size(),
std::move(children.first),
std::move(children.second),
strings.null_count(),
cudf::detail::copy_bitmask(strings.parent(), stream, mr),
stream,
mr);
}
Expand Down