Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add gbenchmark for cudf::strings::to_lower #7316

Merged
merged 15 commits into from
Feb 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@ ConfigureBench(SUBWORD_TOKENIZER_BENCH "${SUBWORD_TOKENIZER_BENCH_SRC}")
# - strings benchmark -------------------------------------------------------------------

# Source files that make up the strings benchmark.
set(STRINGS_BENCH_SRC
"${CMAKE_CURRENT_SOURCE_DIR}/string/case_benchmark.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/string/convert_durations_benchmark.cpp")

# Hand the source list to ConfigureBench under the STRINGS_BENCH name.
ConfigureBench(STRINGS_BENCH "${STRINGS_BENCH_SRC}")
51 changes: 51 additions & 0 deletions cpp/benchmarks/string/case_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmark/benchmark.h>
#include <benchmarks/common/generate_benchmark_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/strings/case.hpp>
#include <cudf/strings/strings_column_view.hpp>

/**
 * @brief Fixture type under which the string case-conversion benchmarks are
 * defined and registered.
 *
 * Adds no members of its own; everything comes from the cudf::benchmark base.
 */
class StringCase : public cudf::benchmark {};

/**
 * @brief Benchmark body that times cudf::strings::to_lower on a randomly
 * generated strings column.
 *
 * The number of rows is taken from the first range argument of the benchmark.
 * After the timed loop, the processed-bytes counter is set to the number of
 * iterations multiplied by the chars size of the input column.
 *
 * @param state Google Benchmark state supplying the row count and driving the
 *              iteration loop.
 */
static void BM_case(benchmark::State& state)
{
  auto const num_rows = static_cast<cudf::size_type>(state.range(0));

  // Single-column table of STRING type used as the conversion input.
  auto const random_table =
    create_random_table({cudf::type_id::STRING}, 1, row_count{num_rows});
  cudf::strings_column_view strings_input(random_table->view().column(0));

  for (auto _ : state) {
    // Scoped timer covering the call below.
    // NOTE(review): the `true` / `0` arguments presumably select a cache flush
    // and the default stream -- confirm against synchronization.hpp.
    cuda_event_timer timer(state, true, 0);
    cudf::strings::to_lower(strings_input);
  }

  auto const bytes_per_iteration = strings_input.chars_size();
  state.SetBytesProcessed(state.iterations() * bytes_per_iteration);
}

// Defines a StringCase fixture benchmark called `name` whose body forwards to
// BM_case, then registers it with:
//   - row counts ranging from 2^12 to 2^24, stepping by a multiplier of 8
//   - manual timing (the time is reported from inside the benchmark body)
//   - results displayed in milliseconds
// The expansion already ends with a semicolon, so invocations do not need one.
//
// The macro was previously called SORT_BENCHMARK_DEFINE, a leftover from the
// sort benchmarks; it is file-local, so the rename does not affect the name of
// the registered benchmark (StringCase/to_lower).
#define STRINGS_CASE_BENCHMARK_DEFINE(name)  \
  BENCHMARK_DEFINE_F(StringCase, name)       \
  (::benchmark::State & st) { BM_case(st); } \
  BENCHMARK_REGISTER_F(StringCase, name)     \
    ->RangeMultiplier(8)                     \
    ->Ranges({{1 << 12, 1 << 24}})           \
    ->UseManualTime()                        \
    ->Unit(benchmark::kMillisecond);

STRINGS_CASE_BENCHMARK_DEFINE(to_lower)
129 changes: 47 additions & 82 deletions cpp/src/strings/case.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -19,6 +19,7 @@
#include <cudf/column/column.hpp>
#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_factories.hpp>
#include <cudf/copying.hpp>
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/strings/case.hpp>
Expand All @@ -37,30 +38,18 @@ namespace cudf {
namespace strings {
namespace detail {
namespace {
/**
* @brief Used as template parameter to divide size calculation from
* the actual string operation within a function.
*
* Useful when most of the logic is identical for both passes.
*/
enum TwoPass {
SizeOnly = 0, ///< calculate the size only
ExecuteOp ///< run the string operation
};

/**
* @brief Per string logic for case conversion functions.
*
* @tparam Pass Determines if size calculation or output write is being performed.
*/
template <TwoPass Pass = SizeOnly>
struct upper_lower_fn {
const column_device_view d_column;
character_flags_table_type case_flag; // flag to check with on each character
const character_flags_table_type* d_flags;
const character_cases_table_type* d_case_table;
const special_case_mapping* d_special_case_mapping;
const int32_t* d_offsets{};
int32_t* d_offsets{};
char* d_chars{};

__device__ special_case_mapping get_special_case_mapping(uint32_t code_point)
Expand All @@ -70,7 +59,7 @@ struct upper_lower_fn {

// compute-size / copy the bytes representing the special case mapping for this codepoint
__device__ int32_t handle_special_case_bytes(uint32_t code_point,
char*& d_buffer,
char* d_buffer,
detail::character_flags_table_type flag)
{
special_case_mapping m = get_special_case_mapping(code_point);
Expand All @@ -79,47 +68,45 @@ struct upper_lower_fn {
auto const count = IS_LOWER(flag) ? m.num_upper_chars : m.num_lower_chars;
auto const* chars = IS_LOWER(flag) ? m.upper : m.lower;
for (uint16_t idx = 0; idx < count; idx++) {
if (Pass == SizeOnly) {
bytes += detail::bytes_in_char_utf8(detail::codepoint_to_utf8(chars[idx]));
} else {
bytes += detail::from_char_utf8(detail::codepoint_to_utf8(chars[idx]), d_buffer + bytes);
}
bytes += d_buffer
? detail::from_char_utf8(detail::codepoint_to_utf8(chars[idx]), d_buffer + bytes)
: detail::bytes_in_char_utf8(detail::codepoint_to_utf8(chars[idx]));
}
if (d_buffer != nullptr) { d_buffer += bytes; }
return bytes;
}

__device__ int32_t operator()(size_type idx)
__device__ void operator()(size_type idx)
{
if (d_column.is_null(idx)) return 0; // null string
string_view d_str = d_column.template element<string_view>(idx);
int32_t bytes = 0;
char* d_buffer = nullptr;
if (Pass == ExecuteOp) d_buffer = d_chars + d_offsets[idx];
if (d_column.is_null(idx)) {
if (!d_chars) d_offsets[idx] = 0;
return;
}
auto const d_str = d_column.template element<string_view>(idx);
int32_t bytes = 0;
char* d_buffer = d_chars ? d_chars + d_offsets[idx] : nullptr;
for (auto itr = d_str.begin(); itr != d_str.end(); ++itr) {
uint32_t code_point = detail::utf8_to_codepoint(*itr);
uint32_t code_point = detail::utf8_to_codepoint(*itr);

detail::character_flags_table_type flag = code_point <= 0x00FFFF ? d_flags[code_point] : 0;

// we apply special mapping in two cases:
// - uncased characters with the special mapping flag, always
// - cased characters with the special mapping flag, when matching the input case_flag
//
if (IS_SPECIAL(flag) && ((flag & case_flag) || !IS_UPPER_OR_LOWER(flag))) {
bytes += handle_special_case_bytes(code_point, d_buffer, case_flag);
} else if (flag & case_flag) {
if (Pass == SizeOnly)
bytes += detail::bytes_in_char_utf8(detail::codepoint_to_utf8(d_case_table[code_point]));
else
d_buffer +=
detail::from_char_utf8(detail::codepoint_to_utf8(d_case_table[code_point]), d_buffer);
auto const new_bytes = handle_special_case_bytes(code_point, d_buffer, case_flag);
bytes += new_bytes;
if (d_buffer) d_buffer += new_bytes;
} else {
if (Pass == SizeOnly)
bytes += detail::bytes_in_char_utf8(*itr);
char_utf8 new_char =
(flag & case_flag) ? detail::codepoint_to_utf8(d_case_table[code_point]) : *itr;
if (!d_buffer)
bytes += detail::bytes_in_char_utf8(new_char);
else
d_buffer += detail::from_char_utf8(*itr, d_buffer);
d_buffer += detail::from_char_utf8(new_char, d_buffer);
}
}
return bytes;
if (!d_buffer) d_offsets[idx] = bytes;
}
};

Expand All @@ -138,49 +125,27 @@ std::unique_ptr<column> convert_case(strings_column_view const& strings,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto strings_count = strings.size();
if (strings_count == 0) return detail::make_empty_strings_column(stream, mr);

auto strings_column = column_device_view::create(strings.parent(), stream);
auto d_column = *strings_column;
size_type null_count = strings.null_count();

// copy null mask
rmm::device_buffer null_mask = cudf::detail::copy_bitmask(strings.parent(), stream, mr);
// get the lookup tables used for case conversion
auto d_flags = get_character_flags_table();

auto d_case_table = get_character_cases_table();
auto d_special_case_mapping = get_special_case_mapping_table();

// build offsets column -- calculate the size of each output string
auto offsets_transformer_itr = thrust::make_transform_iterator(
thrust::make_counting_iterator<size_type>(0),
upper_lower_fn<SizeOnly>{d_column, case_flag, d_flags, d_case_table, d_special_case_mapping});
auto offsets_column = detail::make_offsets_child_column(
offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr);
auto offsets_view = offsets_column->view();
auto d_new_offsets = offsets_view.data<int32_t>();

// build the chars column -- convert characters based on case_flag parameter
size_type bytes = thrust::device_pointer_cast(d_new_offsets)[strings_count];
auto chars_column =
strings::detail::create_chars_child_column(strings_count, null_count, bytes, stream, mr);
auto chars_view = chars_column->mutable_view();
auto d_chars = chars_view.data<char>();

thrust::for_each_n(
rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
strings_count,
upper_lower_fn<ExecuteOp>{
d_column, case_flag, d_flags, d_case_table, d_special_case_mapping, d_new_offsets, d_chars});

return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column),
null_count,
std::move(null_mask),
if (strings.is_empty()) return detail::make_empty_strings_column(stream, mr);

auto strings_column = column_device_view::create(strings.parent(), stream);
auto d_column = *strings_column;

// build functor with lookup tables used for case conversion
upper_lower_fn functor{d_column,
case_flag,
get_character_flags_table(),
get_character_cases_table(),
get_special_case_mapping_table()};

// this utility calls the functor to build the offsets and chars columns
auto children = cudf::strings::detail::make_strings_children(
functor, strings.size(), strings.null_count(), stream, mr);

return make_strings_column(strings.size(),
std::move(children.first),
std::move(children.second),
strings.null_count(),
cudf::detail::copy_bitmask(strings.parent(), stream, mr),
stream,
mr);
}
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/strings/char_types/char_cases.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -188,9 +188,9 @@ void generate_special_mapping_hash_table()
printf(
"// the special case mapping table is a perfect hash table with no collisions, allowing us\n"
"// to 'hash' by simply modding by the incoming codepoint\n"
"inline __device__ uint16_t get_special_case_hash_index(uint32_t code_point){\n"
"constexpr uint16_t get_special_case_hash_index(uint32_t code_point){\n"
" constexpr uint16_t special_case_prime = %d;\n"
" return code_point %% special_case_prime;"
" return static_cast<uint16_t>(code_point %% special_case_prime);"
"\n}\n",
hash_prime);
}
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/strings/char_types/char_cases.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -5236,10 +5236,10 @@ constexpr special_case_mapping g_special_case_mappings[] = {
};
// the special case mapping table is a perfect hash table with no collisions, allowing us
// to 'hash' by simply modding by the incoming codepoint
inline __device__ uint16_t get_special_case_hash_index(uint32_t code_point)
constexpr uint16_t get_special_case_hash_index(uint32_t code_point)
{
constexpr uint16_t special_case_prime = 499;
return code_point % special_case_prime;
return static_cast<uint16_t>(code_point % special_case_prime);
}

} // namespace detail
Expand Down