Skip to content

Commit

Permalink
Remove unneeded temporary device vector for strings scatter specializ…
Browse files Browse the repository at this point in the history
…ation (#7409)

The specialization logic for scatter on a strings column includes building a temporary `device_vector` of `string_view` objects for the source column. The built-in iterators for an input `column_view` already work for `string_view`, so this extra `device_vector` is not required.

The utilities for creating a `device_vector` of `string_view`s are also changed to use `device_uvector` instead. This change also removes an unnecessary parameter and includes other minor cleanups.

I also added a gbenchmark for scatter that includes strings. Removing the extra `device_vector` showed a small 10-15% performance improvement.

Authors:
  - David (@davidwendt)

Approvers:
  - Paul Taylor (@trxcllnt)
  - Keith Kraus (@kkraus14)
  - Mark Harris (@harrism)

URL: #7409
  • Loading branch information
davidwendt authored Feb 23, 2021
1 parent b887e58 commit 43b44e1
Show file tree
Hide file tree
Showing 7 changed files with 112 additions and 49 deletions.
1 change: 1 addition & 0 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@ set(STRINGS_BENCH_SRC
"${CMAKE_CURRENT_SOURCE_DIR}/string/case_benchmark.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/string/contains_benchmark.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/string/convert_durations_benchmark.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/string/copy_benchmark.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/string/find_benchmark.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/string/replace_benchmark.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/string/url_decode_benchmark.cpp")
Expand Down
63 changes: 63 additions & 0 deletions cpp/benchmarks/string/copy_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmark/benchmark.h>
#include <benchmarks/common/generate_benchmark_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/copying.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf_test/column_wrapper.hpp>

#include <algorithm>
#include <numeric>
#include <random>
#include <vector>

// Fixture type for the strings copy benchmarks defined in this file; it adds
// nothing of its own on top of cudf::benchmark.
class StringCopy : public cudf::benchmark {};

/**
 * @brief Benchmarks cudf::scatter on single-column tables of random strings.
 *
 * Builds a random source and target strings table of `state.range(0)` rows each,
 * creates a scatter map that is a permutation of [0, n_rows), and times
 * cudf::scatter of the source into the target once per benchmark iteration.
 *
 * @param state Google Benchmark state; `state.range(0)` supplies the row count.
 */
static void BM_copy(benchmark::State& state)
{
  auto const n_rows = static_cast<cudf::size_type>(state.range(0));
  auto const source = create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows});
  auto const target = create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows});

  // Scatter indices: every row index exactly once, in shuffled order.
  std::vector<cudf::size_type> host_map_data(n_rows);
  std::iota(host_map_data.begin(), host_map_data.end(), 0);
  // std::random_shuffle was removed in C++17; std::shuffle with a default-seeded
  // engine also gives the same permutation on every run, so results are comparable.
  std::mt19937 rng{};
  std::shuffle(host_map_data.begin(), host_map_data.end(), rng);
  cudf::test::fixed_width_column_wrapper<cudf::size_type> scatter_map(host_map_data.begin(),
                                                                      host_map_data.end());

  for (auto _ : state) {
    // Scoped timer for this iteration.
    // NOTE(review): the `true, 0` arguments presumably request a cache flush and
    // select the default stream -- confirm against cuda_event_timer's declaration.
    cuda_event_timer raii(state, true, 0);
    cudf::scatter(source->view(), scatter_map, target->view());
  }

  // Report throughput in terms of the source column's character bytes.
  state.SetBytesProcessed(state.iterations() *
                          cudf::strings_column_view(source->view().column(0)).chars_size());
}

// Defines a StringCopy fixture benchmark called `name` whose body runs BM_copy,
// then registers it over row counts from 2^12 to 2^24 (range multiplier 8) with
// manual timing, reported in milliseconds.
#define COPY_BENCHMARK_DEFINE(name)          \
  BENCHMARK_DEFINE_F(StringCopy, name)       \
  (::benchmark::State & st) { BM_copy(st); } \
  BENCHMARK_REGISTER_F(StringCopy, name)     \
    ->RangeMultiplier(8)                     \
    ->Ranges({{1 << 12, 1 << 24}})           \
    ->UseManualTime()                        \
    ->Unit(benchmark::kMillisecond);

COPY_BENCHMARK_DEFINE(scatter)
7 changes: 3 additions & 4 deletions cpp/include/cudf/detail/scatter.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -114,10 +114,9 @@ struct column_scatterer_impl<string_view, MapIterator> {
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
{
using strings::detail::create_string_vector_from_column;
auto const source_vector = create_string_vector_from_column(source, stream);
auto const begin = source_vector.begin();
auto const end = begin + std::distance(scatter_map_begin, scatter_map_end);
auto d_column = column_device_view::create(source, stream);
auto const begin = d_column->begin<string_view>();
auto const end = begin + cudf::distance(scatter_map_begin, scatter_map_end);
return strings::detail::scatter(begin, end, scatter_map_begin, target, stream, mr);
}
};
Expand Down
6 changes: 3 additions & 3 deletions cpp/include/cudf/lists/detail/scatter.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -431,11 +431,11 @@ struct list_child_constructor {
auto const num_child_rows{
cudf::detail::get_value<size_type>(list_offsets, list_offsets.size() - 1, stream)};

auto string_views = rmm::device_vector<string_view>(num_child_rows);
auto string_views = rmm::device_uvector<string_view>(num_child_rows, stream);

auto populate_string_views = [d_scattered_lists = list_vector.begin(), // unbound_list_view*
d_list_offsets = list_offsets.template data<int32_t>(),
d_string_views = string_views.data().get(),
d_string_views = string_views.data(),
source_lists,
target_lists] __device__(auto const& row_index) {
auto unbound_list_view = d_scattered_lists[row_index];
Expand Down Expand Up @@ -483,7 +483,7 @@ struct list_child_constructor {
string_views.begin(), string_views.size(), stream, mr);

auto string_chars = cudf::strings::detail::child_chars_from_string_vector(
string_views, string_offsets->view().template data<cudf::size_type>(), 0, stream, mr);
string_views, string_offsets->view(), stream, mr);
auto child_null_mask =
source_lists_column_view.child().nullable() || target_lists_column_view.child().nullable()
? construct_child_nullmask(
Expand Down
18 changes: 7 additions & 11 deletions cpp/include/cudf/strings/detail/scatter.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -61,29 +61,25 @@ std::unique_ptr<column> scatter(
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
auto strings_count = target.size();
if (strings_count == 0) return make_empty_strings_column(stream, mr);
if (target.is_empty()) return make_empty_strings_column(stream, mr);

// create null mask -- caller must update this
rmm::device_buffer null_mask{0, stream, mr};
if (target.has_nulls()) null_mask = cudf::detail::copy_bitmask(target.parent(), stream, mr);
// create vector of string_view's to scatter into
rmm::device_uvector<string_view> target_vector = create_string_vector_from_column(target, stream);

// create string vectors
rmm::device_vector<string_view> target_vector = create_string_vector_from_column(target, stream);
// do the scatter
thrust::scatter(rmm::exec_policy(stream), begin, end, scatter_map, target_vector.begin());

// build offsets column
auto offsets_column = child_offsets_from_string_vector(target_vector, stream, mr);
// build chars column
auto chars_column = child_chars_from_string_vector(
target_vector, offsets_column->view().data<int32_t>(), 0, stream, mr);
auto chars_column =
child_chars_from_string_vector(target_vector, offsets_column->view(), stream, mr);

return make_strings_column(strings_count,
return make_strings_column(target.size(),
std::move(offsets_column),
std::move(chars_column),
UNKNOWN_NULL_COUNT,
std::move(null_mask),
cudf::detail::copy_bitmask(target.parent(), stream, mr),
stream,
mr);
}
Expand Down
21 changes: 10 additions & 11 deletions cpp/include/cudf/strings/detail/utilities.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,9 +17,10 @@

#include <cudf/column/column.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_vector.hpp>
#include <rmm/device_uvector.hpp>

namespace cudf {
namespace strings {
Expand Down Expand Up @@ -60,36 +61,34 @@ std::unique_ptr<column> make_empty_strings_column(
* @param stream CUDA stream used for device memory operations and kernel launches.
* @return Device vector of string_views
*/
rmm::device_vector<string_view> create_string_vector_from_column(
cudf::strings_column_view strings, rmm::cuda_stream_view stream = rmm::cuda_stream_default);
rmm::device_uvector<string_view> create_string_vector_from_column(
cudf::strings_column_view const strings, rmm::cuda_stream_view stream = rmm::cuda_stream_default);

/**
* @brief Creates an offsets column from a string_view vector.
*
* @param strings Strings column
* @param strings Strings input data
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return Child offsets column
*/
std::unique_ptr<cudf::column> child_offsets_from_string_vector(
const rmm::device_vector<string_view>& strings,
cudf::detail::device_span<string_view> strings,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Creates a chars column from a string_view vector.
*
* @param strings Strings vector
* @param strings Strings input data
* @param d_offsets Offsets vector for placing strings into column's memory.
* @param null_count Number of null strings.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return Child chars column
*/
std::unique_ptr<cudf::column> child_chars_from_string_vector(
const rmm::device_vector<string_view>& strings,
const int32_t* d_offsets,
cudf::size_type null_count,
cudf::detail::device_span<string_view> strings,
column_view const& offsets,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

Expand Down
45 changes: 25 additions & 20 deletions cpp/src/strings/utilities.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -21,11 +21,12 @@

#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/get_value.cuh>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/utilities/error.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_vector.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/transform_reduce.h>
Expand All @@ -52,19 +53,20 @@ std::unique_ptr<string_view, std::function<void(string_view*)>> string_from_host
new string_view(reinterpret_cast<char*>(d_str->data()), length), deleter};
}

// build a vector of string_view objects from a strings column
rmm::device_vector<string_view> create_string_vector_from_column(cudf::strings_column_view strings,
rmm::cuda_stream_view stream)
/**
* @copydoc create_string_vector_from_column
*/
rmm::device_uvector<string_view> create_string_vector_from_column(cudf::strings_column_view strings,
rmm::cuda_stream_view stream)
{
auto strings_column = column_device_view::create(strings.parent(), stream);
auto d_column = *strings_column;

auto count = strings.size();
rmm::device_vector<string_view> strings_vector(count);
string_view* d_strings = strings_vector.data().get();
rmm::device_uvector<string_view> strings_vector(strings.size(), stream);
string_view* d_strings = strings_vector.data();
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
count,
strings.size(),
[d_column, d_strings] __device__(size_type idx) {
if (d_column.is_null(idx))
d_strings[idx] = string_view(nullptr, 0);
Expand All @@ -74,35 +76,38 @@ rmm::device_vector<string_view> create_string_vector_from_column(cudf::strings_c
return strings_vector;
}

// build a strings offsets column from a vector of string_views
/**
* @copydoc child_offsets_from_string_vector
*/
std::unique_ptr<cudf::column> child_offsets_from_string_vector(
const rmm::device_vector<string_view>& strings,
cudf::detail::device_span<string_view> strings,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
return child_offsets_from_string_iterator(strings.begin(), strings.size(), stream, mr);
}

// build a strings chars column from an vector of string_views
/**
* @copydoc child_chars_from_string_vector
*/
std::unique_ptr<cudf::column> child_chars_from_string_vector(
const rmm::device_vector<string_view>& strings,
const int32_t* d_offsets,
cudf::size_type null_count,
cudf::detail::device_span<string_view> strings,
column_view const& offsets,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
size_type count = strings.size();
auto d_strings = strings.data().get();
size_type bytes = thrust::device_pointer_cast(d_offsets)[count];
auto const d_strings = strings.data();
auto const bytes = cudf::detail::get_value<int32_t>(offsets, strings.size(), stream);
auto const d_offsets = offsets.data<int32_t>();

// create column
auto chars_column =
make_numeric_column(data_type{type_id::INT8}, bytes, mask_state::UNALLOCATED, stream, mr);
// get it's view
auto d_chars = chars_column->mutable_view().data<int8_t>();
auto d_chars = chars_column->mutable_view().data<char>();
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
count,
strings.size(),
[d_strings, d_offsets, d_chars] __device__(size_type idx) {
string_view const d_str = d_strings[idx];
memcpy(d_chars + d_offsets[idx], d_str.data(), d_str.size_bytes());
Expand Down

0 comments on commit 43b44e1

Please sign in to comment.