Skip to content

Commit

Permalink
Remove unneeded calls to create_chars_child_column utility (#14997)
Browse files Browse the repository at this point in the history
Removes unneeded calls to `cudf::strings::detail::create_chars_child_column`.
This covers every call site except the one in `make_strings_children`, which will be modified in a follow-on PR.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Yunsong Wang (https://github.com/PointKernel)

URL: #14997
  • Loading branch information
davidwendt authored Feb 16, 2024
1 parent aa9d484 commit 45614e2
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 62 deletions.
26 changes: 13 additions & 13 deletions cpp/include/cudf/strings/detail/gather.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -222,19 +222,19 @@ CUDF_KERNEL void gather_chars_fn_char_parallel(StringIterator strings_begin,
* @return New chars data (device vector of bytes) fit for a strings column.
*/
template <typename StringIterator, typename MapIterator>
std::unique_ptr<cudf::column> gather_chars(StringIterator strings_begin,
MapIterator map_begin,
MapIterator map_end,
cudf::detail::input_offsetalator const offsets,
size_type chars_bytes,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
rmm::device_uvector<char> gather_chars(StringIterator strings_begin,
MapIterator map_begin,
MapIterator map_end,
cudf::detail::input_offsetalator const offsets,
size_type chars_bytes,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto const output_count = std::distance(map_begin, map_end);
if (output_count == 0) return make_empty_column(type_id::INT8);
if (output_count == 0) return rmm::device_uvector<char>(0, stream, mr);

auto chars_column = create_chars_child_column(chars_bytes, stream, mr);
auto const d_chars = chars_column->mutable_view().template data<char>();
auto chars_data = rmm::device_uvector<char>(chars_bytes, stream, mr);
auto d_chars = chars_data.data();

constexpr int warps_per_threadblock = 4;
// String parallel strategy will be used if average string length is above this threshold.
Expand All @@ -260,7 +260,7 @@ std::unique_ptr<cudf::column> gather_chars(StringIterator strings_begin,
stream.value()>>>(strings_begin, d_chars, offsets, map_begin, output_count);
}
return chars_column;
return chars_data;
}
/**
Expand Down Expand Up @@ -316,12 +316,12 @@ std::unique_ptr<cudf::column> gather(strings_column_view const& strings,
// build chars column
auto const offsets_view =
cudf::detail::offsetalator_factory::make_input_iterator(out_offsets_column->view());
auto out_chars_column = gather_chars(
auto out_chars_data = gather_chars(
d_strings->begin<string_view>(), begin, end, offsets_view, total_bytes, stream, mr);
return make_strings_column(output_count,
std::move(out_offsets_column),
std::move(out_chars_column->release().data.release()[0]),
out_chars_data.release(),
0, // caller sets these
rmm::device_buffer{});
}
Expand Down
72 changes: 35 additions & 37 deletions cpp/include/cudf/strings/detail/strings_column_factories.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -98,46 +98,44 @@ std::unique_ptr<column> make_strings_column(IndexPairIterator begin,
(null_count > 0) ? std::move(new_nulls.first) : rmm::device_buffer{0, stream, mr};

// build chars column
std::unique_ptr<column> chars_column =
[offsets_view, bytes = bytes, begin, strings_count, null_count, stream, mr] {
auto const avg_bytes_per_row = bytes / std::max(strings_count - null_count, 1);
// use a character-parallel kernel for long string lengths
if (avg_bytes_per_row > FACTORY_BYTES_PER_ROW_THRESHOLD) {
auto const d_offsets =
cudf::detail::offsetalator_factory::make_input_iterator(offsets_view);
auto const str_begin = thrust::make_transform_iterator(
begin, cuda::proclaim_return_type<string_view>([] __device__(auto ip) {
return string_view{ip.first, ip.second};
}));

return gather_chars(str_begin,
thrust::make_counting_iterator<size_type>(0),
thrust::make_counting_iterator<size_type>(strings_count),
d_offsets,
bytes,
stream,
mr);
} else {
// this approach is 2-3x faster for a large number of smaller string lengths
auto chars_column = create_chars_child_column(bytes, stream, mr);
auto d_chars = chars_column->mutable_view().template data<char>();
auto copy_chars = [d_chars] __device__(auto item) {
string_index_pair const str = thrust::get<0>(item);
size_type const offset = thrust::get<1>(item);
if (str.first != nullptr) memcpy(d_chars + offset, str.first, str.second);
};
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_zip_iterator(
thrust::make_tuple(begin, offsets_view.template begin<int32_t>())),
strings_count,
copy_chars);
return chars_column;
}
}();
auto chars_data = [offsets_view, bytes = bytes, begin, strings_count, null_count, stream, mr] {
auto const avg_bytes_per_row = bytes / std::max(strings_count - null_count, 1);
// use a character-parallel kernel for long string lengths
if (avg_bytes_per_row > FACTORY_BYTES_PER_ROW_THRESHOLD) {
auto const d_offsets = cudf::detail::offsetalator_factory::make_input_iterator(offsets_view);
auto const str_begin = thrust::make_transform_iterator(
begin, cuda::proclaim_return_type<string_view>([] __device__(auto ip) {
return string_view{ip.first, ip.second};
}));

return gather_chars(str_begin,
thrust::make_counting_iterator<size_type>(0),
thrust::make_counting_iterator<size_type>(strings_count),
d_offsets,
bytes,
stream,
mr);
} else {
// this approach is 2-3x faster for a large number of smaller string lengths
auto chars_data = rmm::device_uvector<char>(bytes, stream, mr);
auto d_chars = chars_data.data();
auto copy_chars = [d_chars] __device__(auto item) {
string_index_pair const str = thrust::get<0>(item);
size_type const offset = thrust::get<1>(item);
if (str.first != nullptr) memcpy(d_chars + offset, str.first, str.second);
};
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_zip_iterator(
thrust::make_tuple(begin, offsets_view.template begin<size_type>())),
strings_count,
copy_chars);
return chars_data;
}
}();

return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column->release().data.release()[0]),
chars_data.release(),
null_count,
std::move(null_mask));
}
Expand Down
24 changes: 12 additions & 12 deletions cpp/src/io/csv/durations.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/offsets_iterator_factory.cuh>
#include <cudf/strings/detail/convert/int_to_string.cuh>
#include <cudf/strings/detail/strings_children.cuh>
#include <cudf/strings/detail/utilities.cuh>
Expand Down Expand Up @@ -88,12 +89,12 @@ struct duration_to_string_size_fn {

template <typename T>
struct duration_to_string_fn : public duration_to_string_size_fn<T> {
int32_t const* d_offsets;
cudf::detail::input_offsetalator d_offsets;
char* d_chars;
using duration_to_string_size_fn<T>::d_durations;

duration_to_string_fn(column_device_view const d_durations,
int32_t const* d_offsets,
cudf::detail::input_offsetalator d_offsets,
char* d_chars)
: duration_to_string_size_fn<T>{d_durations}, d_offsets(d_offsets), d_chars(d_chars)
{
Expand Down Expand Up @@ -181,28 +182,27 @@ struct dispatch_from_durations_fn {

// copy null mask
rmm::device_buffer null_mask = cudf::detail::copy_bitmask(durations, stream, mr);

// build offsets column
auto offsets_transformer_itr = thrust::make_transform_iterator(
thrust::make_counting_iterator<int32_t>(0), duration_to_string_size_fn<T>{d_column});
auto [offsets_column, chars_bytes] = cudf::detail::make_offsets_child_column(
auto offsets_transformer_itr =
cudf::detail::make_counting_transform_iterator(0, duration_to_string_size_fn<T>{d_column});
auto [offsets_column, chars_bytes] = cudf::strings::detail::make_offsets_child_column(
offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr);
auto offsets_view = offsets_column->view();
auto d_new_offsets = offsets_view.template data<int32_t>();
auto d_new_offsets =
cudf::detail::offsetalator_factory::make_input_iterator(offsets_column->view());

// build chars column
auto chars_column = strings::detail::create_chars_child_column(chars_bytes, stream, mr);
auto chars_view = chars_column->mutable_view();
auto d_chars = chars_view.template data<char>();
auto chars_data = rmm::device_uvector<char>(chars_bytes, stream, mr);
auto d_chars = chars_data.data();

thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
strings_count,
duration_to_string_fn<T>{d_column, d_new_offsets, d_chars});

//
return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column->release().data.release()[0]),
chars_data.release(),
durations.null_count(),
std::move(null_mask));
}
Expand Down

0 comments on commit 45614e2

Please sign in to comment.