diff --git a/cpp/include/cudf/strings/detail/gather.cuh b/cpp/include/cudf/strings/detail/gather.cuh index 442155380a2..7092d114009 100644 --- a/cpp/include/cudf/strings/detail/gather.cuh +++ b/cpp/include/cudf/strings/detail/gather.cuh @@ -222,19 +222,19 @@ CUDF_KERNEL void gather_chars_fn_char_parallel(StringIterator strings_begin, * @return New chars column fit for a strings column. */ template -std::unique_ptr gather_chars(StringIterator strings_begin, - MapIterator map_begin, - MapIterator map_end, - cudf::detail::input_offsetalator const offsets, - size_type chars_bytes, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +rmm::device_uvector gather_chars(StringIterator strings_begin, + MapIterator map_begin, + MapIterator map_end, + cudf::detail::input_offsetalator const offsets, + size_type chars_bytes, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto const output_count = std::distance(map_begin, map_end); - if (output_count == 0) return make_empty_column(type_id::INT8); + if (output_count == 0) return rmm::device_uvector(0, stream, mr); - auto chars_column = create_chars_child_column(chars_bytes, stream, mr); - auto const d_chars = chars_column->mutable_view().template data(); + auto chars_data = rmm::device_uvector(chars_bytes, stream, mr); + auto d_chars = chars_data.data(); constexpr int warps_per_threadblock = 4; // String parallel strategy will be used if average string length is above this threshold. @@ -260,7 +260,7 @@ std::unique_ptr gather_chars(StringIterator strings_begin, stream.value()>>>(strings_begin, d_chars, offsets, map_begin, output_count); } - return chars_column; + return chars_data; } /** @@ -316,12 +316,12 @@ std::unique_ptr gather(strings_column_view const& strings, // build chars column auto const offsets_view = cudf::detail::offsetalator_factory::make_input_iterator(out_offsets_column->view()); - auto out_chars_column = gather_chars( + auto out_chars_data = gather_chars( d_strings->begin(), begin, end, offsets_view, total_bytes, stream, mr); return make_strings_column(output_count, std::move(out_offsets_column), - std::move(out_chars_column->release().data.release()[0]), + out_chars_data.release(), 0, // caller sets these rmm::device_buffer{}); } diff --git a/cpp/include/cudf/strings/detail/strings_column_factories.cuh b/cpp/include/cudf/strings/detail/strings_column_factories.cuh index fcbdfa619f4..0adf6e362be 100644 --- a/cpp/include/cudf/strings/detail/strings_column_factories.cuh +++ b/cpp/include/cudf/strings/detail/strings_column_factories.cuh @@ -98,46 +98,44 @@ std::unique_ptr make_strings_column(IndexPairIterator begin, (null_count > 0) ? std::move(new_nulls.first) : rmm::device_buffer{0, stream, mr}; // build chars column - std::unique_ptr chars_column = - [offsets_view, bytes = bytes, begin, strings_count, null_count, stream, mr] { - auto const avg_bytes_per_row = bytes / std::max(strings_count - null_count, 1); - // use a character-parallel kernel for long string lengths - if (avg_bytes_per_row > FACTORY_BYTES_PER_ROW_THRESHOLD) { - auto const d_offsets = - cudf::detail::offsetalator_factory::make_input_iterator(offsets_view); - auto const str_begin = thrust::make_transform_iterator( - begin, cuda::proclaim_return_type([] __device__(auto ip) { - return string_view{ip.first, ip.second}; - })); - - return gather_chars(str_begin, - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), - d_offsets, - bytes, - stream, - mr); - } else { - // this approach is 2-3x faster for a large number of smaller string lengths - auto chars_column = create_chars_child_column(bytes, stream, mr); - auto d_chars = chars_column->mutable_view().template data(); - auto copy_chars = [d_chars] __device__(auto item) { - string_index_pair const str = thrust::get<0>(item); - size_type const offset = thrust::get<1>(item); - if (str.first != nullptr) memcpy(d_chars + offset, str.first, str.second); - }; - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_zip_iterator( - thrust::make_tuple(begin, offsets_view.template begin())), - strings_count, - copy_chars); - return chars_column; - } - }(); + auto chars_data = [offsets_view, bytes = bytes, begin, strings_count, null_count, stream, mr] { + auto const avg_bytes_per_row = bytes / std::max(strings_count - null_count, 1); + // use a character-parallel kernel for long string lengths + if (avg_bytes_per_row > FACTORY_BYTES_PER_ROW_THRESHOLD) { + auto const d_offsets = cudf::detail::offsetalator_factory::make_input_iterator(offsets_view); + auto const str_begin = thrust::make_transform_iterator( + begin, cuda::proclaim_return_type([] __device__(auto ip) { + return string_view{ip.first, ip.second}; + })); + + return gather_chars(str_begin, + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(strings_count), + d_offsets, + bytes, + stream, + mr); + } else { + // this approach is 2-3x faster for a large number of smaller string lengths + auto chars_data = rmm::device_uvector(bytes, stream, mr); + auto d_chars = chars_data.data(); + auto copy_chars = [d_chars] __device__(auto item) { + string_index_pair const str = thrust::get<0>(item); + size_type const offset = thrust::get<1>(item); + if (str.first != nullptr) memcpy(d_chars + offset, str.first, str.second); + }; + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_zip_iterator( + thrust::make_tuple(begin, offsets_view.template begin())), + strings_count, + copy_chars); + return chars_data; + } + }(); return make_strings_column(strings_count, std::move(offsets_column), - std::move(chars_column->release().data.release()[0]), + chars_data.release(), null_count, std::move(null_mask)); } diff --git a/cpp/src/io/csv/durations.cu b/cpp/src/io/csv/durations.cu index f4d32edac89..76b1b46dc61 100644 --- a/cpp/src/io/csv/durations.cu +++ b/cpp/src/io/csv/durations.cu @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -88,12 +89,12 @@ struct duration_to_string_size_fn { template struct duration_to_string_fn : public duration_to_string_size_fn { - int32_t const* d_offsets; + cudf::detail::input_offsetalator d_offsets; char* d_chars; using duration_to_string_size_fn::d_durations; duration_to_string_fn(column_device_view const d_durations, - int32_t const* d_offsets, + cudf::detail::input_offsetalator d_offsets, char* d_chars) : duration_to_string_size_fn{d_durations}, d_offsets(d_offsets), d_chars(d_chars) { @@ -181,28 +182,27 @@ struct dispatch_from_durations_fn { // copy null mask rmm::device_buffer null_mask = cudf::detail::copy_bitmask(durations, stream, mr); + // build offsets column - auto offsets_transformer_itr = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), duration_to_string_size_fn{d_column}); - auto [offsets_column, chars_bytes] = cudf::detail::make_offsets_child_column( + auto offsets_transformer_itr = + cudf::detail::make_counting_transform_iterator(0, duration_to_string_size_fn{d_column}); + auto [offsets_column, chars_bytes] = cudf::strings::detail::make_offsets_child_column( offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr); - auto offsets_view = offsets_column->view(); - auto d_new_offsets = offsets_view.template data(); + auto d_new_offsets = + cudf::detail::offsetalator_factory::make_input_iterator(offsets_column->view()); // build chars column - auto chars_column = strings::detail::create_chars_child_column(chars_bytes, stream, mr); - auto chars_view = chars_column->mutable_view(); - auto d_chars = chars_view.template data(); + auto chars_data = rmm::device_uvector(chars_bytes, stream, mr); + auto d_chars = chars_data.data(); thrust::for_each_n(rmm::exec_policy(stream), thrust::make_counting_iterator(0), strings_count, duration_to_string_fn{d_column, d_new_offsets, d_chars}); - // return make_strings_column(strings_count, std::move(offsets_column), - std::move(chars_column->release().data.release()[0]), + chars_data.release(), durations.null_count(), std::move(null_mask)); }