diff --git a/cpp/include/cudf/strings/detail/utilities.hpp b/cpp/include/cudf/strings/detail/utilities.hpp index 41a2654dce3..e279ee2eb65 100644 --- a/cpp/include/cudf/strings/detail/utilities.hpp +++ b/cpp/include/cudf/strings/detail/utilities.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -53,6 +53,20 @@ rmm::device_uvector create_string_vector_from_column( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); +/** + * @brief Return a normalized offset value from a strings offsets column + * + * @throw std::invalid_argument if `offsets` is neither INT32 nor INT64 + * + * @param offsets Input column of type INT32 or INT64 + * @param index Row value to retrieve + * @param stream CUDA stream used for device memory operations and kernel launches + * @return Value at `offsets[index]` + */ +int64_t get_offset_value(cudf::column_view const& offsets, + size_type index, + rmm::cuda_stream_view stream); + } // namespace detail } // namespace strings } // namespace cudf diff --git a/cpp/src/strings/copying/concatenate.cu b/cpp/src/strings/copying/concatenate.cu index 26cd4fff09b..027466ef13c 100644 --- a/cpp/src/strings/copying/concatenate.cu +++ b/cpp/src/strings/copying/concatenate.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,8 +16,8 @@ #include #include -#include #include +#include #include #include #include @@ -60,8 +60,8 @@ struct chars_size_transform { __device__ size_t operator()(column_device_view const& col) const { if (col.size() > 0) { - constexpr auto offsets_index = strings_column_view::offsets_column_index; - auto d_offsets = col.child(offsets_index).data(); + auto const offsets = col.child(strings_column_view::offsets_column_index); + auto const d_offsets = cudf::detail::input_offsetalator(offsets.head(), offsets.type()); return d_offsets[col.size() + col.offset()] - d_offsets[col.offset()]; } else { return 0; @@ -112,14 +112,15 @@ auto create_strings_device_views(host_span views, rmm::cuda_s } template -__global__ void fused_concatenate_string_offset_kernel(column_device_view const* input_views, - size_t const* input_offsets, - size_t const* partition_offsets, - size_type const num_input_views, - size_type const output_size, - int32_t* output_data, - bitmask_type* output_mask, - size_type* out_valid_count) +__global__ void fused_concatenate_string_offset_kernel( + column_device_view const* input_views, + size_t const* input_offsets, + size_t const* partition_offsets, + size_type const num_input_views, + size_type const output_size, + cudf::detail::output_offsetalator output_data, + bitmask_type* output_mask, + size_type* out_valid_count) { cudf::thread_index_type output_index = threadIdx.x + blockIdx.x * blockDim.x; size_type warp_valid_count = 0; @@ -132,10 +133,11 @@ __global__ void fused_concatenate_string_offset_kernel(column_device_view const* thrust::seq, input_offsets, input_offsets + num_input_views, output_index)); size_type const partition_index = offset_it - input_offsets; - auto const offset_index = output_index - *offset_it; - auto const& input_view = input_views[partition_index]; - constexpr auto offsets_child = strings_column_view::offsets_column_index; - auto const* input_data = input_view.child(offsets_child).data(); + auto const offset_index = output_index - *offset_it; + auto const& input_view = input_views[partition_index]; + auto const offsets_child = input_view.child(strings_column_view::offsets_column_index); + auto const input_data = + cudf::detail::input_offsetalator(offsets_child.head(), offsets_child.type()); output_data[output_index] = input_data[offset_index + input_view.offset()] // handle parent offset - input_data[input_view.offset()] // subtract first offset if non-zero @@ -186,8 +188,9 @@ __global__ void fused_concatenate_string_chars_kernel(column_device_view const* auto const offset_index = output_index - *offset_it; auto const& input_view = input_views[partition_index]; - constexpr auto offsets_child = strings_column_view::offsets_column_index; - auto const* input_offsets_data = input_view.child(offsets_child).data(); + auto const offsets_child = input_view.child(strings_column_view::offsets_column_index); + auto const input_offsets_data = + cudf::detail::input_offsetalator(offsets_child.head(), offsets_child.type()); constexpr auto chars_child = strings_column_view::chars_column_index; auto const* input_chars_data = input_view.child(chars_child).data(); @@ -225,16 +228,16 @@ std::unique_ptr concatenate(host_span columns, bool const has_nulls = std::any_of(columns.begin(), columns.end(), [](auto const& col) { return col.has_nulls(); }); - // create chars column + // create output chars column auto chars_column = create_chars_child_column(total_bytes, stream, mr); auto d_new_chars = chars_column->mutable_view().data(); chars_column->set_null_count(0); - // create offsets column + // create output offsets column auto offsets_column = make_numeric_column( data_type{type_id::INT32}, offsets_count, mask_state::UNALLOCATED, stream, mr); - auto d_new_offsets = offsets_column->mutable_view().data(); - offsets_column->set_null_count(0); + auto itr_new_offsets = + cudf::detail::offsetalator_factory::make_output_iterator(offsets_column->mutable_view()); rmm::device_buffer null_mask{0, stream, mr}; size_type null_count{}; @@ -256,7 +259,7 @@ std::unique_ptr concatenate(host_span columns, d_partition_offsets.data(), static_cast(columns.size()), strings_count, - d_new_offsets, + itr_new_offsets, reinterpret_cast(null_mask.data()), d_valid_count.data()); @@ -286,14 +289,11 @@ std::unique_ptr concatenate(host_span columns, column_view offsets_child = column->child(strings_column_view::offsets_column_index); column_view chars_child = column->child(strings_column_view::chars_column_index); - auto bytes_offset = - cudf::detail::get_value(offsets_child, column_offset, stream); - + auto const bytes_offset = get_offset_value(offsets_child, column_offset, stream); + auto const bytes_end = get_offset_value(offsets_child, column_size + column_offset, stream); // copy the chars column data - auto d_chars = chars_child.data() + bytes_offset; - auto const bytes = - cudf::detail::get_value(offsets_child, column_size + column_offset, stream) - - bytes_offset; + auto d_chars = chars_child.data() + bytes_offset; + auto const bytes = bytes_end - bytes_offset; CUDF_CUDA_TRY( cudaMemcpyAsync(d_new_chars, d_chars, bytes, cudaMemcpyDefault, stream.value())); diff --git a/cpp/src/strings/utilities.cu b/cpp/src/strings/utilities.cu index c8c68d19ce6..13f4776ca33 100644 --- a/cpp/src/strings/utilities.cu +++ b/cpp/src/strings/utilities.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -128,6 +129,18 @@ special_case_mapping const* get_special_case_mapping_table() }); } +int64_t get_offset_value(cudf::column_view const& offsets, + size_type index, + rmm::cuda_stream_view stream) +{ + auto const otid = offsets.type().id(); + CUDF_EXPECTS(otid == type_id::INT64 || otid == type_id::INT32, + "Offsets must be of type INT32 or INT64", + std::invalid_argument); + return otid == type_id::INT64 ? cudf::detail::get_value(offsets, index, stream) + : cudf::detail::get_value(offsets, index, stream); +} + } // namespace detail } // namespace strings } // namespace cudf