Skip to content

Commit

Permalink
Generate group offsets from element labels (#11017)
Browse files Browse the repository at this point in the history
Given an array of integer values which may be the labels of some list elements, we want to generate an array of offsets so we can create a lists column from these offsets.

For example:
```
input_labels = [ 0, 0, 0, 0, 1, 1, 4, 4, 4, 4 ]
output = [ 0, 4, 6, 6, 6, 10 ]
```

This is basically the reverse operation of #10945.

Closes #10955.

Authors:
  - Nghia Truong (https://github.com/ttnghia)

Approvers:
  - Devavret Makkar (https://github.com/devavret)
  - Jake Hemstad (https://github.com/jrhemstad)

URL: #11017
  • Loading branch information
ttnghia authored Jun 3, 2022
1 parent ef675dc commit a042be6
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 77 deletions.
92 changes: 91 additions & 1 deletion cpp/include/cudf/detail/labeling/label_segments.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,13 @@
#include <cudf/types.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/distance.h>
#include <thrust/for_each.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/reduce.h>
#include <thrust/scan.h>
#include <thrust/uninitialized_fill.h>

Expand Down Expand Up @@ -75,7 +78,7 @@ void label_segments(InputIterator offsets_begin,

// If the offsets array has no more than 2 offset values, there will be at max 1 segment.
// In such cases, the output will just be an array of all `0` values (which we already filled).
// We should terminate here, otherwise the `inclusive_scan` call below still do its entire
// We should terminate from here, otherwise the `inclusive_scan` call below still does its entire
// computation. That is unnecessary and may be expensive if we have the input offsets defining a
// very large segment.
if (thrust::distance(offsets_begin, offsets_end) <= 2) { return; }
Expand All @@ -96,4 +99,91 @@ void label_segments(InputIterator offsets_begin,
thrust::inclusive_scan(rmm::exec_policy(stream), label_begin, label_end, label_begin);
}

/**
* @brief Generate segment offsets from groups of identical label values.
*
* Given a pair of iterators accessing to an array containing groups of identical label values,
* generate offsets for segments defined by these label.
*
* Empty segments are also taken into account. If the input label values are discontinuous, the
* segments corresponding to the missing labels will be inferred as empty segments and their offsets
* will also be generated.
*
* Note that the caller is responsible to make sure the output range for offsets have the correct
* size, which is the maximum label value plus two (i.e., `size = *(labels_end - 1) + 2`).
* Otherwise, the result is undefined.
*
* @code{.pseudo}
* Examples:
*
* labels = [ 0, 0, 0, 0, 1, 1, 4, 4, 4, 4 ]
* output = [ 0, 4, 6, 6, 6, 10 ]
*
* labels = [ 0, 0, 0, 0, 0, 1, 1 ]
* output = [ 0, 5, 7 ]
* @endcode
*
* @param labels_begin The beginning of the labels that define segments.
* @param labels_end The end of the labels that define segments.
* @param offsets_begin The beginning of the output offset range.
* @param offsets_end The end of the output offset range.
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
template <typename InputIterator, typename OutputIterator>
void labels_to_offsets(InputIterator labels_begin,
InputIterator labels_end,
OutputIterator offsets_begin,
OutputIterator offsets_end,
rmm::cuda_stream_view stream)
{
// Always fill the entire output array with `0` value regardless of the input.
using OutputType = typename thrust::iterator_value<OutputIterator>::type;
thrust::uninitialized_fill(rmm::exec_policy(stream), offsets_begin, offsets_end, OutputType{0});

// If there is not any label value, we will have zero segment or all empty segments. We should
// terminate from here because:
// - If we have zero segment, the output array is empty thus `num_segments` computed below is
// wrong and may cascade to undefined behavior if we continue.
// - If we have all empty segments, the output offset values will be all `0`, which we already
// filled above. If we continue, the `exclusive_scan` call below still does its entire
// computation. That is unnecessary and may be expensive if we have the input labels defining
// a very large number of segments.
if (thrust::distance(labels_begin, labels_end) == 0) { return; }

auto const num_segments = thrust::distance(offsets_begin, offsets_end) - 1;

//================================================================================
// Let's consider an example: Given input labels = [ 0, 0, 0, 0, 1, 1, 4, 4, 4, 4 ].

// This stores the unique label values.
// Given the example above, we will have this array containing [0, 1, 4].
auto list_indices = rmm::device_uvector<OutputType>(num_segments, stream);

// Stores the non-zero segment sizes.
// Given the example above, we will have this array containing [4, 2, 4].
auto list_sizes = rmm::device_uvector<OutputType>(num_segments, stream);

// Count the numbers of labels in the each segment.
auto const end = thrust::reduce_by_key(rmm::exec_policy(stream),
labels_begin, // keys
labels_end, // keys
thrust::make_constant_iterator<OutputType>(1),
list_indices.begin(), // output unique label values
list_sizes.begin()); // count for each label
auto const num_non_empty_segments = thrust::distance(list_indices.begin(), end.first);

// Scatter segment sizes into the end position of their corresponding segment indices.
// Given the example above, we scatter [4, 2, 4] by the scatter map [0, 1, 4], resulting
// output = [4, 2, 0, 0, 4, 0].
thrust::scatter(rmm::exec_policy(stream),
list_sizes.begin(),
list_sizes.begin() + num_non_empty_segments,
list_indices.begin(),
offsets_begin);

// Generate offsets from sizes.
// Given the example above, the final output is [0, 4, 6, 6, 6, 10].
thrust::exclusive_scan(rmm::exec_policy(stream), offsets_begin, offsets_end, offsets_begin);
}

} // namespace cudf::detail
89 changes: 13 additions & 76 deletions cpp/src/lists/drop_list_duplicates.cu
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,7 @@
#include <thrust/distance.h>
#include <thrust/equal.h>
#include <thrust/execution_policy.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/reduce.h>
#include <thrust/scan.h>
#include <thrust/scatter.h>
#include <thrust/uninitialized_fill.h>

#include <optional>

Expand Down Expand Up @@ -436,72 +431,6 @@ std::vector<std::unique_ptr<column>> get_unique_entries_and_list_indices(
->release();
}

/**
 * @brief Generate list offsets from entry list indices for the final result lists column(s).
 *
 * The sizes of the non-empty lists are counted from `entries_list_indices`, scattered into a
 * zero-filled array of `num_lists + 1` elements at their (1-based) list index, and then
 * accumulated with an inclusive scan so that element `i` holds the total number of entries in
 * lists `1..i` (element `0` stays `0`).
 *
 * @param num_lists The number of lists.
 * @param entries_list_indices The mapping from list entries to their (1-based) list indices.
 * @param stream CUDA stream used for device memory operations and kernel launches.
 * @param mr Device resource used to allocate memory.
 * @return A column of `num_lists + 1` offsets for the output lists column(s).
 */
std::unique_ptr<column> generate_output_offsets(size_type num_lists,
                                                column_view const& entries_list_indices,
                                                rmm::cuda_stream_view stream,
                                                rmm::mr::device_memory_resource* mr)
{
  // Let's consider an example:
  // Given the original offsets of the input lists column is [0, 4, 5, 6, 7, 10, 11, 13].
  // The original entries_list_indices is [1, 1, 1, 1, 2, 3, 4, 5, 5, 5, 6, 7, 7], and after
  // extracting unique entries we have the entries_list_indices becomes [1, 1, 1, 4, 5, 5, 5, 7, 7]
  // and num_lists is 7. These are the input to this function.
  //
  // Through extracting unique list entries, one entry in the list index 1 has been removed (first
  // list, as we are using 1-based list index), and entries in the lists with indices {2, 3, 6} have
  // been removed completely.

  // This variable stores the (1-based) list indices of the unique entries but only one index value
  // per non-empty list. Given the example above, we will have this array hold the values
  // [1, 4, 5, 7].
  auto list_indices = rmm::device_uvector<size_type>(num_lists, stream);

  // Stores the non-zero numbers of unique entries per list.
  // Given the example above, we will have this array contain the values [3, 1, 3, 2].
  auto list_sizes = rmm::device_uvector<size_type>(num_lists, stream);

  // Count the numbers of unique entries for each non-empty list.
  auto const end = thrust::reduce_by_key(rmm::exec_policy(stream),
                                         entries_list_indices.template begin<size_type>(),
                                         entries_list_indices.template end<size_type>(),
                                         thrust::make_constant_iterator<size_type>(1),
                                         list_indices.begin(),
                                         list_sizes.begin());
  auto const num_non_empty_lists = thrust::distance(list_indices.begin(), end.first);

  // The output offsets for the output lists column(s).
  auto new_offsets = rmm::device_uvector<offset_type>(num_lists + 1, stream, mr);

  // The new offsets need to be filled with 0 value first.
  thrust::uninitialized_fill_n(
    rmm::exec_policy(stream), new_offsets.begin(), num_lists + 1, offset_type{0});

  // Scatter non-zero sizes of the output lists into the correct positions.
  // Given the example above, we will have new_offsets = [0, 3, 0, 0, 1, 3, 0, 2]
  thrust::scatter(rmm::exec_policy(stream),
                  list_sizes.begin(),
                  list_sizes.begin() + num_non_empty_lists,
                  list_indices.begin(),
                  new_offsets.begin());

  // Generate offsets from sizes.
  // Because the sizes were scattered at 1-based positions (slot `i` holds the size of list `i` and
  // slot `0` is always `0`), the scan must be inclusive: offset `i` is the sum of the sizes of
  // lists `1..i`. An exclusive scan would shift every offset by one list and drop the size of the
  // last list.
  // Given the example above, we will have new_offsets = [0, 3, 3, 3, 4, 7, 7, 9]
  thrust::inclusive_scan(
    rmm::exec_policy(stream), new_offsets.begin(), new_offsets.end(), new_offsets.begin());

  // Wrap the offsets buffer into a column and hand ownership to the caller.
  return std::make_unique<column>(
    data_type{type_to_id<offset_type>()}, num_lists + 1, new_offsets.release());
}

/**
* @brief Common execution code called by all public `drop_list_duplicates` APIs.
*/
Expand Down Expand Up @@ -594,11 +523,19 @@ std::pair<std::unique_ptr<column>, std::unique_ptr<column>> drop_list_duplicates
mr);

// Generate offsets for the output lists column(s).
auto output_offsets = generate_output_offsets(
keys.size(),
unique_entries_and_list_indices.back()->view(), // unique entries' list indices
stream,
mr);
auto output_offsets = [&] {
auto out_offsets = make_numeric_column(
data_type{type_to_id<offset_type>()}, keys.size() + 1, mask_state::UNALLOCATED, stream, mr);
auto const offsets = out_offsets->mutable_view();
auto const labels =
unique_entries_and_list_indices.back()->view(); // unique entries' list indices
cudf::detail::labels_to_offsets(labels.template begin<size_type>(),
labels.template end<size_type>(),
offsets.template begin<size_type>(),
offsets.template end<size_type>(),
stream);
return out_offsets;
}();

// If the values lists column is not given, its corresponding output will be nullptr.
auto out_values =
Expand Down

0 comments on commit a042be6

Please sign in to comment.