Skip to content

Commit

Permalink
Replace remaining uses of device_vector (#8343)
Browse files Browse the repository at this point in the history
Replaces `device_vector` with `device_uvector` in decomp_test.cu (now .cpp) and quantile.cu.

Also updates `DEVELOPER_GUIDE.md` to advise against using `device_vector`.

Closes #2631

Authors:
  - Mark Harris (https://github.com/harrism)

Approvers:
  - Ram (Ramakrishna Prabhu) (https://github.com/rgsl888prabhu)
  - Nghia Truong (https://github.com/ttnghia)

URL: #8343
  • Loading branch information
harrism authored Jun 2, 2021
1 parent f9b7c60 commit 48a2663
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 26 deletions.
12 changes: 9 additions & 3 deletions cpp/docs/DEVELOPER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -458,8 +458,12 @@ int host_value = int_scalar.value();
Allocates a specified number of elements of the specified type. If no initialization value is
provided, all elements are default initialized (this incurs a kernel launch).
**Note**: `rmm::device_vector<T>` is not yet updated to use `device_memory_resource`s, but support
is forthcoming. Likewise, `device_vector` operations cannot be stream ordered.
**Note**: We have removed all usage of `rmm::device_vector` and `thrust::device_vector` from
libcudf, and you should not use them in new code in libcudf without careful consideration. Instead,
use `rmm::device_uvector` along with the utility factories in `device_factories.hpp`. These
utilities enable creation of `uvector`s from host-side vectors, or creating zero-initialized
`uvector`s, so that they are as convenient to use as `device_vector`. Avoiding `device_vector` has
a number of benefits, as described in the following section on `rmm::device_uvector`.
#### `rmm::device_uvector<T>`
Expand All @@ -468,7 +472,9 @@ differences:
- As an optimization, elements are uninitialized and no synchronization occurs at construction.
This limits the types `T` to trivially copyable types.
- All operations are stream ordered (i.e., they accept a `cuda_stream_view` specifying the stream
on which the operation is performed).
on which the operation is performed). This improves safety when using non-default streams.
- `device_uvector.hpp` does not include any `__device__` code, unlike `thrust/device_vector.hpp`,
which means `device_uvector`s can be used in `.cpp` files, rather than just in `.cu` files.
```c++
cuda_stream s;
Expand Down
10 changes: 7 additions & 3 deletions cpp/src/quantiles/quantile.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,15 @@
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/sorting.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/dictionary/detail/iterator.cuh>
#include <cudf/dictionary/dictionary_column_view.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/error.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

#include <memory>
#include <vector>
Expand Down Expand Up @@ -76,12 +78,13 @@ struct quantile_functor {
auto d_input = column_device_view::create(input, stream);
auto d_output = mutable_column_device_view::create(output->mutable_view());

rmm::device_vector<double> q_device{q};
auto q_device = cudf::detail::make_device_uvector_sync(q);

if (!cudf::is_dictionary(input.type())) {
auto sorted_data =
thrust::make_permutation_iterator(input.data<StorageType>(), ordered_indices);
thrust::transform(q_device.begin(),
thrust::transform(rmm::exec_policy(),
q_device.begin(),
q_device.end(),
d_output->template begin<StorageResult>(),
[sorted_data, interp = interp, size = size] __device__(double q) {
Expand All @@ -90,7 +93,8 @@ struct quantile_functor {
} else {
auto sorted_data = thrust::make_permutation_iterator(
dictionary::detail::make_dictionary_iterator<T>(*d_input), ordered_indices);
thrust::transform(q_device.begin(),
thrust::transform(rmm::exec_policy(),
q_device.begin(),
q_device.end(),
d_output->template begin<StorageResult>(),
[sorted_data, interp = interp, size = size] __device__(double q) {
Expand Down
2 changes: 1 addition & 1 deletion cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ ConfigureTest(INTEROP_TEST

###################################################################################################
# - io tests --------------------------------------------------------------------------------------
ConfigureTest(DECOMPRESSION_TEST io/comp/decomp_test.cu)
ConfigureTest(DECOMPRESSION_TEST io/comp/decomp_test.cpp)

ConfigureTest(CSV_TEST io/csv_test.cpp)
ConfigureTest(ORC_TEST io/orc_test.cpp)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@

#include <cudf_test/base_fixture.hpp>

#include <vector>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>
#include <rmm/device_vector.hpp>
#include <rmm/device_uvector.hpp>

#include <vector>

/**
* @brief Base test fixture for decompression
Expand All @@ -36,9 +37,6 @@ struct DecompressTest : public cudf::test::BaseFixture {
ASSERT_CUDA_SUCCEEDED(cudaMallocHost((void**)&inf_args, sizeof(cudf::io::gpu_inflate_input_s)));
ASSERT_CUDA_SUCCEEDED(
cudaMallocHost((void**)&inf_stat, sizeof(cudf::io::gpu_inflate_status_s)));

d_inf_args.resize(1);
d_inf_stat.resize(1);
}

void TearDown() override
Expand All @@ -64,19 +62,22 @@ struct DecompressTest : public cudf::test::BaseFixture {
inf_args->dstDevice = static_cast<uint8_t*>(dst.data());
inf_args->srcSize = src.size();
inf_args->dstSize = dst.size();
ASSERT_CUDA_SUCCEEDED(cudaMemcpyAsync(d_inf_args.data().get(),
rmm::device_uvector<cudf::io::gpu_inflate_input_s> d_inf_args(1, rmm::cuda_stream_default);
rmm::device_uvector<cudf::io::gpu_inflate_status_s> d_inf_stat(1, rmm::cuda_stream_default);
ASSERT_CUDA_SUCCEEDED(cudaMemcpyAsync(d_inf_args.data(),
inf_args,
sizeof(cudf::io::gpu_inflate_input_s),
cudaMemcpyHostToDevice,
0));
ASSERT_CUDA_SUCCEEDED(cudaMemcpyAsync(d_inf_stat.data().get(),
ASSERT_CUDA_SUCCEEDED(cudaMemcpyAsync(d_inf_stat.data(),
inf_stat,
sizeof(cudf::io::gpu_inflate_status_s),
cudaMemcpyHostToDevice,
0));
ASSERT_CUDA_SUCCEEDED(static_cast<Decompressor*>(this)->dispatch());
ASSERT_CUDA_SUCCEEDED(
static_cast<Decompressor*>(this)->dispatch(d_inf_args.data(), d_inf_stat.data()));
ASSERT_CUDA_SUCCEEDED(cudaMemcpyAsync(inf_stat,
d_inf_stat.data().get(),
d_inf_stat.data(),
sizeof(cudf::io::gpu_inflate_status_s),
cudaMemcpyDeviceToHost,
0));
Expand All @@ -87,41 +88,41 @@ struct DecompressTest : public cudf::test::BaseFixture {

cudf::io::gpu_inflate_input_s* inf_args = nullptr;
cudf::io::gpu_inflate_status_s* inf_stat = nullptr;
rmm::device_vector<cudf::io::gpu_inflate_input_s> d_inf_args;
rmm::device_vector<cudf::io::gpu_inflate_status_s> d_inf_stat;
};

/**
* @brief Derived fixture for GZIP decompression
*/
struct GzipDecompressTest : public DecompressTest<GzipDecompressTest> {
cudaError_t dispatch()
cudaError_t dispatch(cudf::io::gpu_inflate_input_s* d_inf_args,
cudf::io::gpu_inflate_status_s* d_inf_stat)
{
return cudf::io::gpuinflate(d_inf_args.data().get(), d_inf_stat.data().get(), 1, 1);
return cudf::io::gpuinflate(d_inf_args, d_inf_stat, 1, 1);
}
};

/**
* @brief Derived fixture for Snappy decompression
*/
struct SnappyDecompressTest : public DecompressTest<SnappyDecompressTest> {
cudaError_t dispatch()
cudaError_t dispatch(cudf::io::gpu_inflate_input_s* d_inf_args,
cudf::io::gpu_inflate_status_s* d_inf_stat)
{
return cudf::io::gpu_unsnap(d_inf_args.data().get(), d_inf_stat.data().get(), 1);
return cudf::io::gpu_unsnap(d_inf_args, d_inf_stat, 1);
}
};

/**
* @brief Derived fixture for Brotli decompression
*/
struct BrotliDecompressTest : public DecompressTest<BrotliDecompressTest> {
cudaError_t dispatch()
cudaError_t dispatch(cudf::io::gpu_inflate_input_s* d_inf_args,
cudf::io::gpu_inflate_status_s* d_inf_stat)
{
rmm::device_buffer d_scratch{cudf::io::get_gpu_debrotli_scratch_size(1),
rmm::cuda_stream_default};

return cudf::io::gpu_debrotli(
d_inf_args.data().get(), d_inf_stat.data().get(), d_scratch.data(), d_scratch.size(), 1);
return cudf::io::gpu_debrotli(d_inf_args, d_inf_stat, d_scratch.data(), d_scratch.size(), 1);
}
};

Expand Down

0 comments on commit 48a2663

Please sign in to comment.