Skip to content

Commit

Permalink
orgin map thrust transform to direact kernel call
Browse files Browse the repository at this point in the history
  • Loading branch information
chrischoy committed Apr 6, 2021
1 parent ccc17a5 commit 9f94072
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 23 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Change Log

## [0.5.3]

- Use custom `gpu_storage` instead of thrust vector for faster constructors

## [0.5.2]

- spmm average cuda function
Expand Down
36 changes: 18 additions & 18 deletions src/coordinate_map_gpu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -432,45 +432,45 @@ CoordinateMapGPU<coordinate_type, TemplatedAllocator>::stride(
num_threads, m_coordinate_size);

LOG_DEBUG("Stride copy done.");
gpu_storage<bool, byte_allocator_type> success(N);
auto &stride_valid_row_index = stride_map.m_valid_row_index;
auto &stride_valid_map_index = stride_map.m_valid_map_index;

stride_valid_row_index.resize(N); // row indices
stride_valid_map_index.resize(N); // map offset

// Insert coordinates
auto insert = detail::insert_coordinate<coordinate_type, map_type,
index_type *>{
*stride_map.m_map, // map
stride_map.const_coordinate_data(), // coordinates,
stride_valid_row_index.data(), // valid row
stride_valid_map_index.data(), // iter offset
m_coordinate_size};
thrust::counting_iterator<uint32_t> count_begin{0};
thrust::transform(count_begin, count_begin + N, success.begin(), insert);
LOG_DEBUG("Stride insertion done.");
index_type const unused_key = std::numeric_limits<index_type>::max();
LOG_DEBUG("unused_key", unused_key);

detail::insert_and_map_kernel<coordinate_type, size_type, index_type,
map_type><<<num_blocks, CUDA_NUM_THREADS>>>(
*stride_map.m_map, //
stride_map.const_coordinate_data(), //
stride_valid_map_index.data(), //
stride_valid_row_index.data(), //
num_threads, m_coordinate_size, unused_key);
CUDA_CHECK(cudaStreamSynchronize(0));
LOG_DEBUG("Stride map size:", m_map->size());

// Valid row index
auto valid_begin = thrust::make_zip_iterator(
thrust::make_tuple(success.begin(), //
stride_valid_row_index.begin(), //
stride_valid_map_index.begin()));
thrust::make_tuple(stride_valid_map_index.begin(), //
stride_valid_row_index.begin()));
size_type const number_of_valid =
thrust::remove_if(thrust::device, //
valid_begin, //
thrust::make_zip_iterator(
thrust::make_tuple(success.end(), //
stride_valid_row_index.end(), //
stride_valid_map_index.end())),
detail::is_first<bool>(false)) -
thrust::make_tuple(stride_valid_map_index.end(), //
stride_valid_row_index.end())),
detail::is_first<index_type>(unused_key)) -
valid_begin;
stride_valid_row_index.resize(number_of_valid);
stride_valid_map_index.resize(number_of_valid);
stride_map.m_size = number_of_valid;
LOG_DEBUG("Reduced to", number_of_valid);

// remap values
thrust::counting_iterator<uint32_t> count_begin{0};
thrust::for_each(count_begin, count_begin + number_of_valid,
detail::update_value_with_offset<index_type, map_type>{
*stride_map.m_map, stride_map.m_valid_map_index.data()});
Expand Down
12 changes: 7 additions & 5 deletions src/coordinate_map_manager.cu
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,13 @@ struct insert_and_map_functor<coordinate_type, coordinate_field_type,

LOG_DEBUG("cuda_copy_n with num_inv_blocks:", num_inv_blocks,
"inverse_mapping.size():", inverse_mapping.size());
detail::cuda_copy_n<default_types::index_type, int64_t>
<<<num_inv_blocks, CUDA_NUM_THREADS>>>(
inverse_mapping.cbegin(), inverse_mapping.size(),
th_inverse_mapping.data_ptr<int64_t>());
CUDA_CHECK(cudaStreamSynchronize(0));
if (inverse_mapping.size() > 0) {
detail::cuda_copy_n<default_types::index_type, int64_t>
<<<num_inv_blocks, CUDA_NUM_THREADS>>>(
inverse_mapping.cbegin(), inverse_mapping.size(),
th_inverse_mapping.data_ptr<int64_t>());
CUDA_CHECK(cudaStreamSynchronize(0));
}

LOG_DEBUG("End of insert_map_functor");
// return std::make_pair(std::move(th_mapping),
Expand Down
5 changes: 5 additions & 0 deletions src/storage.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ public:
gpu_storage(uint64_t const num_elements) { allocate(num_elements); }
gpu_storage(self_type const &other_storage) {
LOG_DEBUG("copy storage constructor");
if (other_storage.size() == 0)
return;

allocate(other_storage.size());
CUDA_CHECK(cudaMemcpy(m_data, other_storage.cdata(),
other_storage.size() * sizeof(data_type),
Expand All @@ -64,6 +67,8 @@ public:
~gpu_storage() { deallocate(); }

data_type *allocate(uint64_t const num_elements) {
if (num_elements == 0)
return nullptr;
m_num_elements = num_elements;
m_data =
(data_type *)m_allocator.allocate(m_num_elements * sizeof(data_type));
Expand Down

0 comments on commit 9f94072

Please sign in to comment.