Skip to content

Commit

Permalink
Statistics cleanup (#7439)
Browse files Browse the repository at this point in the history
Addresses #7347

Authors:
  - Kumar Aatish (@kaatish)

Approvers:
  - David (@davidwendt)
  - Devavret Makkar (@devavret)
  - Vukasin Milovanovic (@vuule)

URL: #7439
  • Loading branch information
kaatish authored Mar 6, 2021
1 parent 3fefef6 commit ab7fe05
Show file tree
Hide file tree
Showing 16 changed files with 275 additions and 331 deletions.
32 changes: 4 additions & 28 deletions cpp/include/cudf/detail/utilities/device_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@
* limitations under the License.
*/

#ifndef DEVICE_OPERATORS_CUH
#define DEVICE_OPERATORS_CUH
#pragma once

/**
* @brief definition of the device operators
Expand All @@ -24,6 +23,7 @@

#include <cudf/fixed_point/fixed_point.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/string_view.cuh>
#include <cudf/types.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/traits.hpp>
Expand Down Expand Up @@ -84,16 +84,6 @@ struct DeviceCount {
}
};

/**
* @brief string value for sentinel which is used in min, max reduction
* operators
* This sentinel string value is the highest possible valid UTF-8 encoded
* character. This serves as identity value for maximum operator on string
* values. Also, this char pointer serves as valid device pointer of identity
* value for minimum operator on string values.
*/
static __constant__ char max_string_sentinel[5]{"\xF7\xBF\xBF\xBF"};

/* @brief binary `min` operator */
struct DeviceMin {
template <typename T>
Expand Down Expand Up @@ -123,13 +113,7 @@ struct DeviceMin {
typename std::enable_if_t<std::is_same<T, cudf::string_view>::value>* = nullptr>
CUDA_HOST_DEVICE_CALLABLE static constexpr T identity()
{
const char* psentinel{nullptr};
#if defined(__CUDA_ARCH__)
psentinel = &max_string_sentinel[0];
#else
CUDA_TRY(cudaGetSymbolAddress((void**)&psentinel, max_string_sentinel));
#endif
return T(psentinel, 4);
return string_view::max();
}

template <typename T, typename std::enable_if_t<cudf::is_dictionary<T>()>* = nullptr>
Expand Down Expand Up @@ -167,13 +151,7 @@ struct DeviceMax {
typename std::enable_if_t<std::is_same<T, cudf::string_view>::value>* = nullptr>
CUDA_HOST_DEVICE_CALLABLE static constexpr T identity()
{
const char* psentinel{nullptr};
#if defined(__CUDA_ARCH__)
psentinel = &max_string_sentinel[0];
#else
CUDA_TRY(cudaGetSymbolAddress((void**)&psentinel, max_string_sentinel));
#endif
return T(psentinel, 0);
return string_view::min();
}

template <typename T, typename std::enable_if_t<cudf::is_dictionary<T>()>* = nullptr>
Expand Down Expand Up @@ -242,5 +220,3 @@ struct DeviceLeadLag {
};

} // namespace cudf

#endif
5 changes: 1 addition & 4 deletions cpp/include/cudf/detail/utilities/int_fastdiv.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@
* limitations under the License.
*/

#ifndef _INT_FASTDIV_KJGIUHFG
#define _INT_FASTDIV_KJGIUHFG
#pragma once

class int_fastdiv {
public:
Expand Down Expand Up @@ -172,5 +171,3 @@ __host__ __device__ __forceinline__ int operator%(const unsigned char n, const i
{
return ((int)n) % divisor;
}

#endif
5 changes: 3 additions & 2 deletions cpp/include/cudf/strings/string.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ namespace string {
* @param d_str String to check.
* @return true if string has valid integer characters
*/
__device__ bool is_integer(string_view const& d_str)
inline __device__ bool is_integer(string_view const& d_str)
{
if (d_str.empty()) return false;
auto begin = d_str.begin();
Expand Down Expand Up @@ -71,7 +71,7 @@ __device__ bool is_integer(string_view const& d_str)
* @param d_str String to check.
* @return true if string has valid float characters
*/
__device__ bool is_float(string_view const& d_str)
inline __device__ bool is_float(string_view const& d_str)
{
if (d_str.empty()) return false;
// strings allowed by the converter
Expand Down Expand Up @@ -105,6 +105,7 @@ __device__ bool is_float(string_view const& d_str)
}
return result;
}

/** @} */ // end of group
} // namespace string
} // namespace strings
Expand Down
42 changes: 42 additions & 0 deletions cpp/include/cudf/strings/string_view.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#pragma once

#include <cudf/strings/string_view.hpp>
#include <cudf/utilities/error.hpp>

#include <thrust/count.h>
#include <thrust/find.h>
Expand All @@ -43,9 +44,50 @@ __device__ inline size_type characters_in_string(const char* str, size_type byte
return thrust::count_if(
thrust::seq, ptr, ptr + bytes, [](uint8_t chr) { return is_begin_utf8_char(chr); });
}

/**
* @brief string value for sentinel which is used in min, max reduction
* operators
*
* This sentinel string value is the highest possible valid UTF-8 encoded
* character. This serves as identity value for maximum operator on string
* values. Also, this char pointer serves as valid device pointer of identity
* value for minimum operator on string values.
*/
static __constant__ char max_string_sentinel[5]{"\xF7\xBF\xBF\xBF"};
} // namespace detail
} // namespace strings

/**
* @brief Return minimum value associated with the string type
*
* This function is needed to be host callable because it is called by a host
* callable function DeviceMax::identity<string_view>()
*
* @return An empty string
*/
CUDA_HOST_DEVICE_CALLABLE string_view string_view::min() { return string_view(); }

/**
* @brief Return maximum value associated with the string type
*
* This function is needed to be host callable because it is called by a host
* callable function DeviceMin::identity<string_view>()
*
* @return A string value which represents the highest possible valid UTF-8 encoded
* character.
*/
CUDA_HOST_DEVICE_CALLABLE string_view string_view::max()
{
const char* psentinel{nullptr};
#if defined(__CUDA_ARCH__)
psentinel = &cudf::strings::detail::max_string_sentinel[0];
#else
CUDA_TRY(cudaGetSymbolAddress((void**)&psentinel, cudf::strings::detail::max_string_sentinel));
#endif
return string_view(psentinel, 4);
}

__device__ inline size_type string_view::length() const
{
if (_length == UNKNOWN_STRING_LENGTH)
Expand Down
21 changes: 21 additions & 0 deletions cpp/include/cudf/strings/string_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,27 @@ class string_view {
*/
CUDA_DEVICE_CALLABLE string_view substr(size_type start, size_type length) const;

/**
* @brief Return minimum value associated with the string type
*
* This function is needed to be host callable because it is called by a host
* callable function DeviceMax::identity<string_view>()
*
* @return An empty string
*/
CUDA_HOST_DEVICE_CALLABLE static string_view min();

/**
* @brief Return maximum value associated with the string type
*
* This function is needed to be host callable because it is called by a host
* callable function DeviceMin::identity<string_view>()
*
* @return A string value which represents the highest possible valid UTF-8 encoded
* character.
*/
CUDA_HOST_DEVICE_CALLABLE static string_view max();

/**
* @brief Default constructor represents an empty string.
*/
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/io/orc/stats_enc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ __global__ void __launch_bounds__(init_threads_per_block)
uint32_t t = threadIdx.x;
statistics_group *group = &group_g[threadIdx.y];
if (chunk_id < num_rowgroups and t == 0) {
uint32_t num_rows = cols[col_id].num_rows;
uint32_t num_rows = cols[col_id].leaf_column->size();
group->col = &cols[col_id];
group->start_row = chunk_id * row_index_stride;
group->num_rows = min(num_rows - min(chunk_id * row_index_stride, num_rows), row_index_stride);
Expand Down
15 changes: 13 additions & 2 deletions cpp/src/io/orc/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,11 @@

#include "writer_impl.hpp"

#include <io/utilities/column_utils.cuh>

#include <cudf/null_mask.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>
Expand All @@ -39,6 +42,7 @@ namespace detail {
namespace orc {
using namespace cudf::io::orc;
using namespace cudf::io;
using cudf::io::orc::gpu::nvstrdesc_s;

struct row_group_index_info {
int32_t pos = -1; // Position
Expand Down Expand Up @@ -775,7 +779,9 @@ std::vector<StripeInformation> writer::impl::gather_stripes(
}

std::vector<std::vector<uint8_t>> writer::impl::gather_statistic_blobs(
host_span<orc_column_view const> columns, host_span<stripe_rowgroups const> stripe_bounds)
const table_device_view &table,
host_span<orc_column_view const> columns,
host_span<stripe_rowgroups const> stripe_bounds)
{
auto const num_rowgroups = stripes_size(stripe_bounds);
size_t num_stat_blobs = (1 + stripe_bounds.size()) * columns.size();
Expand Down Expand Up @@ -833,6 +839,10 @@ std::vector<std::vector<uint8_t>> writer::impl::gather_statistic_blobs(
}
stat_desc.host_to_device(stream);
stat_merge.host_to_device(stream);

rmm::device_uvector<column_device_view> leaf_column_views =
create_leaf_column_device_views<stats_column_desc>(stat_desc, table, stream);

gpu::orc_init_statistics_groups(stat_groups.data(),
stat_desc.device_ptr(),
columns.size(),
Expand Down Expand Up @@ -1106,10 +1116,11 @@ void writer::impl::write(table_view const &table)
auto stripes =
gather_stripes(num_rows, num_index_streams, stripe_bounds, &enc_data.streams, &strm_descs);

auto device_columns = table_device_view::create(table);
// Gather column statistics
std::vector<std::vector<uint8_t>> column_stats;
if (enable_statistics_ && num_columns > 0 && num_rows > 0) {
column_stats = gather_statistic_blobs(orc_columns, stripe_bounds);
column_stats = gather_statistic_blobs(*device_columns, orc_columns, stripe_bounds);
}

// Allocate intermediate output stream buffer
Expand Down
6 changes: 5 additions & 1 deletion cpp/src/io/orc/writer_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <cudf/io/detail/orc.hpp>
#include <cudf/io/orc.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_device_view.cuh>
#include <cudf/utilities/error.hpp>

#include <thrust/iterator/counting_iterator.h>
Expand Down Expand Up @@ -271,13 +272,16 @@ class writer::impl {
* @brief Returns per-stripe and per-file column statistics encoded
* in ORC protobuf format.
*
* @param table Table information to be written
* @param columns List of columns
* @param stripe_bounds List of stripe boundaries
*
* @return The statistic blobs
*/
std::vector<std::vector<uint8_t>> gather_statistic_blobs(
host_span<orc_column_view const> columns, host_span<stripe_rowgroups const> stripe_bounds);
const table_device_view& table,
host_span<orc_column_view const> columns,
host_span<stripe_rowgroups const> stripe_bounds);

/**
* @brief Writes the specified column's row index stream.
Expand Down
35 changes: 0 additions & 35 deletions cpp/src/io/parquet/page_enc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1961,41 +1961,6 @@ void InitPageFragments(PageFragment *frag,
frag, col_desc, num_fragments, num_columns, fragment_size, num_rows);
}

/**
* @copydoc void init_column_device_views(EncColumnDesc *col_desc,
* column_device_view *leaf_column_views,
* const table_device_view &parent_table_device_view,
* rmm::cuda_stream_view stream)
*/
void init_column_device_views(EncColumnDesc *col_desc,
column_device_view *leaf_column_views,
const table_device_view &parent_column_table_device_view,
rmm::cuda_stream_view stream)
{
cudf::detail::device_single_thread(
[col_desc,
parent_col_view = parent_column_table_device_view,
leaf_column_views] __device__() mutable {
for (size_type i = 0; i < parent_col_view.num_columns(); ++i) {
column_device_view col = parent_col_view.column(i);
if (col.type().id() == type_id::LIST) {
col_desc[i].parent_column = parent_col_view.begin() + i;
} else {
col_desc[i].parent_column = nullptr;
}
// traverse till leaf column
while (col.type().id() == type_id::LIST) {
col = col.child(lists_column_view::child_column_index);
}
// Store leaf_column to device storage
column_device_view *leaf_col_ptr = leaf_column_views + i;
*leaf_col_ptr = col;
col_desc[i].leaf_column = leaf_col_ptr;
}
},
stream);
}

/**
* @brief Launches kernel for initializing fragment statistics groups
*
Expand Down
16 changes: 0 additions & 16 deletions cpp/src/io/parquet/parquet_gpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,9 +229,6 @@ struct EncColumnDesc : stats_column_desc {
size_type const *level_offsets; //!< Offset array for per-row pre-calculated rep/def level values
uint8_t const *rep_values; //!< Pre-calculated repetition level values
uint8_t const *def_values; //!< Pre-calculated definition level values

column_device_view *leaf_column; //!< Pointer to leaf column
column_device_view *parent_column; //!< Pointer to parent column. Is nullptr if not list type.
};

constexpr int max_page_fragment_size = 5000; //!< Max number of rows in a page fragment
Expand Down Expand Up @@ -448,19 +445,6 @@ void InitPageFragments(PageFragment *frag,
uint32_t num_rows,
rmm::cuda_stream_view stream);

/**
* @brief Set column_device_view pointers in column description array
*
* @param[out] col_desc Column description array [column_id]
* @param[out] leaf_column_views Device array to store leaf columns
* @param[in] parent_table_device_view Table device view containing parent columns
* @param[in] stream CUDA stream to use, default 0
*/
void init_column_device_views(EncColumnDesc *col_desc,
column_device_view *leaf_column_views,
const table_device_view &parent_table_device_view,
rmm::cuda_stream_view stream);

/**
* @brief Launches kernel for initializing fragment statistics groups
*
Expand Down
15 changes: 3 additions & 12 deletions cpp/src/io/parquet/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "writer_impl.hpp"

#include <io/parquet/compact_protocol_writer.hpp>
#include <io/utilities/column_utils.cuh>

#include <cudf/column/column_device_view.cuh>
#include <cudf/lists/lists_column_view.hpp>
Expand Down Expand Up @@ -495,17 +496,6 @@ class parquet_column_view {
uint8_t _decimal_precision = 0;
};

rmm::device_uvector<column_device_view> writer::impl::create_leaf_column_device_views(
hostdevice_vector<gpu::EncColumnDesc> &col_desc,
const table_device_view &parent_table_device_view)
{
rmm::device_uvector<column_device_view> leaf_column_views(parent_table_device_view.num_columns(),
stream);
gpu::init_column_device_views(
col_desc.device_ptr(), leaf_column_views.data(), parent_table_device_view, stream);
return leaf_column_views;
}

void writer::impl::init_page_fragments(hostdevice_vector<gpu::PageFragment> &frag,
hostdevice_vector<gpu::EncColumnDesc> &col_desc,
uint32_t num_columns,
Expand Down Expand Up @@ -919,7 +909,8 @@ void writer::impl::write(table_view const &table)
if (fragments.size() != 0) {
// Move column info to device
col_desc.host_to_device(stream);
leaf_column_views = create_leaf_column_device_views(col_desc, *parent_column_table_device_view);
leaf_column_views = create_leaf_column_device_views<gpu::EncColumnDesc>(
col_desc, *parent_column_table_device_view, stream);

init_page_fragments(fragments, col_desc, num_columns, num_fragments, num_rows, fragment_size);
}
Expand Down
Loading

0 comments on commit ab7fe05

Please sign in to comment.