rle_stream with dictionary support + micro kernels for fixed and fixed dictionary

Signed-off-by: Alessandro Bellina <abellina@nvidia.com>
abellina committed Feb 27, 2024
1 parent e03623a commit d66835d
Showing 11 changed files with 1,166 additions and 596 deletions.
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
@@ -411,6 +411,7 @@ add_library(
   src/io/parquet/reader_impl_helpers.cpp
   src/io/parquet/reader_impl_preprocess.cu
   src/io/parquet/writer_impl.cu
+  src/io/parquet/decode_fixed.cu
   src/io/statistics/orc_column_statistics.cu
   src/io/statistics/parquet_column_statistics.cu
   src/io/text/byte_range_info.cpp
492 changes: 492 additions & 0 deletions cpp/src/io/parquet/decode_fixed.cu

Large diffs are not rendered by default.
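As a rough orientation only, since the new decode_fixed.cu is not rendered here, the toy CUDA sketch below shows the general idea behind the two micro kernels named in the commit title: for plain-encoded fixed-width data each thread copies one value straight from the page buffer, while the dictionary variant reads an index and gathers from the chunk's dictionary. This is not the code the commit adds; the names and the flat int32 layout are simplifying assumptions.

// Toy sketch, NOT the contents of decode_fixed.cu: assumes 4-byte values,
// aligned page data, and already-decoded dictionary indices.
#include <cuda_runtime.h>
#include <cstdint>

// Plain encoding: fixed-width values are stored back to back in the page.
__global__ void decode_fixed_plain_sketch(uint8_t const* page_data,
                                          int32_t* out,
                                          int num_values)
{
  int const i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < num_values) { out[i] = reinterpret_cast<int32_t const*>(page_data)[i]; }
}

// Dictionary encoding: the page stores indices into a per-chunk dictionary.
__global__ void decode_fixed_dict_sketch(int32_t const* dict_indices,
                                         int32_t const* dictionary,
                                         int32_t* out,
                                         int num_values)
{
  int const i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < num_values) { out[i] = dictionary[dict_indices[i]]; }
}

In real Parquet pages the dictionary indices are RLE/bit-packed rather than a flat int32 array, which is where the rle_stream dictionary support in the commit title comes in.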

42 changes: 42 additions & 0 deletions cpp/src/io/parquet/decode_fixed.hpp
@@ -0,0 +1,42 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include "parquet_gpu.hpp"

namespace cudf {
namespace io {
namespace parquet {
namespace detail {
void DecodePageDataFixed(cudf::detail::hostdevice_vector<PageInfo>& pages,
                         cudf::detail::hostdevice_vector<ColumnChunkDesc> const& chunks,
                         std::size_t num_rows,
                         size_t min_row,
                         int level_type_size,
                         rmm::cuda_stream_view stream);

void DecodePageDataFixedDict(cudf::detail::hostdevice_vector<PageInfo>& pages,
                             cudf::detail::hostdevice_vector<ColumnChunkDesc> const& chunks,
                             std::size_t num_rows,
                             size_t min_row,
                             int level_type_size,
                             rmm::cuda_stream_view stream);

} // namespace detail
} // namespace parquet
} // namespace io
} // namespace cudf
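Both new entry points take the same arguments, so a caller that already knows whether a column chunk's pages are dictionary-encoded can route between them. The helper below is a hypothetical dispatch sketch, not code from this commit; the function name and the has_dictionary flag are assumptions used only for illustration.

// Hypothetical dispatch sketch (not part of this commit): routes a batch of
// fixed-width pages to one of the two entry points declared above.
#include "decode_fixed.hpp"

namespace cudf::io::parquet::detail {

void decode_fixed_width_pages_sketch(cudf::detail::hostdevice_vector<PageInfo>& pages,
                                     cudf::detail::hostdevice_vector<ColumnChunkDesc> const& chunks,
                                     std::size_t num_rows,
                                     size_t min_row,
                                     int level_type_size,
                                     bool has_dictionary,
                                     rmm::cuda_stream_view stream)
{
  if (has_dictionary) {
    // dictionary-encoded fixed-width pages: decode indices, then gather
    DecodePageDataFixedDict(pages, chunks, num_rows, min_row, level_type_size, stream);
  } else {
    // plain-encoded fixed-width pages: copy values directly
    DecodePageDataFixed(pages, chunks, num_rows, min_row, level_type_size, stream);
  }
}

}  // namespace cudf::io::parquet::detail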
11 changes: 4 additions & 7 deletions cpp/src/io/parquet/decode_preprocess.cu
@@ -228,8 +228,8 @@ CUDF_KERNEL void __launch_bounds__(preprocess_block_size)
   // the level stream decoders
   __shared__ rle_run<level_t> def_runs[rle_run_buffer_size];
   __shared__ rle_run<level_t> rep_runs[rle_run_buffer_size];
-  rle_stream<level_t, preprocess_block_size> decoders[level_type::NUM_LEVEL_TYPES] = {{def_runs},
-                                                                                      {rep_runs}};
+  rle_stream<level_t, preprocess_block_size, rolling_buf_size>
+    decoders[level_type::NUM_LEVEL_TYPES] = {{def_runs}, {rep_runs}};

   // setup page info
   if (!setupLocalPageInfo(
@@ -239,20 +239,17 @@ CUDF_KERNEL void __launch_bounds__(preprocess_block_size)

   // initialize the stream decoders (requires values computed in setupLocalPageInfo)
   // the size of the rolling batch buffer
-  int const max_batch_size = rolling_buf_size;
-  level_t* rep             = reinterpret_cast<level_t*>(pp->lvl_decode_buf[level_type::REPETITION]);
-  level_t* def             = reinterpret_cast<level_t*>(pp->lvl_decode_buf[level_type::DEFINITION]);
+  level_t* rep = reinterpret_cast<level_t*>(pp->lvl_decode_buf[level_type::REPETITION]);
+  level_t* def = reinterpret_cast<level_t*>(pp->lvl_decode_buf[level_type::DEFINITION]);
   decoders[level_type::DEFINITION].init(s->col.level_bits[level_type::DEFINITION],
                                         s->abs_lvl_start[level_type::DEFINITION],
                                         s->abs_lvl_end[level_type::DEFINITION],
-                                        max_batch_size,
                                         def,
                                         s->page.num_input_values);
   if (has_repetition) {
     decoders[level_type::REPETITION].init(s->col.level_bits[level_type::REPETITION],
                                           s->abs_lvl_start[level_type::REPETITION],
                                           s->abs_lvl_end[level_type::REPETITION],
-                                          max_batch_size,
                                           rep,
                                           s->page.num_input_values);
   }
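Net effect of this hunk: the rolling batch size becomes a compile-time template parameter of rle_stream (rolling_buf_size) instead of a runtime argument to init() (the removed max_batch_size). The toy struct below only illustrates that pattern with a simplified, assumed interface; it is not the real rle_stream.

// Toy sketch only, assuming a simplified interface: it mirrors the shape of
// the change above, where the rolling buffer size moves from a runtime
// init() argument (max_batch_size) to a compile-time template parameter,
// presumably so wrap-around indexing can use a constant known at compile time.
#include <cstdint>

template <typename level_t, int decode_threads, int rolling_buf_size>
struct rle_stream_sketch {
  int level_bits{};
  uint8_t const* cur{};
  uint8_t const* end{};
  level_t* output{};
  int total_values{};

  // before this commit, init() also took an `int max_batch_size` argument here
  __device__ void init(int level_bits_,
                       uint8_t const* start_,
                       uint8_t const* end_,
                       level_t* output_,
                       int total_values_)
  {
    level_bits   = level_bits_;
    cur          = start_;
    end          = end_;
    output       = output_;
    total_values = total_values_;
  }

  // the rolling output buffer now wraps at a compile-time constant
  __device__ static constexpr int rolling_index(int i) { return i % rolling_buf_size; }
};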