From 447028f25c9c00e26c1a8e1d3765de7a565ee58c Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Thu, 2 Jun 2022 15:19:26 -0700 Subject: [PATCH] Use nvCOMP for ZLIB decompression in ORC reader (#11024) Issue #11023 Adds `DEFLATE` compression type to the nvCOMP adapter. ORC reader uses the adapter when nvCOMP experimental integrations are enabled (for now). Otherwise behavior is unchanged and the internal implementation is still used. No tests as no visible behavior change is expected. Pending: Performance impact Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Nghia Truong (https://github.com/ttnghia) - David Wendt (https://github.com/davidwendt) - Jim Brennan (https://github.com/jbrennan333) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/11024 --- cpp/src/io/comp/nvcomp_adapter.cpp | 16 ++++++++++++++++ cpp/src/io/comp/nvcomp_adapter.hpp | 2 +- cpp/src/io/orc/reader_impl.cu | 13 +++++++++++-- 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/cpp/src/io/comp/nvcomp_adapter.cpp b/cpp/src/io/comp/nvcomp_adapter.cpp index b7b003e0af9..2c2610e53c3 100644 --- a/cpp/src/io/comp/nvcomp_adapter.cpp +++ b/cpp/src/io/comp/nvcomp_adapter.cpp @@ -29,6 +29,14 @@ #define NVCOMP_HAS_ZSTD 0 #endif +#define NVCOMP_DEFLATE_HEADER <nvcomp/deflate.h> +#if __has_include(NVCOMP_DEFLATE_HEADER) +#include NVCOMP_DEFLATE_HEADER +#define NVCOMP_HAS_DEFLATE 1 +#else +#define NVCOMP_HAS_DEFLATE 0 +#endif + namespace cudf::io::nvcomp { template <typename... Args> @@ -40,6 +48,10 @@ auto batched_decompress_get_temp_size(compression_type compression, Args&&... 
ar #if NVCOMP_HAS_ZSTD case compression_type::ZSTD: return nvcompBatchedZstdDecompressGetTempSize(std::forward<Args>(args)...); +#endif +#if NVCOMP_HAS_DEFLATE + case compression_type::DEFLATE: + return nvcompBatchedDeflateDecompressGetTempSize(std::forward<Args>(args)...); #endif default: CUDF_FAIL("Unsupported compression type"); } @@ -54,6 +66,10 @@ auto batched_decompress_async(compression_type compression, Args&&... args) #if NVCOMP_HAS_ZSTD case compression_type::ZSTD: return nvcompBatchedZstdDecompressAsync(std::forward<Args>(args)...); +#endif +#if NVCOMP_HAS_DEFLATE + case compression_type::DEFLATE: + return nvcompBatchedDeflateDecompressAsync(std::forward<Args>(args)...); #endif default: CUDF_FAIL("Unsupported compression type"); } diff --git a/cpp/src/io/comp/nvcomp_adapter.hpp b/cpp/src/io/comp/nvcomp_adapter.hpp index fcf5e30420f..24383baed59 100644 --- a/cpp/src/io/comp/nvcomp_adapter.hpp +++ b/cpp/src/io/comp/nvcomp_adapter.hpp @@ -24,7 +24,7 @@ namespace cudf::io::nvcomp { -enum class compression_type { SNAPPY, ZSTD }; +enum class compression_type { SNAPPY, ZSTD, DEFLATE }; /** * @brief Device batch decompression of given type. 
diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index b47627b8d7c..8a5927562a6 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -369,8 +369,17 @@ rmm::device_buffer reader::impl::decompress_stripe_data( device_span> inflate_out_view{inflate_out.data(), num_compressed_blocks}; switch (decompressor.compression()) { case compression_type::ZLIB: - gpuinflate( - inflate_in_view, inflate_out_view, inflate_stats, gzip_header_included::NO, stream); + if (nvcomp_integration::is_all_enabled()) { + nvcomp::batched_decompress(nvcomp::compression_type::DEFLATE, + inflate_in_view, + inflate_out_view, + inflate_stats, + max_uncomp_block_size, + stream); + } else { + gpuinflate( + inflate_in_view, inflate_out_view, inflate_stats, gzip_header_included::NO, stream); + } break; case compression_type::SNAPPY: if (nvcomp_integration::is_stable_enabled()) {