Update nvcomp to 3.0.4 (includes API changes) (#314)

Update the nvCOMP version used for compression/decompression to 3.0.4. See also: rapidsai/cudf#13815, rapidsai/rapids-cmake#451. Authors: Vukasin Milovanovic (https://github.com/vuule), Bradley Dice (https://github.com/bdice). Approvers: Bradley Dice (https://github.com/bdice), Mads R. B. Kristensen (https://github.com/madsbk), Ray Douglass (https://github.com/raydouglass). URL: #314
rapidsai · Nov 9, 2023 · b8f6218 · b8f6218
1 parent 34f6d8e
commit b8f6218
Show file tree

Hide file tree

Showing 10 changed files with 77 additions and 118 deletions.
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
@@ -18,7 +18,7 @@ jobs:
       - conda-python-build
       - conda-python-tests
       - docs-build
-      - devcontainer
+#     - devcontainer
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-23.12
   checks:
@@ -58,11 +58,11 @@ jobs:
       arch: "amd64"
       container_image: "rapidsai/ci-conda:latest"
       run_script: "ci/build_docs.sh"
-  devcontainer:
-    secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-23.12
-    with:
-      build_command: |
-        sccache -z;
-        build-all;
-        sccache -s;
+# devcontainer:
+#   secrets: inherit
+#   uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-23.12
+#   with:
+#     build_command: |
+#       sccache -z;
+#       build-all;
+#       sccache -s;
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -26,7 +26,7 @@ dependencies:
 - numpy>=1.21
 - numpydoc
 - nvcc_linux-64=11.8
-- nvcomp==2.6.1
+- nvcomp==3.0.4
 - packaging
 - pre-commit
 - pytest

diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml
@@ -25,7 +25,7 @@ dependencies:
 - numcodecs <0.12.0
 - numpy>=1.21
 - numpydoc
-- nvcomp==2.6.1
+- nvcomp==3.0.4
 - packaging
 - pre-commit
 - pytest

diff --git a/conda/recipes/kvikio/conda_build_config.yaml b/conda/recipes/kvikio/conda_build_config.yaml
@@ -17,4 +17,4 @@ cmake_version:
   - ">=3.26.4"
 
 nvcomp_version:
-  - "=2.6.1"
+  - "=3.0.4"
diff --git a/cpp/cmake/fetch_rapids.cmake b/cpp/cmake/fetch_rapids.cmake
@@ -11,6 +11,8 @@
 # or implied. See the License for the specific language governing permissions and limitations under
 # the License.
 # =============================================================================
+set(rapids-cmake-repo vuule/rapids-cmake)
+set(rapids-cmake-branch upgrade-nvcomp-3.0.0)
 if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/KVIKIO_RAPIDS.cmake)
   file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.12/RAPIDS.cmake
        ${CMAKE_CURRENT_BINARY_DIR}/KVIKIO_RAPIDS.cmake

diff --git a/dependencies.yaml b/dependencies.yaml
@@ -142,7 +142,7 @@ dependencies:
     common:
       - output_types: conda
         packages:
-          - nvcomp==2.6.1
+          - nvcomp==3.0.4
     specific:
       - output_types: conda
         matrices:

diff --git a/python/kvikio/_lib/libnvcomp.pyx b/python/kvikio/_lib/libnvcomp.pyx
@@ -39,8 +39,15 @@ from kvikio._lib.nvcomp_cxx_api cimport (
     SnappyManager,
     create_manager,
     cudaStream_t,
+    nvcompBatchedANSDefaultOpts,
+    nvcompBatchedANSOpts_t,
+    nvcompBatchedBitcompFormatOpts,
     nvcompBatchedCascadedDefaultOpts,
     nvcompBatchedCascadedOpts_t,
+    nvcompBatchedGdeflateOpts_t,
+    nvcompBatchedLZ4Opts_t,
+    nvcompBatchedSnappyDefaultOpts,
+    nvcompBatchedSnappyOpts_t,
     nvcompManagerBase,
     nvcompType_t,
 )
@@ -134,14 +141,6 @@ cdef class _nvcompManager:
             <DecompressionConfig&>self._decompression_config.get()[0]
         )
 
-    def set_scratch_buffer(self, Array new_scratch_buffer):
-        return self._impl.set_scratch_buffer(
-            <uint8_t*>new_scratch_buffer.ptr
-        )
-
-    def get_required_scratch_buffer_size(self):
-        return self._impl.get_required_scratch_buffer_size()
-
     def get_compressed_output_size(self, Array comp_buffer):
         return self._impl.get_compressed_output_size(
             <uint8_t*>comp_buffer.ptr
@@ -157,6 +156,7 @@ cdef class _ANSManager(_nvcompManager):
     ):
         self._impl = <nvcompManagerBase*>new ANSManager(
             uncomp_chunk_size,
+            <nvcompBatchedANSOpts_t>nvcompBatchedANSDefaultOpts,  # TODO
             <cudaStream_t><void*>0,  # TODO
             device_id
         )
@@ -165,14 +165,16 @@ cdef class _ANSManager(_nvcompManager):
 cdef class _BitcompManager(_nvcompManager):
     def __cinit__(
         self,
+        size_t uncomp_chunk_size,
         nvcompType_t data_type,
         int bitcomp_algo,
         user_stream,
         const int device_id
     ):
+        cdef opts = nvcompBatchedBitcompFormatOpts(bitcomp_algo, data_type)
         self._impl = <nvcompManagerBase*>new BitcompManager(
-            <nvcompType_t>data_type,
-            bitcomp_algo,
+            uncomp_chunk_size,
+            opts,
             <cudaStream_t><void*>0,  # TODO
             device_id
         )
@@ -186,6 +188,7 @@ cdef class _CascadedManager(_nvcompManager):
         const int device_id,
     ):
         self._impl = <nvcompManagerBase*>new CascadedManager(
+            _options["chunk_size"],
             <nvcompBatchedCascadedOpts_t>nvcompBatchedCascadedDefaultOpts,  # TODO
             <cudaStream_t><void*>0,  # TODO
             device_id,
@@ -200,9 +203,10 @@ cdef class _GdeflateManager(_nvcompManager):
         user_stream,
         const int device_id
     ):
+        cdef opts = nvcompBatchedGdeflateOpts_t(algo)
         self._impl = <nvcompManagerBase*>new GdeflateManager(
             chunk_size,
-            algo,
+            opts,
             <cudaStream_t><void*>0,  # TODO
             device_id
         )
@@ -220,9 +224,10 @@ cdef class _LZ4Manager(_nvcompManager):
         # from anywhere up. I'm not going to rabbit hole on it until
         # everything else works.
         # cdef cudaStream_t stream = <cudaStream_t><void*>user_stream
+        cdef opts = nvcompBatchedLZ4Opts_t(data_type)
         self._impl = <nvcompManagerBase*>new LZ4Manager(
             uncomp_chunk_size,
-            data_type,
+            opts,
             <cudaStream_t><void*>0,  # TODO
             device_id
         )
@@ -240,6 +245,7 @@ cdef class _SnappyManager(_nvcompManager):
         # everything else works.
         self._impl = <nvcompManagerBase*>new SnappyManager(
             uncomp_chunk_size,
+            <nvcompBatchedSnappyOpts_t>nvcompBatchedSnappyDefaultOpts,
             <cudaStream_t><void*>0,  # TODO
             device_id
         )

diff --git a/python/kvikio/_lib/nvcomp_cxx_api.pxd b/python/kvikio/_lib/nvcomp_cxx_api.pxd
@@ -106,8 +106,6 @@ cdef extern from "nvcomp/nvcompManager.hpp" namespace 'nvcomp':
             uint8_t* decomp_buffer,
             const uint8_t* comp_buffer,
             const DecompressionConfig& decomp_config)
-        void set_scratch_buffer(uint8_t* new_scratch_buffer) except +
-        size_t get_required_scratch_buffer_size() except +
         size_t get_compressed_output_size(uint8_t* comp_buffer) except +
 
     cdef cppclass PimplManager "nvcomp::PimplManager":
@@ -125,32 +123,47 @@ cdef extern from "nvcomp/nvcompManager.hpp" namespace 'nvcomp':
             uint8_t* decomp_buffer,
             const uint8_t* comp_buffer,
             const DecompressionConfig& decomp_config) except +
-        void set_scratch_buffer(uint8_t* new_scratch_buffer) except +
-        size_t get_required_scratch_buffer_size() except +
         size_t get_compressed_output_size(uint8_t* comp_buffer) except +
 
 # C++ Concrete ANS Manager
+cdef extern from "nvcomp/ans.h" nogil:
+    ctypedef enum nvcompANSType_t:
+        nvcomp_rANS = 0
+
+    ctypedef struct nvcompBatchedANSOpts_t:
+        nvcompANSType_t type
+    cdef nvcompBatchedANSOpts_t nvcompBatchedANSDefaultOpts
+
 cdef extern from "nvcomp/ans.hpp":
     cdef cppclass ANSManager "nvcomp::ANSManager":
         ANSManager(
             size_t uncomp_chunk_size,
+            const nvcompBatchedANSOpts_t& format_opts,
             cudaStream_t user_stream,
             const int device_id
         ) except +
 
 # C++ Concrete Bitcomp Manager
+cdef extern from "nvcomp/bitcomp.h" nogil:
+    ctypedef struct nvcompBatchedBitcompFormatOpts:
+        int algorithm_type
+        nvcompType_t data_type
+    cdef nvcompBatchedBitcompFormatOpts nvcompBatchedBitcompDefaultOpts
+
 cdef extern from "nvcomp/bitcomp.hpp":
     cdef cppclass BitcompManager "nvcomp::BitcompManager":
         BitcompManager(
-            nvcompType_t data_type,
-            int bitcomp_algo,
+            size_t uncomp_chunk_size,
+            const nvcompBatchedBitcompFormatOpts& format_opts,
             cudaStream_t user_stream,
             const int device_id
         ) except +
 
 # C++ Concrete Cascaded Manager
 cdef extern from "nvcomp/cascaded.h" nogil:
     ctypedef struct nvcompBatchedCascadedOpts_t:
+        size_t chunk_size
+        nvcompType_t type
         int num_RLEs
         int num_deltas
         int use_bp
@@ -159,36 +172,53 @@ cdef extern from "nvcomp/cascaded.h" nogil:
 cdef extern from "nvcomp/cascaded.hpp" nogil:
     cdef cppclass CascadedManager "nvcomp::CascadedManager":
         CascadedManager(
+            size_t uncomp_chunk_size,
             const nvcompBatchedCascadedOpts_t& options,
             cudaStream_t user_stream,
             int device_id
         )
 
 # C++ Concrete Gdeflate Manager
+cdef extern from "nvcomp/gdeflate.h" nogil:
+    ctypedef struct nvcompBatchedGdeflateOpts_t:
+        int algo
+    cdef nvcompBatchedGdeflateOpts_t nvcompBatchedGdeflateDefaultOpts
+
 cdef extern from "nvcomp/gdeflate.hpp":
     cdef cppclass GdeflateManager "nvcomp::GdeflateManager":
         GdeflateManager(
             int uncomp_chunk_size,
-            int algo,
+            const nvcompBatchedGdeflateOpts_t& format_opts,
             cudaStream_t user_stream,
             const int device_id
         ) except +
 
 # C++ Concrete LZ4 Manager
+cdef extern from "nvcomp/lz4.h" nogil:  # LZ4 opts come from lz4.h, not gdeflate.h
+    ctypedef struct nvcompBatchedLZ4Opts_t:  # format options taken by LZ4Manager
+        nvcompType_t data_type
+    cdef nvcompBatchedLZ4Opts_t nvcompBatchedLZ4DefaultOpts
+
 cdef extern from "nvcomp/lz4.hpp":
     cdef cppclass LZ4Manager "nvcomp::LZ4Manager":
         LZ4Manager(
             size_t uncomp_chunk_size,
-            nvcompType_t data_type,
+            const nvcompBatchedLZ4Opts_t& format_opts,
             cudaStream_t user_stream,
             const int device_id
         ) except +
 
 # C++ Concrete Snappy Manager
+cdef extern from "nvcomp/snappy.h" nogil:
+    ctypedef struct nvcompBatchedSnappyOpts_t:
+        int reserved
+    cdef nvcompBatchedSnappyOpts_t nvcompBatchedSnappyDefaultOpts
+
 cdef extern from "nvcomp/snappy.hpp":
     cdef cppclass SnappyManager "nvcomp::SnappyManager":
         SnappyManager(
             size_t uncomp_chunk_size,
+            const nvcompBatchedSnappyOpts_t& format_opts,
             cudaStream_t user_stream,
             const int device_id
         ) except +
diff --git a/python/kvikio/nvcomp.py b/python/kvikio/nvcomp.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
 # See file LICENSE for terms.
 
 from enum import Enum
@@ -192,35 +192,6 @@ def configure_decompression_with_compressed_buffer(
             asarray(data)
         )
 
-    def get_required_scratch_buffer_size(self) -> int:
-        """Return the size of the optional scratch buffer.
-
-        Returns
-        -------
-        int
-        """
-        return self._manager.get_required_scratch_buffer_size()
-
-    def set_scratch_buffer(self, new_scratch_buffer: cp.ndarray) -> None:
-        """Use a pre-allocated buffer for compression.
-
-        Use a GPU-allocated buffer that will be used for compression
-        temporary storage instead of allowing the library to create the
-        scratch buffer.
-        Can reduce memory usage.
-
-        Parameters
-        ----------
-        new_scratch_buffer : cp.ndarray
-            The buffer that you allocated on the GPU for compressor temporary
-            storage.
-
-        Returns
-        -------
-        cp.ndarray
-        """
-        return self._manager.set_scratch_buffer(asarray(new_scratch_buffer))
-
     def get_compressed_output_size(self, comp_buffer: cp.ndarray) -> int:
         """Return the actual size of compression result.
 
@@ -277,7 +248,11 @@ def __init__(self, **kwargs):
         super().__init__(kwargs)
 
         self._manager = _lib._BitcompManager(
-            self.data_type.value, self.bitcomp_algo, self.stream, self.device_id
+            self.chunk_size,
+            self.data_type.value,
+            self.bitcomp_algo,
+            self.stream,
+            self.device_id,
         )