Skip to content

Commit

Permalink
Update nvcomp to 3.0.4 (includes API changes) (#314)
Browse files Browse the repository at this point in the history
Update the nvCOMP version used for compression/decompression to 3.0.4.

See also:

rapidsai/cudf#13815
rapidsai/rapids-cmake#451

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Mads R. B. Kristensen (https://github.com/madsbk)
  - Ray Douglass (https://github.com/raydouglass)

URL: #314
  • Loading branch information
vuule authored Nov 9, 2023
1 parent 34f6d8e commit b8f6218
Show file tree
Hide file tree
Showing 10 changed files with 77 additions and 118 deletions.
18 changes: 9 additions & 9 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
- conda-python-build
- conda-python-tests
- docs-build
- devcontainer
# - devcontainer
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-23.12
checks:
Expand Down Expand Up @@ -58,11 +58,11 @@ jobs:
arch: "amd64"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/build_docs.sh"
devcontainer:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-23.12
with:
build_command: |
sccache -z;
build-all;
sccache -s;
# devcontainer:
# secrets: inherit
# uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-23.12
# with:
# build_command: |
# sccache -z;
# build-all;
# sccache -s;
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ dependencies:
- numpy>=1.21
- numpydoc
- nvcc_linux-64=11.8
- nvcomp==2.6.1
- nvcomp==3.0.4
- packaging
- pre-commit
- pytest
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-120_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ dependencies:
- numcodecs <0.12.0
- numpy>=1.21
- numpydoc
- nvcomp==2.6.1
- nvcomp==3.0.4
- packaging
- pre-commit
- pytest
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/kvikio/conda_build_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ cmake_version:
- ">=3.26.4"

nvcomp_version:
- "=2.6.1"
- "=3.0.4"
2 changes: 2 additions & 0 deletions cpp/cmake/fetch_rapids.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# =============================================================================
set(rapids-cmake-repo vuule/rapids-cmake)
set(rapids-cmake-branch upgrade-nvcomp-3.0.0)
if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/KVIKIO_RAPIDS.cmake)
file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.12/RAPIDS.cmake
${CMAKE_CURRENT_BINARY_DIR}/KVIKIO_RAPIDS.cmake
Expand Down
2 changes: 1 addition & 1 deletion dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ dependencies:
common:
- output_types: conda
packages:
- nvcomp==2.6.1
- nvcomp==3.0.4
specific:
- output_types: conda
matrices:
Expand Down
30 changes: 18 additions & 12 deletions python/kvikio/_lib/libnvcomp.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,15 @@ from kvikio._lib.nvcomp_cxx_api cimport (
SnappyManager,
create_manager,
cudaStream_t,
nvcompBatchedANSDefaultOpts,
nvcompBatchedANSOpts_t,
nvcompBatchedBitcompFormatOpts,
nvcompBatchedCascadedDefaultOpts,
nvcompBatchedCascadedOpts_t,
nvcompBatchedGdeflateOpts_t,
nvcompBatchedLZ4Opts_t,
nvcompBatchedSnappyDefaultOpts,
nvcompBatchedSnappyOpts_t,
nvcompManagerBase,
nvcompType_t,
)
Expand Down Expand Up @@ -134,14 +141,6 @@ cdef class _nvcompManager:
<DecompressionConfig&>self._decompression_config.get()[0]
)

def set_scratch_buffer(self, Array new_scratch_buffer):
return self._impl.set_scratch_buffer(
<uint8_t*>new_scratch_buffer.ptr
)

def get_required_scratch_buffer_size(self):
return self._impl.get_required_scratch_buffer_size()

def get_compressed_output_size(self, Array comp_buffer):
return self._impl.get_compressed_output_size(
<uint8_t*>comp_buffer.ptr
Expand All @@ -157,6 +156,7 @@ cdef class _ANSManager(_nvcompManager):
):
self._impl = <nvcompManagerBase*>new ANSManager(
uncomp_chunk_size,
<nvcompBatchedANSOpts_t>nvcompBatchedANSDefaultOpts, # TODO
<cudaStream_t><void*>0, # TODO
device_id
)
Expand All @@ -165,14 +165,16 @@ cdef class _ANSManager(_nvcompManager):
cdef class _BitcompManager(_nvcompManager):
def __cinit__(
self,
size_t uncomp_chunk_size,
nvcompType_t data_type,
int bitcomp_algo,
user_stream,
const int device_id
):
cdef opts = nvcompBatchedBitcompFormatOpts(bitcomp_algo, data_type)
self._impl = <nvcompManagerBase*>new BitcompManager(
<nvcompType_t>data_type,
bitcomp_algo,
uncomp_chunk_size,
opts,
<cudaStream_t><void*>0, # TODO
device_id
)
Expand All @@ -186,6 +188,7 @@ cdef class _CascadedManager(_nvcompManager):
const int device_id,
):
self._impl = <nvcompManagerBase*>new CascadedManager(
_options["chunk_size"],
<nvcompBatchedCascadedOpts_t>nvcompBatchedCascadedDefaultOpts, # TODO
<cudaStream_t><void*>0, # TODO
device_id,
Expand All @@ -200,9 +203,10 @@ cdef class _GdeflateManager(_nvcompManager):
user_stream,
const int device_id
):
cdef opts = nvcompBatchedGdeflateOpts_t(algo)
self._impl = <nvcompManagerBase*>new GdeflateManager(
chunk_size,
algo,
opts,
<cudaStream_t><void*>0, # TODO
device_id
)
Expand All @@ -220,9 +224,10 @@ cdef class _LZ4Manager(_nvcompManager):
# from anywhere up. I'm not going to rabbit hole on it until
# everything else works.
# cdef cudaStream_t stream = <cudaStream_t><void*>user_stream
cdef opts = nvcompBatchedLZ4Opts_t(data_type)
self._impl = <nvcompManagerBase*>new LZ4Manager(
uncomp_chunk_size,
data_type,
opts,
<cudaStream_t><void*>0, # TODO
device_id
)
Expand All @@ -240,6 +245,7 @@ cdef class _SnappyManager(_nvcompManager):
# everything else works.
self._impl = <nvcompManagerBase*>new SnappyManager(
uncomp_chunk_size,
<nvcompBatchedSnappyOpts_t>nvcompBatchedSnappyDefaultOpts,
<cudaStream_t><void*>0, # TODO
device_id
)
Expand Down
46 changes: 38 additions & 8 deletions python/kvikio/_lib/nvcomp_cxx_api.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,6 @@ cdef extern from "nvcomp/nvcompManager.hpp" namespace 'nvcomp':
uint8_t* decomp_buffer,
const uint8_t* comp_buffer,
const DecompressionConfig& decomp_config)
void set_scratch_buffer(uint8_t* new_scratch_buffer) except +
size_t get_required_scratch_buffer_size() except +
size_t get_compressed_output_size(uint8_t* comp_buffer) except +

cdef cppclass PimplManager "nvcomp::PimplManager":
Expand All @@ -125,32 +123,47 @@ cdef extern from "nvcomp/nvcompManager.hpp" namespace 'nvcomp':
uint8_t* decomp_buffer,
const uint8_t* comp_buffer,
const DecompressionConfig& decomp_config) except +
void set_scratch_buffer(uint8_t* new_scratch_buffer) except +
size_t get_required_scratch_buffer_size() except +
size_t get_compressed_output_size(uint8_t* comp_buffer) except +

# C++ Concrete ANS Manager
# C-level declarations pulled from nvcomp/ans.h: the option struct (and its
# default instance) that the ANSManager constructor takes as `format_opts`.
cdef extern from "nvcomp/ans.h" nogil:
# ANS algorithm selector; only the rANS enumerator (value 0) is declared here.
ctypedef enum nvcompANSType_t:
nvcomp_rANS = 0

# Format options for ANS; the single declared field selects the ANS type.
ctypedef struct nvcompBatchedANSOpts_t:
nvcompANSType_t type
# Default options object declared by the header.
cdef nvcompBatchedANSOpts_t nvcompBatchedANSDefaultOpts

# Cython view of the C++ class nvcomp::ANSManager (from nvcomp/ans.hpp).
cdef extern from "nvcomp/ans.hpp":
cdef cppclass ANSManager "nvcomp::ANSManager":
# Constructor: chunk size, ANS format options, stream handle, device id.
# `except +` makes Cython convert a C++ exception thrown by the
# constructor into a Python exception.
ANSManager(
size_t uncomp_chunk_size,
const nvcompBatchedANSOpts_t& format_opts,
cudaStream_t user_stream,
const int device_id
) except +

# C++ Concrete Bitcomp Manager
# C-level declarations pulled from nvcomp/bitcomp.h: the option struct (and its
# default instance) that the BitcompManager constructor takes as `format_opts`.
cdef extern from "nvcomp/bitcomp.h" nogil:
# Format options for Bitcomp: an integer algorithm selector plus the
# nvcompType_t describing the data.
ctypedef struct nvcompBatchedBitcompFormatOpts:
int algorithm_type
nvcompType_t data_type
# Default options object declared by the header.
cdef nvcompBatchedBitcompFormatOpts nvcompBatchedBitcompDefaultOpts

cdef extern from "nvcomp/bitcomp.hpp":
cdef cppclass BitcompManager "nvcomp::BitcompManager":
BitcompManager(
nvcompType_t data_type,
int bitcomp_algo,
size_t uncomp_chunk_size,
const nvcompBatchedBitcompFormatOpts& format_opts,
cudaStream_t user_stream,
const int device_id
) except +

# C++ Concrete Cascaded Manager
cdef extern from "nvcomp/cascaded.h" nogil:
ctypedef struct nvcompBatchedCascadedOpts_t:
size_t chunk_size
nvcompType_t type
int num_RLEs
int num_deltas
int use_bp
Expand All @@ -159,36 +172,53 @@ cdef extern from "nvcomp/cascaded.h" nogil:
# Cython view of the C++ class nvcomp::CascadedManager (from nvcomp/cascaded.hpp).
cdef extern from "nvcomp/cascaded.hpp" nogil:
    cdef cppclass CascadedManager "nvcomp::CascadedManager":
        # Constructor: chunk size, cascaded format options, stream handle,
        # device id.
        # `except +` makes Cython convert a C++ exception thrown by the
        # constructor into a Python exception instead of letting it escape
        # uncaught, matching the other manager constructors declared in this
        # file.
        CascadedManager(
            size_t uncomp_chunk_size,
            const nvcompBatchedCascadedOpts_t& options,
            cudaStream_t user_stream,
            int device_id
        ) except +

# C++ Concrete Gdeflate Manager
# C-level declarations pulled from nvcomp/gdeflate.h: the option struct (and
# its default instance) that the GdeflateManager constructor takes as
# `format_opts`.
cdef extern from "nvcomp/gdeflate.h" nogil:
# Format options for Gdeflate; the single declared field is an integer
# algorithm selector.
ctypedef struct nvcompBatchedGdeflateOpts_t:
int algo
# Default options object declared by the header.
cdef nvcompBatchedGdeflateOpts_t nvcompBatchedGdeflateDefaultOpts

cdef extern from "nvcomp/gdeflate.hpp":
cdef cppclass GdeflateManager "nvcomp::GdeflateManager":
GdeflateManager(
int uncomp_chunk_size,
int algo,
const nvcompBatchedGdeflateOpts_t& format_opts,
cudaStream_t user_stream,
const int device_id
) except +

# C++ Concrete LZ4 Manager
# C-level declarations for the LZ4 format options. These are taken from
# nvCOMP's LZ4 C header (nvcomp/lz4.h), not the Gdeflate header, so the
# generated code includes the header that actually declares them.
cdef extern from "nvcomp/lz4.h" nogil:
    # Format options for LZ4, passed to the LZ4Manager constructor as
    # `format_opts`; the single declared field is the nvcompType_t of the data.
    ctypedef struct nvcompBatchedLZ4Opts_t:
        nvcompType_t data_type
    # Default options object declared by the header.
    cdef nvcompBatchedLZ4Opts_t nvcompBatchedLZ4DefaultOpts

cdef extern from "nvcomp/lz4.hpp":
cdef cppclass LZ4Manager "nvcomp::LZ4Manager":
LZ4Manager(
size_t uncomp_chunk_size,
nvcompType_t data_type,
const nvcompBatchedLZ4Opts_t& format_opts,
cudaStream_t user_stream,
const int device_id
) except +

# C++ Concrete Snappy Manager
# C-level declarations pulled from nvcomp/snappy.h: the option struct (and its
# default instance) that the SnappyManager constructor takes as `format_opts`.
cdef extern from "nvcomp/snappy.h" nogil:
# Format options for Snappy; the only declared field is named `reserved`.
ctypedef struct nvcompBatchedSnappyOpts_t:
int reserved
# Default options object declared by the header.
cdef nvcompBatchedSnappyOpts_t nvcompBatchedSnappyDefaultOpts

# Cython view of the C++ class nvcomp::SnappyManager (from nvcomp/snappy.hpp).
cdef extern from "nvcomp/snappy.hpp":
cdef cppclass SnappyManager "nvcomp::SnappyManager":
# Constructor: chunk size, Snappy format options, stream handle, device id.
# `except +` makes Cython convert a C++ exception thrown by the
# constructor into a Python exception.
SnappyManager(
size_t uncomp_chunk_size,
const nvcompBatchedSnappyOpts_t& format_opts,
cudaStream_t user_stream,
const int device_id
) except +
37 changes: 6 additions & 31 deletions python/kvikio/nvcomp.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
# See file LICENSE for terms.

from enum import Enum
Expand Down Expand Up @@ -192,35 +192,6 @@ def configure_decompression_with_compressed_buffer(
asarray(data)
)

def get_required_scratch_buffer_size(self) -> int:
"""Return the size of the optional scratch buffer.
Returns
-------
int
"""
return self._manager.get_required_scratch_buffer_size()

def set_scratch_buffer(self, new_scratch_buffer: cp.ndarray) -> None:
"""Use a pre-allocated buffer for compression.
Use a GPU-allocated buffer that will be used for compression
temporary storage instead of allowing the library to create the
scratch buffer.
Can reduce memory usage.
Parameters
----------
new_scratch_buffer : cp.ndarray
The buffer that you allocated on the GPU for compressor temporary
storage.
Returns
-------
cp.ndarray
"""
return self._manager.set_scratch_buffer(asarray(new_scratch_buffer))

def get_compressed_output_size(self, comp_buffer: cp.ndarray) -> int:
"""Return the actual size of compression result.
Expand Down Expand Up @@ -277,7 +248,11 @@ def __init__(self, **kwargs):
super().__init__(kwargs)

self._manager = _lib._BitcompManager(
self.data_type.value, self.bitcomp_algo, self.stream, self.device_id
self.chunk_size,
self.data_type.value,
self.bitcomp_algo,
self.stream,
self.device_id,
)


Expand Down
Loading

0 comments on commit b8f6218

Please sign in to comment.