Introduce TORCH_DISABLE_GPU_ASSERTS (pytorch#84190)
- Asserts for CUDA are enabled by default
- Disabled for ROCm by default by setting `TORCH_DISABLE_GPU_ASSERTS` to `ON`
- Can be enabled for ROCm by setting the above variable to `OFF` during the build, or forcefully enabled by setting `ROCM_FORCE_ENABLE_GPU_ASSERTS:BOOL=ON`

This is a follow-up change requested in a review comment on PR pytorch#81790, comment [link](pytorch#81790 (comment))

Pull Request resolved: pytorch#84190
Approved by: https://github.com/jeffdaily, https://github.com/malfet
pruthvistony authored and pytorchmergebot committed Nov 2, 2022
1 parent b18d0f1 commit 1e2c4a6
Showing 7 changed files with 31 additions and 21 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -285,6 +285,7 @@ if(NOT USE_XNNPACK AND CMAKE_VERSION VERSION_LESS ${XNNPACK_MIN_CMAKE_VER})
endif()
option(USE_ZMQ "Use ZMQ" OFF)
option(USE_ZSTD "Use ZSTD" OFF)
option(TORCH_DISABLE_GPU_ASSERTS "Disable GPU asserts by default" OFF)
# Ensure that an ITT build is the default for x86 CPUs
cmake_dependent_option(
USE_ITT "Use Intel(R) VTune Profiler ITT functionality" ON
11 changes: 6 additions & 5 deletions c10/macros/Macros.h
@@ -326,9 +326,8 @@ constexpr uint32_t CUDA_THREADS_PER_BLOCK_FALLBACK = 256;
// CUDA_KERNEL_ASSERT checks the assertion
// even when NDEBUG is defined. This is useful for important assertions in CUDA
// code that would otherwise be suppressed when building Release.
#if defined(__ANDROID__) || defined(__APPLE__) || \
(defined(USE_ROCM) && ROCM_VERSION < 40100) || \
(defined(USE_ROCM) && defined(ROCM_DISABLE_GPU_ASSERTS))
#if defined(__ANDROID__) || defined(__APPLE__) || \
(defined(USE_ROCM) && ROCM_VERSION < 40100)
// Those platforms do not support assert()
#define CUDA_KERNEL_ASSERT(cond)
#define SYCL_KERNEL_ASSERT(cond)
@@ -368,7 +367,9 @@ extern SYCL_EXTERNAL void __assert_fail(
unsigned int line,
const char* func);
#else // __SYCL_DEVICE_ONLY__
#if (defined(__CUDA_ARCH__) && !(defined(__clang__) && defined(__CUDA__)))
#if ( \
defined(__CUDA_ARCH__) && !(defined(__clang__) && defined(__CUDA__)) && \
!defined(TORCH_DISABLE_GPU_ASSERTS))
// CUDA supports __assert_fail function which are common for both device
// and host side code.
__host__ __device__
@@ -386,7 +387,7 @@ __host__ __device__
const char* function) throw() __attribute__((__noreturn__));

#if (defined(__HIP_ARCH__) || defined(__HIP__)) && \
!defined(ROCM_DISABLE_GPU_ASSERTS)
!defined(TORCH_DISABLE_GPU_ASSERTS)
// ROCm supports __assert_fail only as a device side function.
__device__ __attribute__((noinline)) __attribute__((weak)) void __assert_fail(
const char* assertion,
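To make the effect of this macro concrete, here is a minimal sketch of device-side usage — the `bounds_check` kernel below is hypothetical and not part of this diff. With kernel asserts enabled (the CUDA default), a failing `CUDA_KERNEL_ASSERT` traps the kernel even in Release (NDEBUG) builds; in a build where `TORCH_DISABLE_GPU_ASSERTS` is defined (the ROCm default after this change), the check is compiled out.

```cpp
#include <c10/macros/Macros.h>

// Hypothetical kernel: verifies that every gather index is in range.
// Unlike plain assert(), CUDA_KERNEL_ASSERT survives NDEBUG, so the check
// still fires in Release builds unless GPU asserts are disabled at build
// time via TORCH_DISABLE_GPU_ASSERTS.
__global__ void bounds_check(const int64_t* indices, int64_t n, int64_t bound) {
  const int64_t i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    CUDA_KERNEL_ASSERT(indices[i] >= 0 && indices[i] < bound && "index out of bounds");
  }
}
```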
2 changes: 2 additions & 0 deletions caffe2/core/macros.h.in
@@ -44,6 +44,7 @@ static_assert(
#cmakedefine CAFFE2_USE_NVTX
#cmakedefine CAFFE2_USE_ITT
#cmakedefine CAFFE2_USE_TRT
#cmakedefine TORCH_DISABLE_GPU_ASSERTS

#ifndef EIGEN_MPL2_ONLY
#cmakedefine EIGEN_MPL2_ONLY
@@ -85,4 +86,5 @@ static_assert(
{"USE_NVTX", "${CAFFE2_USE_NVTX}"}, \
{"USE_ITT", "${CAFFE2_USE_ITT}"}, \
{"USE_TRT", "${CAFFE2_USE_TRT}"}, \
{"TORCH_DISABLE_GPU_ASSERTS", "${TORCH_DISABLE_GPU_ASSERTS}"}, \
}
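Because the flag is recorded in the generated `caffe2/core/macros.h`, host code can check at compile time whether GPU asserts were compiled out of the build. A minimal sketch, assuming the generated header is on the include path (the `main` below is illustrative only, not part of this change):

```cpp
#include <caffe2/core/macros.h>

#include <cstdio>

int main() {
#ifdef TORCH_DISABLE_GPU_ASSERTS
  // Defined by CMake for ROCm builds unless ROCM_FORCE_ENABLE_GPU_ASSERTS=ON.
  std::printf("GPU kernel asserts are compiled out in this build.\n");
#else
  std::printf("GPU kernel asserts are enabled in this build.\n");
#endif
  return 0;
}
```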
10 changes: 10 additions & 0 deletions cmake/Dependencies.cmake
@@ -1248,6 +1248,16 @@ if(ANDROID)
list(APPEND Caffe2_DEPENDENCY_LIBS log)
endif()

# ---[ Kernel asserts
# Kernel asserts are enabled by default for CUDA and disabled for ROCm.
# For ROCm, it can be enabled by setting ROCM_FORCE_ENABLE_GPU_ASSERTS
if(USE_ROCM AND ROCM_FORCE_ENABLE_GPU_ASSERTS)
message(STATUS "Forcefully enabling kernel asserts on ROCM")
elseif(USE_ROCM AND NOT ROCM_FORCE_ENABLE_GPU_ASSERTS)
message(STATUS "Disabling kernel asserts for ROCm")
caffe2_update_option(TORCH_DISABLE_GPU_ASSERTS ON)
endif()

# ---[ LLVM
if(USE_LLVM)
message(STATUS "Looking for LLVM in ${USE_LLVM}")
1 change: 1 addition & 0 deletions cmake/Summary.cmake
@@ -199,4 +199,5 @@ function(caffe2_print_configuration_summary)
# coreml
message(STATUS " USE_COREML_DELEGATE : ${USE_COREML_DELEGATE}")
message(STATUS " BUILD_LAZY_TS_BACKEND : ${BUILD_LAZY_TS_BACKEND}")
message(STATUS " TORCH_DISABLE_GPU_ASSERTS : ${TORCH_DISABLE_GPU_ASSERTS}")
endfunction()
16 changes: 0 additions & 16 deletions cmake/public/LoadHIP.cmake
@@ -143,9 +143,6 @@ message("Building PyTorch for GPU arch: ${PYTORCH_ROCM_ARCH}")
# Add HIP to the CMAKE Module Path
set(CMAKE_MODULE_PATH ${HIP_PATH}/cmake ${CMAKE_MODULE_PATH})

#Disable kernel assert due to performance regression
set(ROCM_ENABLE_KERNEL_ASSERTS FALSE CACHE BOOL "Kernel asserts are disabled by default for ROCm")

macro(find_package_and_print_version PACKAGE_NAME)
find_package("${PACKAGE_NAME}" ${ARGN})
message("${PACKAGE_NAME} VERSION: ${${PACKAGE_NAME}_VERSION}")
@@ -286,19 +283,6 @@ if(HIP_FOUND)
find_package_and_print_version(hipcub REQUIRED)
find_package_and_print_version(rocthrust REQUIRED)

if(ROCM_VERSION_DEV VERSION_GREATER_EQUAL "4.1.0")
if(ROCM_ENABLE_KERNEL_ASSERTS)
message("ROCm version >= 4.1; enabling asserts")
else()
add_definitions(-DROCM_DISABLE_GPU_ASSERTS)
message("ROCm version >= 4.1; kernel asserts are disabled")
endif()
else()
# Disable Asserts In Code (Can't use asserts on HIP stack.)
add_definitions(-DNDEBUG)
message("ROCm version < 4.1; disablng asserts")
endif()

if(HIP_COMPILER STREQUAL clang)
set(hip_library_name amdhip64)
else()
11 changes: 11 additions & 0 deletions docs/source/notes/hip.rst
@@ -144,3 +144,14 @@ Refer to CUDA Semantics doc
---------------------------

For any sections not listed here, please refer to the CUDA semantics doc: :ref:`cuda-semantics`


Enabling kernel asserts
-----------------------

Kernel asserts are supported on ROCm but are disabled by default because of their performance overhead. They can be enabled
by recompiling PyTorch from source.

Please add the line below as an argument to the cmake command parameters::

-DROCM_FORCE_ENABLE_GPU_ASSERTS:BOOL=ON
