fix: CUDA 12.2 defines half operators for all arches

microsoft · Aug 15, 2023 · cee050f · cee050f
1 parent 0f232c5
commit cee050f
Showing 1 changed file with 2 additions and 1 deletion.
diff --git a/onnxruntime/core/providers/cuda/cu_inc/common.cuh b/onnxruntime/core/providers/cuda/cu_inc/common.cuh
@@ -19,7 +19,8 @@ namespace onnxruntime {
 namespace cuda {
 
 // float16 arithmetic is supported after sm5.3 with intrinsics, and cuda does not provide fallback for lower versions
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 530
+// CUDA 12.2 and later no longer restrict the half operator definitions to sm_53+; they are defined for all architectures, so these fallbacks are only needed with older toolkits
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 530) && ((__CUDACC_VER_MAJOR__ < 12) || ((__CUDACC_VER_MAJOR__ == 12 ) && (__CUDACC_VER_MINOR__ < 2)))
 __device__ __forceinline__ half operator+(const half& lh, const half& rh) { return half((float)lh + (float)rh); }
 __device__ __forceinline__ half operator-(const half& lh, const half& rh) { return half((float)lh - (float)rh); }
 __device__ __forceinline__ half operator*(const half& lh, const half& rh) { return half((float)lh * (float)rh); }