From 94ed3b86af1aba30c8ed16f632b508cdb14348e7 Mon Sep 17 00:00:00 2001 From: risemeup1 <62429225+risemeup1@users.noreply.github.com> Date: Sun, 28 Jan 2024 19:18:57 +0800 Subject: [PATCH] Update install paddle (#61235) * fix bug * fix bug * fix * fix * fix * fix * fix * fix * fix * fix * fix * support install paddle not depends cuda and cudnn * fix --- CMakeLists.txt | 7 + paddle/phi/CMakeLists.txt | 1 + paddle/phi/backends/dynload/dynamic_loader.cc | 135 ++++++++++-------- paddle/phi/core/flags.cc | 67 +++++++++ python/paddle/__init__.py | 27 ++++ python/setup.py.in | 55 ++++++- setup.py | 89 +++++++++--- 7 files changed, 297 insertions(+), 84 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 90d6c22f386c6..5a8c5309e0a75 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -96,6 +96,13 @@ if(WITH_GPU AND WITH_ROCM) endif() if(WITH_GPU AND NOT APPLE) + #(Note risemeup1): The cudart dynamic library libcudart.so is used by set CUDA_USE_STATIC_CUDA_RUNTIME and CMAKE_CUDA_FLAGS + if(LINUX) + set(CUDA_USE_STATIC_CUDA_RUNTIME + OFF + CACHE BOOL "" FORCE) + set(CMAKE_CUDA_FLAGS "--cudart shared") + endif() enable_language(CUDA) message(STATUS "CUDA compiler: ${CMAKE_CUDA_COMPILER}, version: " "${CMAKE_CUDA_COMPILER_ID} ${CMAKE_CUDA_COMPILER_VERSION}") diff --git a/paddle/phi/CMakeLists.txt b/paddle/phi/CMakeLists.txt index 64c18b2b60ff0..fe099b30d0b33 100644 --- a/paddle/phi/CMakeLists.txt +++ b/paddle/phi/CMakeLists.txt @@ -133,6 +133,7 @@ if(WITH_GPU) phi ${PHI_BUILD_TYPE} SRCS ${PHI_SRCS} DEPS ${PHI_DEPS}) + elseif(WITH_ROCM) hip_add_library(phi ${PHI_BUILD_TYPE} ${PHI_SRCS}) target_link_libraries(phi ${PHI_DEPS}) diff --git a/paddle/phi/backends/dynload/dynamic_loader.cc b/paddle/phi/backends/dynload/dynamic_loader.cc index bdb9e120d2884..098de189d24b0 100644 --- a/paddle/phi/backends/dynload/dynamic_loader.cc +++ b/paddle/phi/backends/dynload/dynamic_loader.cc @@ -16,7 +16,6 @@ limitations under the License. */ #include #include #include - #include "paddle/phi/backends/dynload/cupti_lib_path.h" #include "paddle/phi/backends/dynload/port.h" #include "paddle/phi/core/enforce.h" @@ -30,57 +29,20 @@ limitations under the License. */ #include "glog/logging.h" #include "paddle/phi/core/flags.h" -PHI_DEFINE_string(cudnn_dir, // NOLINT - "", - "Specify path for loading libcudnn.so. For instance, " - "/usr/local/cudnn/lib. If empty [default], dlopen " - "will search cudnn from LD_LIBRARY_PATH"); - -PHI_DEFINE_string( // NOLINT - cuda_dir, - "", - "Specify path for loading cuda library, such as libcublas, libcublasLt " - "libcurand, libcusolver. For instance, /usr/local/cuda/lib64. " - "If default, dlopen will search cuda from LD_LIBRARY_PATH"); - -PHI_DEFINE_string(nccl_dir, // NOLINT - "", - "Specify path for loading nccl library, such as libnccl.so. " - "For instance, /usr/local/cuda/lib64. If default, " - "dlopen will search cuda from LD_LIBRARY_PATH"); - -PHI_DEFINE_string(cupti_dir, - "", - "Specify path for loading cupti.so."); // NOLINT - -PHI_DEFINE_string( // NOLINT - tensorrt_dir, - "", - "Specify path for loading tensorrt library, such as libnvinfer.so."); - -PHI_DEFINE_string(mklml_dir, - "", - "Specify path for loading libmklml_intel.so."); // NOLINT - -PHI_DEFINE_string(lapack_dir, - "", - "Specify path for loading liblapack.so."); // NOLINT - -PHI_DEFINE_string(mkl_dir, // NOLINT - "", - "Specify path for loading libmkl_rt.so. " - "For insrance, /opt/intel/oneapi/mkl/latest/lib/intel64/." 
- "If default, " - "dlopen will search mkl from LD_LIBRARY_PATH"); - -PHI_DEFINE_string(op_dir, // NOLINT - "", - "Specify path for loading user-defined op library."); - -PHI_DEFINE_string(cusparselt_dir, // NOLINT - "", - "Specify path for loading libcusparseLt.so."); - +PHI_DECLARE_string(cudnn_dir); +PHI_DECLARE_string(cuda_dir); +PHI_DECLARE_string(cublas_dir); +PHI_DECLARE_string(nccl_dir); +PHI_DECLARE_string(cupti_dir); +PHI_DECLARE_string(tensorrt_dir); +PHI_DECLARE_string(mklml_dir); +PHI_DECLARE_string(lapack_dir); +PHI_DECLARE_string(mkl_dir); +PHI_DECLARE_string(op_dir); +PHI_DECLARE_string(cusparselt_dir); +PHI_DECLARE_string(curand_dir); +PHI_DECLARE_string(cusolver_dir); +PHI_DECLARE_string(cusparse_dir); #ifdef PADDLE_WITH_HIP PHI_DEFINE_string(miopen_dir, @@ -324,6 +286,17 @@ void* GetCublasDsoHandle() { #elif defined(_WIN32) && defined(PADDLE_WITH_CUDA) return GetDsoHandleFromSearchPath( FLAGS_cuda_dir, win_cublas_lib, true, {cuda_lib_path}); +#elif defined(__linux__) && defined(PADDLE_WITH_CUDA) + if (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000) { + return GetDsoHandleFromSearchPath(FLAGS_cublas_dir, "libcublas.so.11"); + } else if (CUDA_VERSION >= 12000 && CUDA_VERSION <= 12030) { + return GetDsoHandleFromSearchPath(FLAGS_cublas_dir, "libcublas.so.12"); + } else { + std::string warning_msg( + "Your CUDA_VERSION is less than 11 or greater than 12, paddle " + "temporarily no longer supports"); + return nullptr; + } #elif defined(PADDLE_WITH_HIP) return GetDsoHandleFromSearchPath(FLAGS_rocm_dir, "librocblas.so"); #else @@ -333,7 +306,18 @@ void* GetCublasDsoHandle() { void* GetCublasLtDsoHandle() { // APIs available after CUDA 10.1 -#if defined(PADDLE_WITH_CUDA) && CUDA_VERSION >= 10010 +#if defined(__linux__) && defined(PADDLE_WITH_CUDA) + if (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000) { + return GetDsoHandleFromSearchPath(FLAGS_cublas_dir, "libcublasLt.so.11"); + } else if (CUDA_VERSION >= 12000 && CUDA_VERSION <= 12030) { + return GetDsoHandleFromSearchPath(FLAGS_cublas_dir, "libcublasLt.so.12"); + } else { + std::string warning_msg( + "Your CUDA_VERSION is less than 11 or greater than 12, paddle " + "temporarily no longer supports"); + return nullptr; + } +#elif !defined(__linux__) && defined(PADDLE_WITH_CUDA) && CUDA_VERSION >= 10010 return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcublasLt.so"); #else std::string warning_msg( @@ -369,7 +353,7 @@ void* GetCUDNNDsoHandle() { return GetDsoHandleFromSearchPath(FLAGS_miopen_dir, "libMIOpen.so", false); #else return GetDsoHandleFromSearchPath( - FLAGS_cudnn_dir, "libcudnn.so", false, {cuda_lib_path}); + FLAGS_cudnn_dir, "libcudnn.so.8", false, {cuda_lib_path}); #endif } @@ -377,9 +361,22 @@ void* GetCUPTIDsoHandle() { #if defined(__APPLE__) || defined(__OSX__) return GetDsoHandleFromSearchPath( FLAGS_cupti_dir, "libcupti.dylib", false, {cupti_lib_path}); +#elif defined(__linux__) && defined(PADDLE_WITH_CUDA) + if (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000) { + return GetDsoHandleFromSearchPath( + FLAGS_cupti_dir, "libcupti.so.11.7", false, {cupti_lib_path}); + } else if (CUDA_VERSION >= 12000 && CUDA_VERSION < 12030) { + return GetDsoHandleFromSearchPath( + FLAGS_cupti_dir, "libcupti.so.12", false, {cupti_lib_path}); + } else { + std::string warning_msg( + "Your CUDA_VERSION is less than 11 or greater than 12, paddle " + "temporarily no longer supports"); + return nullptr; + } #else return GetDsoHandleFromSearchPath( - FLAGS_cupti_dir, "libcupti.so", false, {cupti_lib_path}); + FLAGS_cupti_dir, 
"libcupti.so.11.7", false, {cupti_lib_path}); #endif } @@ -392,7 +389,7 @@ void* GetCurandDsoHandle() { #elif defined(PADDLE_WITH_HIP) return GetDsoHandleFromSearchPath(FLAGS_rocm_dir, "libhiprand.so"); #else - return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcurand.so"); + return GetDsoHandleFromSearchPath(FLAGS_curand_dir, "libcurand.so.10"); #endif } @@ -424,7 +421,7 @@ void* GetCusolverDsoHandle() { return GetDsoHandleFromSearchPath( FLAGS_cuda_dir, win_cusolver_lib, true, {cuda_lib_path}); #else - return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcusolver.so"); + return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcusolver.so.11"); #endif } @@ -434,6 +431,17 @@ void* GetCusparseDsoHandle() { #elif defined(_WIN32) && defined(PADDLE_WITH_CUDA) return GetDsoHandleFromSearchPath( FLAGS_cuda_dir, win_cusparse_lib, true, {cuda_lib_path}); +#elif defined(__linux__) && defined(PADDLE_WITH_CUDA) + if (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000) { + return GetDsoHandleFromSearchPath(FLAGS_cusparse_dir, "libcusparse.so.11"); + } else if (CUDA_VERSION >= 12000 && CUDA_VERSION <= 12030) { + return GetDsoHandleFromSearchPath(FLAGS_cusparse_dir, "libcusparse.so.12"); + } else { + std::string warning_msg( + "Your CUDA_VERSION is less than 11 or greater than 12, paddle " + "temporarily no longer."); + return nullptr; + } #elif defined(PADDLE_WITH_HIP) return GetDsoHandleFromSearchPath(FLAGS_rocm_dir, "librocsparse.so"); #else @@ -528,7 +536,7 @@ void* GetNCCLDsoHandle() { FLAGS_rccl_dir, "librccl.so", true, {}, warning_msg); #else return GetDsoHandleFromSearchPath( - FLAGS_nccl_dir, "libnccl.so", true, {}, warning_msg); + FLAGS_nccl_dir, "libnccl.so.2", true, {}, warning_msg); #endif } @@ -581,6 +589,17 @@ void* GetNvtxDsoHandle() { void* GetCUFFTDsoHandle() { #if defined(__APPLE__) || defined(__OSX__) return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcufft.dylib"); +#elif defined(__linux__) && defined(PADDLE_WITH_CUDA) + if (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000) { + return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcufft.so.10"); + } else if (CUDA_VERSION >= 12000 && CUDA_VERSION < 13000) { + return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcufft.so.11"); + } else { + std::string warning_msg( + "Your CUDA_VERSION is less than 11 or greater than 12, paddle " + "temporarily no longer."); + return nullptr; + } #elif defined(_WIN32) && defined(PADDLE_WITH_CUDA) return GetDsoHandleFromSearchPath( FLAGS_cuda_dir, win_cufft_lib, true, {cuda_lib_path}); diff --git a/paddle/phi/core/flags.cc b/paddle/phi/core/flags.cc index 205cdd9742204..f8da1238d23d8 100644 --- a/paddle/phi/core/flags.cc +++ b/paddle/phi/core/flags.cc @@ -1528,3 +1528,70 @@ PHI_DEFINE_EXPORTED_int64(alloc_fill_value, -1, "Whether to fill fixed value after allocation. " "This is usefull for debugging."); + +PHI_DEFINE_EXPORTED_string( + cudnn_dir, // NOLINT + "", + "Specify path for loading libcudnn.so. For instance, " + "/usr/local/cudnn/lib. If empty [default], dlopen " + "will search cudnn from LD_LIBRARY_PATH"); + +PHI_DEFINE_EXPORTED_string( // NOLINT + cuda_dir, + "", + "Specify path for loading cuda library, such as libcublas, libcublasLt " + "libcurand, libcusolver. For instance, /usr/local/cuda/lib64. " + "If default, dlopen will search cuda from LD_LIBRARY_PATH"); + +PHI_DEFINE_EXPORTED_string(cublas_dir, // NOLINT + "", + "Specify path for loading libcublas.so."); +PHI_DEFINE_EXPORTED_string( + nccl_dir, // NOLINT + "", + "Specify path for loading nccl library, such as libnccl.so. 
" + "For instance, /usr/local/cuda/lib64. If default, " + "dlopen will search cuda from LD_LIBRARY_PATH"); + +PHI_DEFINE_EXPORTED_string(cupti_dir, + "", + "Specify path for loading cupti.so."); // NOLINT + +PHI_DEFINE_EXPORTED_string( // NOLINT + tensorrt_dir, + "", + "Specify path for loading tensorrt library, such as libnvinfer.so."); + +PHI_DEFINE_EXPORTED_string( + mklml_dir, + "", + "Specify path for loading libmklml_intel.so."); // NOLINT + +PHI_DEFINE_EXPORTED_string(lapack_dir, + "", + "Specify path for loading liblapack.so."); // NOLINT + +PHI_DEFINE_EXPORTED_string( + mkl_dir, // NOLINT + "", + "Specify path for loading libmkl_rt.so. " + "For insrance, /opt/intel/oneapi/mkl/latest/lib/intel64/." + "If default, " + "dlopen will search mkl from LD_LIBRARY_PATH"); + +PHI_DEFINE_EXPORTED_string(op_dir, // NOLINT + "", + "Specify path for loading user-defined op library."); + +PHI_DEFINE_EXPORTED_string(cusparselt_dir, // NOLINT + "", + "Specify path for loading libcusparseLt.so."); +PHI_DEFINE_EXPORTED_string(curand_dir, // NOLINT + "", + "Specify path for loading libcurand.so.10."); +PHI_DEFINE_EXPORTED_string(cusolver_dir, // NOLINT + "", + "Specify path for loading libcusolver.so.*."); +PHI_DEFINE_EXPORTED_string(cusparse_dir, // NOLINT + "", + "Specify path for loading libcusparse.so.*."); diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index 4603248f0fd10..120e1142c2113 100644 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -561,6 +561,33 @@ if os.path.exists(cuh_file): os.environ.setdefault('runtime_include_dir', runtime_include_dir) +if is_compiled_with_cuda(): + import os + + if os.name == 'posix': + package_dir = os.path.dirname(os.path.abspath(__file__)) + cublas_lib_path = package_dir + "/.." + "/nvidia/cublas/lib" + set_flags({"FLAGS_cublas_dir": cublas_lib_path}) + + cudnn_lib_path = package_dir + "/.." + "/nvidia/cudnn/lib" + set_flags({"FLAGS_cudnn_dir": cudnn_lib_path}) + + curand_lib_path = package_dir + "/.." + "/nvidia/curand/lib" + set_flags({"FLAGS_curand_dir": curand_lib_path}) + + cusolver_lib_path = package_dir + "/.." + "/nvidia/cusolver/lib" + set_flags({"FLAGS_cusolver_dir": cusolver_lib_path}) + + cusparse_lib_path = package_dir + "/.." + "/nvidia/cusparse/lib" + set_flags({"FLAGS_cusparse_dir": cusparse_lib_path}) + + nccl_lib_path = package_dir + "/.." + "/nvidia/nccl/lib" + set_flags({"FLAGS_nccl_dir": nccl_lib_path}) + + cupti_dir_lib_path = package_dir + "/.." + "/nvidia/cuda_cupti/lib" + set_flags({"FLAGS_cupti_dir": cupti_dir_lib_path}) + + disable_static() from .pir_utils import IrGuard diff --git a/python/setup.py.in b/python/setup.py.in index 3ec9e1577009f..37cbb638e4aab 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -394,6 +394,50 @@ def is_transpiler(): write_distributed_training_mode_py(filename='@PADDLE_BINARY_DIR@/python/paddle/incubate/distributed/fleet/parameter_server/version.py') +def get_paddle_extra_install_requirements(): + #(Note risemeup1): Paddle will install the pypi cuda package provided by Nvidia, which includes the cuda runtime, cudnn, and cublas, thereby making the operation of 'pip install paddle' no longer dependent on the installation of cuda and cudnn. 
+ paddle_cuda_install_requirements = os.getenv( + "PADDLE_CUDA_INSTALL_REQUIREMENTS", None + ) + if paddle_cuda_install_requirements is not None: + PADDLE_CUDA_INSTALL_REQUIREMENTS = { + "V11": ( + "nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-cudnn-cu11==8.7.0.84; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-nccl-cu11==2.19.3; platform_system == 'Linux' and platform_machine == 'x86_64'" + ), + "V12": ( + "nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-nccl-cu12==2.19.3; platform_system == 'Linux' and platform_machine == 'x86_64'" + ), + } + try: + output = subprocess.check_output(['nvcc', '--version']).decode('utf-8') + version_line = [line for line in output.split('\n') if 'release' in line][0] + version = version_line.split(' ')[-1].split(',')[0] + cuda_major_version = version.split('.')[0] + except Exception as e: + raise ValueError("CUDA not found") + + paddle_cuda_requires = PADDLE_CUDA_INSTALL_REQUIREMENTS[cuda_major_version].split("|") + + return paddle_cuda_requires + else: + return [] + packages=['paddle', 'paddle.libs', 'paddle.utils', @@ -577,6 +621,10 @@ if sys.version_info >= (3,8): continue setup_requires_tmp+=[setup_requires_i] setup_requires = setup_requires_tmp + if platform.system() == 'Linux' and platform.machine() == 'x86_64': + paddle_cuda_requires = get_paddle_extra_install_requirements() + setup_requires += paddle_cuda_requires + # the prefix is sys.prefix which should always be usr paddle_bins = '' @@ -784,14 +832,13 @@ if '${CMAKE_BUILD_TYPE}' == 'Release': # change rpath of pir.ext for loading 3rd party libb commands.append("install_name_tool -add_rpath '@loader_path' ${PADDLE_BINARY_DIR}/python/paddle/libs/${IR_NAME}") else: - commands = ["patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/base/${FLUID_CORE_NAME}" + '.so'] - commands.append("patchelf --set-rpath '$ORIGIN' ${PADDLE_BINARY_DIR}/python/paddle/libs/${COMMON_NAME}") + commands = ["patchelf --set-rpath '$ORIGIN/../../nvidia/cuda_runtime/lib:$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/base/${FLUID_CORE_NAME}" + '.so'] if('${WITH_SHARED_PHI}' == 'ON'): # 
change rpath of phi.ext for loading 3rd party lib - commands.append("patchelf --set-rpath '$ORIGIN' ${PADDLE_BINARY_DIR}/python/paddle/libs/${PHI_NAME}") + commands.append("patchelf --set-rpath '$ORIGIN/../../nvidia/cuda_runtime/lib:$ORIGIN:$ORIGIN/../libs' ${PADDLE_BINARY_DIR}/python/paddle/libs/${PHI_NAME}") if('${WITH_SHARED_IR}' == 'ON'): # change rpath of pir.ext for loading 3rd party lib - commands.append("patchelf --set-rpath '$ORIGIN' ${PADDLE_BINARY_DIR}/python/paddle/libs/${IR_NAME}") + commands.append("patchelf --set-rpath '$ORIGIN:$ORIGIN/../libs' ${PADDLE_BINARY_DIR}/python/paddle/libs/${IR_NAME}") # The sw_64 not suppot patchelf, so we just disable that. if platform.machine() != 'sw_64' and platform.machine() != 'mips64': for command in commands: diff --git a/setup.py b/setup.py index 7e7189e10cb43..67774e58138b8 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,6 @@ import subprocess import sys import time -import warnings from contextlib import contextmanager from subprocess import CalledProcessError @@ -39,7 +38,7 @@ python_version = platform.python_version() version_detail = sys.version_info version = str(version_detail[0]) + '.' + str(version_detail[1]) -env_version = str(os.getenv("PY_VERSION")) +env_version = os.getenv("PY_VERSION", None) if version_detail < (3, 8): raise RuntimeError( @@ -47,21 +46,15 @@ f"you are using Python {python_version}" ) elif env_version is None: - print(f"Export PY_VERSION = { python_version }") + print(f"export PY_VERSION = { version }") os.environ["PY_VERSION"] = python_version elif env_version != version: - warnings.warn( - f"You set PY_VERSION={env_version}, but " - f"your current python environment is {version} " - f"we will attempt to use the python version you set to execute." + raise ValueError( + f"You have set the PY_VERSION environment variable to {env_version}, but " + f"your current Python version is {version}, " + f"Please keep them consistent." ) - cmd = 'which python' + env_version - res = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE) - if res.returncode == 0: - os.environ["PYTHON_EXECUTABLE"] = res - else: - raise RuntimeError("We can't find the version you set in your machine") # check cmake @@ -921,6 +914,7 @@ def get_setup_requires(): continue setup_requires_tmp += [setup_requires_i] setup_requires = setup_requires_tmp + return setup_requires else: raise RuntimeError( @@ -928,6 +922,57 @@ def get_setup_requires(): ) +def get_paddle_extra_install_requirements(): + # (Note risemeup1): Paddle will install the pypi cuda package provided by Nvidia, which includes the cuda runtime, cudnn, and cublas, thereby making the operation of 'pip install paddle' no longer dependent on the installation of cuda and cudnn. 
+ paddle_cuda_install_requirements = os.getenv( + "PADDLE_CUDA_INSTALL_REQUIREMENTS", None + ) + if paddle_cuda_install_requirements is not None: + PADDLE_CUDA_INSTALL_REQUIREMENTS = { + "V11": ( + "nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-cudnn-cu11==8.7.0.84; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-nccl-cu11==2.19.3; platform_system == 'Linux' and platform_machine == 'x86_64'" + ), + "V12": ( + "nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-nccl-cu12==2.19.3; platform_system == 'Linux' and platform_machine == 'x86_64'" + ), + } + try: + output = subprocess.check_output(['nvcc', '--version']).decode( + 'utf-8' + ) + version_line = [ + line for line in output.split('\n') if 'release' in line + ][0] + version = version_line.split(' ')[-1].split(',')[0] + cuda_major_version = version.split('.')[0] + except Exception as e: + raise ValueError("CUDA not found") + + paddle_cuda_requires = PADDLE_CUDA_INSTALL_REQUIREMENTS[ + cuda_major_version + ].split("|") + + return paddle_cuda_requires + else: + return [] + + def get_package_data_and_package_dir(): if os.name != 'nt': package_data = { @@ -990,6 +1035,7 @@ def get_package_data_and_package_dir(): shutil.copy(env_dict.get("LAPACK_LIB"), libs_path) shutil.copy(env_dict.get("GFORTRAN_LIB"), libs_path) shutil.copy(env_dict.get("GNU_RT_LIB_1"), libs_path) + if env_dict.get("WITH_CUDNN_DSO") == 'ON' and os.path.exists( env_dict.get("CUDNN_LIBRARY") ): @@ -1012,6 +1058,7 @@ def get_package_data_and_package_dir(): if os.path.exists(cudnn_lib): package_data['paddle.libs'] += [os.path.basename(cudnn_lib)] shutil.copy(cudnn_lib, libs_path) + if not sys.platform.startswith("linux"): package_data['paddle.libs'] += [ os.path.basename(env_dict.get("GNU_RT_LIB_2")) @@ -1224,28 +1271,22 @@ def get_package_data_and_package_dir(): ) else: commands = [ - "patchelf --set-rpath '$ORIGIN/../libs/' " + "patchelf --set-rpath '$ORIGIN/../../nvidia/cuda_runtime/lib:$ORIGIN/../libs/' " + env_dict.get("PADDLE_BINARY_DIR") + '/python/paddle/base/' + env_dict.get("FLUID_CORE_NAME") + '.so' ] - commands.append( - "patchelf --set-rpath '$ORIGIN' " - + 
env_dict.get("PADDLE_BINARY_DIR") - + '/python/paddle/libs/' - + env_dict.get("COMMON_NAME") - ) if env_dict.get("WITH_SHARED_PHI") == "ON": commands.append( - "patchelf --set-rpath '$ORIGIN' " + "patchelf --set-rpath '$ORIGIN/../../nvidia/cuda_runtime/lib:$ORIGIN:$ORIGIN/../libs' " + env_dict.get("PADDLE_BINARY_DIR") + '/python/paddle/libs/' + env_dict.get("PHI_NAME") ) if env_dict.get("WITH_SHARED_IR") == "ON": commands.append( - "patchelf --set-rpath '$ORIGIN' " + "patchelf --set-rpath '$ORIGIN:$ORIGIN/../libs' " + env_dict.get("PADDLE_BINARY_DIR") + '/python/paddle/libs/' + env_dict.get("IR_NAME") @@ -1396,6 +1437,10 @@ def get_headers(): def get_setup_parameters(): # get setup_requires setup_requires = get_setup_requires() + if platform.system() == 'Linux' and platform.machine() == 'x86_64': + paddle_cuda_requires = get_paddle_extra_install_requirements() + setup_requires += paddle_cuda_requires + packages = [ 'paddle', 'paddle.libs',