From a0a7d1166709652ba4ea7dfac743f343c1744892 Mon Sep 17 00:00:00 2001 From: Aaron Siddhartha Mondal Date: Tue, 28 May 2024 08:32:22 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=8D=A6=20Bring=20CUDA=20setup=20in=20sync?= =?UTF-8?q?=20with=20recent=20nixpkgs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a subtle bug. The CUDA_TOOLKIT changed on the nix side and no longer contained the actual cuda driver. This lead to only the stubs being added to the runpath of CUDA executables. The stubs are only meant to be used at link time but not functional during runtime. This change prepends the runpath with the correct CUDA driver. Also removes the now obsolete `LL_CUDA_RUNTIME` flag. --- docs/reference/toolchain.md | 17 ++++++++--------- ll/BUILD.bazel | 6 ------ ll/args.bzl | 19 ++++++++++--------- ll/attributes.bzl | 6 ------ ll/toolchain.bzl | 1 - modules/defaultLlEnv.nix | 1 - modules/rules_ll.nix | 2 -- 7 files changed, 18 insertions(+), 34 deletions(-) diff --git a/docs/reference/toolchain.md b/docs/reference/toolchain.md index f7607f2..023b129 100644 --- a/docs/reference/toolchain.md +++ b/docs/reference/toolchain.md @@ -6,14 +6,14 @@ This file declares the `ll_toolchain` rule. ## `ll_toolchain` -
ll_toolchain(name, LL_AMD_INCLUDES, LL_AMD_LIBRARIES, LL_CFLAGS, LL_CUDA_DRIVER, LL_CUDA_RUNTIME,
-             LL_CUDA_TOOLKIT, LL_DYNAMIC_LINKER, LL_LDFLAGS, address_sanitizer, archiver,
-             bitcode_linker, builtin_includes, c_driver, clang_tidy, clang_tidy_runner,
-             compiler_runtime, cov, cpp_abihdrs, cpp_abilib, cpp_driver, cpp_stdhdrs, cpp_stdlib,
-             hip_libraries, hip_runtime, leak_sanitizer, linker, linker_wrapper, llvm_project_deps,
-             machine_code_tool, memory_sanitizer, objcopy, offload_bundler, offload_packager, opt,
-             profdata, profile, rocm_device_libs, symbolizer, thread_sanitizer,
-             undefined_behavior_sanitizer, unwind_library)
+
ll_toolchain(name, LL_AMD_INCLUDES, LL_AMD_LIBRARIES, LL_CFLAGS, LL_CUDA_DRIVER, LL_CUDA_TOOLKIT,
+             LL_DYNAMIC_LINKER, LL_LDFLAGS, address_sanitizer, archiver, bitcode_linker,
+             builtin_includes, c_driver, clang_tidy, clang_tidy_runner, compiler_runtime, cov,
+             cpp_abihdrs, cpp_abilib, cpp_driver, cpp_stdhdrs, cpp_stdlib, hip_libraries, hip_runtime,
+             leak_sanitizer, linker, linker_wrapper, llvm_project_deps, machine_code_tool,
+             memory_sanitizer, objcopy, offload_bundler, offload_packager, opt, profdata, profile,
+             rocm_device_libs, symbolizer, thread_sanitizer, undefined_behavior_sanitizer,
+             unwind_library)
`attributes` @@ -24,7 +24,6 @@ This file declares the `ll_toolchain` rule. | `LL_AMD_LIBRARIES` | Label, optional, defaults to None.

Link search paths for dependencies making use of AMD toolchains.

Affects the `hip_amdgpu` toolchain. | | `LL_CFLAGS` | Label, optional, defaults to None.

Arbitrary flags added to all compile actions. | | `LL_CUDA_DRIVER` | Label, optional, defaults to None.

The path to the CUDA driver.

Affects the `cuda_nvptx` and `hip_nvptx` toolchains. | -| `LL_CUDA_RUNTIME` | Label, optional, defaults to None.

The path to the CUDA runtime.

Affects the `cuda_nvptx` and `hip_nvptx` toolchains. | | `LL_CUDA_TOOLKIT` | Label, optional, defaults to None.

The path to the CUDA toolkit.

Affects the `cuda_nvptx` and `hip_nvptx` toolchains. | | `LL_DYNAMIC_LINKER` | Label, optional, defaults to None.

The linker from the glibc we compile and link against. | | `LL_LDFLAGS` | Label, optional, defaults to None.

Arbitrary flags added to all link actions. | diff --git a/ll/BUILD.bazel b/ll/BUILD.bazel index ce73067..a8244a8 100644 --- a/ll/BUILD.bazel +++ b/ll/BUILD.bazel @@ -94,7 +94,6 @@ string_flag( "LL_AMD_INCLUDES", "LL_AMD_LIBRARIES", "LL_CUDA_TOOLKIT", - "LL_CUDA_RUNTIME", "LL_CUDA_DRIVER", # Unset values default to an empty string. @@ -125,11 +124,6 @@ ll_toolchain( ":cuda_nvptx": ":LL_CUDA_DRIVER", "//conditions:default": "LL_UNSET", }), - LL_CUDA_RUNTIME = select({ - ":hip_nvptx": ":LL_CUDA_RUNTIME", - ":cuda_nvptx": ":LL_CUDA_RUNTIME", - "//conditions:default": "LL_UNSET", - }), LL_CUDA_TOOLKIT = select({ ":hip_nvptx": ":LL_CUDA_TOOLKIT", ":cuda_nvptx": ":LL_CUDA_TOOLKIT", diff --git a/ll/args.bzl b/ll/args.bzl index 4766470..9a168d8 100644 --- a/ll/args.bzl +++ b/ll/args.bzl @@ -484,15 +484,16 @@ def link_executable_args(ctx, in_files, out_file, mode): "cuda_nvptx", "hip_nvptx", ]: - for location in [toolchain.LL_CUDA_TOOLKIT, toolchain.LL_CUDA_RUNTIME]: - if location != "": - args.add(location, format = "-rpath=%s/lib") - args.add(location, format = "-L%s/lib") - - # TODO: Not pretty. With the right nix packages we can probably - # do this more elegantly. - args.add(location, format = "-rpath=%s/lib/stubs") - args.add(location, format = "-L%s/lib/stubs") + # Both the CUDA driver and the CUDA toolkit contain `libcuda.so`. + # Link against `/lib/libcuda.so` at build time, but make + # sure that `/lib/libcuda.so` takes precedence at runtime. + if toolchain.LL_CUDA_DRIVER != "": + args.add(toolchain.LL_CUDA_DRIVER, format = "-rpath=%s/lib") + + if toolchain.LL_CUDA_TOOLKIT != "": + args.add(toolchain.LL_CUDA_TOOLKIT, format = "-rpath=%s/lib") + args.add(toolchain.LL_CUDA_TOOLKIT, format = "-L%s/lib") + args.add(toolchain.LL_CUDA_TOOLKIT, format = "-L%s/lib/stubs") args.add("-lcuda") args.add("-lcudart_static") diff --git a/ll/attributes.bzl b/ll/attributes.bzl index 2daa33d..2ad865e 100644 --- a/ll/attributes.bzl +++ b/ll/attributes.bzl @@ -589,12 +589,6 @@ LL_TOOLCHAIN_ATTRS = { Affects the `cuda_nvptx` and `hip_nvptx` toolchains. """, ), - "LL_CUDA_RUNTIME": attr.label( - doc = """The path to the CUDA runtime. - - Affects the `cuda_nvptx` and `hip_nvptx` toolchains. - """, - ), "LL_CUDA_DRIVER": attr.label( doc = """The path to the CUDA driver. diff --git a/ll/toolchain.bzl b/ll/toolchain.bzl index a28531b..461afa9 100644 --- a/ll/toolchain.bzl +++ b/ll/toolchain.bzl @@ -70,7 +70,6 @@ def _ll_toolchain_impl(ctx): LL_AMD_INCLUDES = ctx.attr.LL_AMD_INCLUDES[BuildSettingInfo].value, LL_AMD_LIBRARIES = ctx.attr.LL_AMD_LIBRARIES[BuildSettingInfo].value, LL_CUDA_TOOLKIT = ctx.attr.LL_CUDA_TOOLKIT[BuildSettingInfo].value, - LL_CUDA_RUNTIME = ctx.attr.LL_CUDA_RUNTIME[BuildSettingInfo].value, LL_CUDA_DRIVER = ctx.attr.LL_CUDA_DRIVER[BuildSettingInfo].value, ), ] diff --git a/modules/defaultLlEnv.nix b/modules/defaultLlEnv.nix index 03c2e70..e52dcd2 100644 --- a/modules/defaultLlEnv.nix +++ b/modules/defaultLlEnv.nix @@ -74,6 +74,5 @@ in # Flags for CUDA dependencies. "LL_CUDA_TOOLKIT=${lib.strings.optionalString pkgs.config.cudaSupport "${cudatoolkit}"}" - "LL_CUDA_RUNTIME=${lib.strings.optionalString pkgs.config.cudaSupport "${cudatoolkit.lib}"}" "LL_CUDA_DRIVER=${lib.strings.optionalString pkgs.config.cudaSupport "${nvidia_x11}"}" ] diff --git a/modules/rules_ll.nix b/modules/rules_ll.nix index 1e68080..c2bab93 100644 --- a/modules/rules_ll.nix +++ b/modules/rules_ll.nix @@ -49,9 +49,7 @@ in - `LL_DYNAMIC_LINKER` - `LL_AMD_INCLUDES` - `LL_AMD_LIBRARIES` - - `LL_AMD_RPATHS` - `LL_CUDA_TOOLKIT` - - `LL_CUDA_RUNTIME` - `LL_CUDA_DRIVER` Attempting to set any other value will result in Bazel errors.