Add clang as a supported cuda compiler

mozilla · Oct 2, 2023 · 2cc6cde · 2cc6cde
1 parent 8253364
commit 2cc6cde
Show file tree

Hide file tree

Showing 4 changed files with 127 additions and 18 deletions.
diff --git a/README.md b/README.md
@@ -12,7 +12,7 @@ sccache - Shared Compilation Cache
 
 sccache is a [ccache](https://ccache.dev/)-like compiler caching tool. It is used as a compiler wrapper and avoids compilation when possible, storing cached results either on [local disk](docs/Local.md) or in one of [several cloud storage backends](#storage-options).
 
-sccache includes support for caching the compilation of C/C++ code, [Rust](docs/Rust.md), as well as NVIDIA's CUDA using [nvcc](https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html).
+sccache includes support for caching the compilation of C/C++ code, [Rust](docs/Rust.md), as well as NVIDIA's CUDA using [nvcc](https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html) or [clang](https://llvm.org/docs/CompileCudaWithLLVM.html).
 
 sccache also provides [icecream](https://github.com/icecc/icecream)-style distributed compilation (automatic packaging of local toolchains) for all supported compilers (including Rust). The distributed compilation system includes several security features that icecream lacks such as authentication, transport layer encryption, and sandboxed compiler execution on build servers. See [the distributed quickstart](docs/DistributedQuickstart.md) guide for more information.
 

diff --git a/src/compiler/clang.rs b/src/compiler/clang.rs
@@ -336,6 +336,80 @@ mod test {
         assert_eq!(ovec!["-arch", "xyz"], a.arch_args);
     }
 
+    // A `.cu` input given without any `-x` flag is expected to parse as CUDA,
+    // producing a single mandatory `obj` output and no extra arguments.
+    #[test]
+    fn test_parse_arguments_cuda() {
+        let a = parses!("-c", "foo.cu", "-o", "foo.o");
+        assert_eq!(Some("foo.cu"), a.input.to_str());
+        assert_eq!(Language::Cuda, a.language);
+        assert_map_contains!(
+            a.outputs,
+            (
+                "obj",
+                ArtifactDescriptor {
+                    path: PathBuf::from("foo.o"),
+                    optional: false
+                }
+            )
+        );
+        // Nothing besides `-c`, the input and `-o` was passed, so neither
+        // argument bucket should have picked anything up.
+        assert!(a.preprocessor_args.is_empty());
+        assert!(a.common_args.is_empty());
+    }
+
+    // A `.cpp` input is expected to parse as CUDA when the language is forced
+    // with `-x cuda` or `-x cu`, and CUDA-specific flags are expected to be
+    // collected in `common_args`.
+    #[test]
+    fn test_parse_arguments_cuda_flags() {
+        // Case 1: `-x cuda` spelling with a single GPU architecture flag.
+        let a = parses!(
+            "-c",
+            "foo.cpp",
+            "-x",
+            "cuda",
+            "--cuda-gpu-arch=sm_50",
+            "-o",
+            "foo.o"
+        );
+        assert_eq!(Some("foo.cpp"), a.input.to_str());
+        assert_eq!(Language::Cuda, a.language);
+        assert_map_contains!(
+            a.outputs,
+            (
+                "obj",
+                ArtifactDescriptor {
+                    path: PathBuf::from("foo.o"),
+                    optional: false
+                }
+            )
+        );
+        assert!(a.preprocessor_args.is_empty());
+        assert_eq!(ovec!["--cuda-gpu-arch=sm_50"], a.common_args);
+
+        // Case 2: `-x cu` spelling with an additional `--no-cuda-include-ptx` flag;
+        // both flags are expected in `common_args` in command-line order.
+        let b = parses!(
+            "-c",
+            "foo.cpp",
+            "-x",
+            "cu",
+            "--cuda-gpu-arch=sm_50",
+            "--no-cuda-include-ptx=sm_50",
+            "-o",
+            "foo.o"
+        );
+        assert_eq!(Some("foo.cpp"), b.input.to_str());
+        assert_eq!(Language::Cuda, b.language);
+        assert_map_contains!(
+            b.outputs,
+            (
+                "obj",
+                ArtifactDescriptor {
+                    path: PathBuf::from("foo.o"),
+                    optional: false
+                }
+            )
+        );
+        assert!(b.preprocessor_args.is_empty());
+        assert_eq!(
+            ovec!["--cuda-gpu-arch=sm_50", "--no-cuda-include-ptx=sm_50"],
+            b.common_args
+        );
+    }
+
     #[test]
     fn test_dependent_lib() {
         let a = parses!(

diff --git a/src/compiler/gcc.rs b/src/compiler/gcc.rs
@@ -367,6 +367,7 @@ where
                     "objective-c" => Some(Language::ObjectiveC),
                     "objective-c++" => Some(Language::ObjectiveCxx),
                     "cu" => Some(Language::Cuda),
+                    "cuda" => Some(Language::Cuda),
                     _ => cannot_cache!("-x"),
                 };
             }

diff --git a/tests/system.rs b/tests/system.rs
@@ -38,7 +38,7 @@ use std::io::{self, Read, Write};
 use std::path::{Path, PathBuf};
 use std::process::{Command, Output, Stdio};
 use std::str;
-use which::which_in;
+use which::{which, which_in};
 
 mod harness;
 
@@ -57,7 +57,7 @@ const COMPILERS: &[&str] = &["gcc", "clang", "clang++"];
 #[cfg(target_os = "macos")]
 const COMPILERS: &[&str] = &["clang", "clang++"];
 
-const CUDA_COMPILERS: &[&str] = &["nvcc"];
+const CUDA_COMPILERS: &[&str] = &["nvcc", "clang++"];
 
 //TODO: could test gcc when targeting mingw.
 
@@ -88,6 +88,34 @@ fn compile_cmdline<T: AsRef<OsStr>>(
     }
     arg
 }
+// TODO: This will fail if gcc/clang is actually a ccache wrapper, which is
+// the default on Fedora, for example.
+/// Builds the argument list used to compile the CUDA source `input` into
+/// `output` with the given CUDA compiler.
+///
+/// `compiler` selects the flag dialect (`"nvcc"` or `"clang++"`), `exe` is
+/// the compiler executable and becomes the first element of the returned
+/// list, and `extra_args` are appended after the generated arguments.
+///
+/// # Panics
+///
+/// Panics if `compiler` is neither `"nvcc"` nor `"clang++"`.
+fn compile_cuda_cmdline<T: AsRef<OsStr>>(
+    compiler: &str,
+    exe: T,
+    input: &str,
+    output: &str,
+    mut extra_args: Vec<OsString>,
+) -> Vec<OsString> {
+    let mut arg = match compiler {
+        "nvcc" => vec_from!(OsString, exe.as_ref(), "-c", input, "-o", output),
+        "clang++" => {
+            vec_from!(
+                OsString,
+                exe,
+                "-c",
+                input,
+                "--cuda-gpu-arch=sm_50",
+                // clang++ uses the GNU-style driver, so the object path is
+                // passed as `-o <path>`. `-Fo<path>` is MSVC `cl.exe` syntax;
+                // clang++ would read it as a `-F` search directory instead.
+                "-o",
+                output
+            )
+        }
+        _ => panic!("Unsupported compiler: {}", compiler),
+    };
+    // `append` is a no-op for an empty vector, so no emptiness check is needed.
+    arg.append(&mut extra_args);
+    arg
+}
 
 const INPUT: &str = "test.c";
 const INPUT_CLANG_MULTICALL: &str = "test_clang_multicall.c";
@@ -465,7 +493,7 @@ fn test_cuda_compiles(compiler: Compiler, tempdir: &Path) {
     let out_file = tempdir.join(OUTPUT);
     trace!("compile A");
     sccache_command()
-        .args(&compile_cmdline(
+        .args(&compile_cuda_cmdline(
             name,
             &exe,
             INPUT_FOR_CUDA_A,
@@ -488,7 +516,7 @@ fn test_cuda_compiles(compiler: Compiler, tempdir: &Path) {
     trace!("compile A");
     fs::remove_file(&out_file).unwrap();
     sccache_command()
-        .args(&compile_cmdline(
+        .args(&compile_cuda_cmdline(
             name,
             &exe,
             INPUT_FOR_CUDA_A,
@@ -513,7 +541,7 @@ fn test_cuda_compiles(compiler: Compiler, tempdir: &Path) {
     // phase is correctly running and outputting text
     trace!("compile B");
     sccache_command()
-        .args(&compile_cmdline(
+        .args(&compile_cuda_cmdline(
             name,
             &exe,
             INPUT_FOR_CUDA_B,
@@ -663,18 +691,24 @@ fn find_compilers() -> Vec<Compiler> {
 
 fn find_cuda_compilers() -> Vec<Compiler> {
     let cwd = env::current_dir().unwrap();
-    CUDA_COMPILERS
-        .iter()
-        .filter_map(|c| {
-            which_in(c, env::var_os("PATH"), &cwd)
-                .ok()
-                .map(|full_path| Compiler {
-                    name: c,
-                    exe: full_path.into(),
-                    env_vars: vec![],
-                })
-        })
-        .collect::<Vec<_>>()
+    // CUDA compilers like clang don't come with all of the components needed
+    // for compilation, so a machine is only considered to have any CUDA
+    // compilers when `nvcc` can be found. Without it, report none at all.
+    if which("nvcc").is_err() {
+        return vec![];
+    }
+    // Keep every entry of `CUDA_COMPILERS` that resolves on `PATH`; entries
+    // that cannot be found are silently skipped.
+    CUDA_COMPILERS
+        .iter()
+        .filter_map(|c| {
+            which_in(c, env::var_os("PATH"), &cwd)
+                .ok()
+                .map(|full_path| Compiler {
+                    name: c,
+                    exe: full_path.into(),
+                    env_vars: vec![],
+                })
+        })
+        .collect::<Vec<_>>()
 }
 
 // TODO: This runs multiple test cases, for multiple compilers. It should be