From e4debdc807d48bbb3f91d91a26b6377ac9e2bc8b Mon Sep 17 00:00:00 2001 From: Gavin Zhao Date: Fri, 9 Feb 2024 19:59:22 -0500 Subject: [PATCH] Add basic HIP tests Signed-off-by: Gavin Zhao --- tests/system.rs | 306 ++++++++++++++++++++++++++++++++++++++++++++++- tests/test_a.hip | 10 ++ tests/test_b.hip | 10 ++ tests/test_c.hip | 10 ++ 4 files changed, 335 insertions(+), 1 deletion(-) create mode 100644 tests/test_a.hip create mode 100644 tests/test_b.hip create mode 100644 tests/test_c.hip diff --git a/tests/system.rs b/tests/system.rs index 26de59c50..b85c5cdb7 100644 --- a/tests/system.rs +++ b/tests/system.rs @@ -33,7 +33,7 @@ use serial_test::serial; use std::collections::HashMap; use std::env; use std::ffi::{OsStr, OsString}; -use std::fmt; +use std::fmt::{self, format}; use std::io::{self, Read, Write}; use std::path::{Path, PathBuf}; use std::process::{Command, Output, Stdio}; @@ -105,6 +105,7 @@ fn compile_cmdline>( } arg } + // TODO: This will fail if gcc/clang is actually a ccache wrapper, as it is the // default case on Fedora, e.g. fn compile_cuda_cmdline>( @@ -134,6 +135,33 @@ fn compile_cuda_cmdline>( arg } +// TODO: This will fail if gcc/clang is actually a ccache wrapper, as it is the +// default case on Fedora, e.g. +// +// archs is a list of GPU architectures to compile for. +fn compile_hip_cmdline>( + compiler: &str, + exe: T, + input: &str, + output: &str, + archs: &Vec, + mut extra_args: Vec, +) -> Vec { + let mut arg = match compiler { + "clang" => { + vec_from!(OsString, exe, "-x", "hip", "-c", input, "-o", output) + } + _ => panic!("Unsupported compiler: \"{}\"", compiler), + }; + for arch in archs { + arg.push(format!("--offload-arch={}", arch).into()); + } + if !extra_args.is_empty() { + arg.append(&mut extra_args) + } + arg +} + const INPUT: &str = "test.c"; const INPUT_CLANG_MULTICALL: &str = "test_clang_multicall.c"; const INPUT_WITH_WHITESPACE: &str = "test_whitespace.c"; @@ -144,6 +172,9 @@ const INPUT_WITH_DEFINE: &str = "test_with_define.c"; const INPUT_FOR_CUDA_A: &str = "test_a.cu"; const INPUT_FOR_CUDA_B: &str = "test_b.cu"; const INPUT_FOR_CUDA_C: &str = "test_c.cu"; +const INPUT_FOR_HIP_A: &str = "test_a.hip"; +const INPUT_FOR_HIP_B: &str = "test_b.hip"; +const INPUT_FOR_HIP_C: &str = "test_c.hip"; const OUTPUT: &str = "test.o"; // Copy the source files into the tempdir so we can compile with relative paths, since the commandline winds up in the hash key. @@ -688,6 +719,210 @@ fn run_sccache_cuda_command_tests(compiler: Compiler, tempdir: &Path) { test_proper_lang_stat_tracking(compiler, tempdir); } +fn test_hip_compiles(compiler: &Compiler, tempdir: &Path) { + let Compiler { + name, + exe, + env_vars, + } = compiler; + trace!("run_sccache_command_test: {}", name); + // Compile multiple source files. + copy_to_tempdir(&[INPUT_FOR_HIP_A, INPUT_FOR_HIP_B], tempdir); + + let target_arch = vec!["gfx900".to_string()]; + + let out_file = tempdir.join(OUTPUT); + trace!("compile A"); + sccache_command() + .args(&compile_hip_cmdline( + name, + exe, + INPUT_FOR_HIP_A, + OUTPUT, + &target_arch, + Vec::new(), + )) + .current_dir(tempdir) + .envs(env_vars.clone()) + .assert() + .success(); + assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap()); + trace!("request stats"); + get_stats(|info| { + assert_eq!(1, info.stats.compile_requests); + assert_eq!(1, info.stats.requests_executed); + assert_eq!(0, info.stats.cache_hits.all()); + assert_eq!(1, info.stats.cache_misses.all()); + assert_eq!(&1, info.stats.cache_misses.get("HIP").unwrap()); + let adv_hip_key = adv_key_kind("hip", compiler.name); + assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_hip_key).unwrap()); + }); + trace!("compile A"); + fs::remove_file(&out_file).unwrap(); + sccache_command() + .args(&compile_hip_cmdline( + name, + exe, + INPUT_FOR_HIP_A, + OUTPUT, + &target_arch, + Vec::new(), + )) + .current_dir(tempdir) + .envs(env_vars.clone()) + .assert() + .success(); + assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap()); + trace!("request stats"); + get_stats(|info| { + assert_eq!(2, info.stats.compile_requests); + assert_eq!(2, info.stats.requests_executed); + assert_eq!(1, info.stats.cache_hits.all()); + assert_eq!(1, info.stats.cache_misses.all()); + assert_eq!(&1, info.stats.cache_hits.get("HIP").unwrap()); + assert_eq!(&1, info.stats.cache_misses.get("HIP").unwrap()); + let adv_hip_key = adv_key_kind("hip", compiler.name); + assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_hip_key).unwrap()); + assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_hip_key).unwrap()); + }); + // By compiling another input source we verify that the pre-processor + // phase is correctly running and outputting text + trace!("compile B"); + sccache_command() + .args(&compile_hip_cmdline( + name, + exe, + INPUT_FOR_HIP_B, + OUTPUT, + &target_arch, + Vec::new(), + )) + .current_dir(tempdir) + .envs(env_vars.clone()) + .assert() + .success(); + assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap()); + trace!("request stats"); + get_stats(|info| { + assert_eq!(3, info.stats.compile_requests); + assert_eq!(3, info.stats.requests_executed); + assert_eq!(1, info.stats.cache_hits.all()); + assert_eq!(2, info.stats.cache_misses.all()); + assert_eq!(&1, info.stats.cache_hits.get("HIP").unwrap()); + assert_eq!(&2, info.stats.cache_misses.get("HIP").unwrap()); + let adv_hip_key = adv_key_kind("hip", compiler.name); + assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_hip_key).unwrap()); + assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_hip_key).unwrap()); + }); +} + +fn test_hip_compiles_multi_targets(compiler: &Compiler, tempdir: &Path) { + let Compiler { + name, + exe, + env_vars, + } = compiler; + trace!("run_sccache_command_test: {}", name); + // Compile multiple source files. + copy_to_tempdir(&[INPUT_FOR_HIP_A, INPUT_FOR_HIP_B], tempdir); + + let target_arches: Vec = vec!["gfx900".to_string(), "gfx1030".to_string()]; + + let out_file = tempdir.join(OUTPUT); + trace!("compile A with gfx900 and gfx1030"); + sccache_command() + .args(&compile_hip_cmdline( + name, + exe, + INPUT_FOR_HIP_A, + OUTPUT, + &target_arches, + Vec::new(), + )) + .current_dir(tempdir) + .envs(env_vars.clone()) + .assert() + .success(); + assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap()); + trace!("request stats"); + get_stats(|info| { + assert_eq!(1, info.stats.compile_requests); + assert_eq!(1, info.stats.requests_executed); + assert_eq!(0, info.stats.cache_hits.all()); + assert_eq!(1, info.stats.cache_misses.all()); + assert_eq!(&1, info.stats.cache_misses.get("HIP").unwrap()); + let adv_hip_key = adv_key_kind("hip", compiler.name); + assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_hip_key).unwrap()); + }); + + trace!("compile A with with gfx900 and gfx1030 again"); + fs::remove_file(&out_file).unwrap(); + sccache_command() + .args(&compile_hip_cmdline( + name, + exe, + INPUT_FOR_HIP_A, + OUTPUT, + &target_arches, + Vec::new(), + )) + .current_dir(tempdir) + .envs(env_vars.clone()) + .assert() + .success(); + assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap()); + trace!("request stats"); + get_stats(|info| { + assert_eq!(2, info.stats.compile_requests); + assert_eq!(2, info.stats.requests_executed); + assert_eq!(1, info.stats.cache_hits.all()); + assert_eq!(1, info.stats.cache_misses.all()); + assert_eq!(&1, info.stats.cache_hits.get("HIP").unwrap()); + assert_eq!(&1, info.stats.cache_misses.get("HIP").unwrap()); + let adv_hip_key = adv_key_kind("hip", compiler.name); + assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_hip_key).unwrap()); + assert_eq!(&1, info.stats.cache_misses.get_adv(&adv_hip_key).unwrap()); + }); + + // By compiling another input source we verify that the pre-processor + // phase is correctly running and outputting text + trace!("compile B with gfx900 and gfx1030"); + sccache_command() + .args(&compile_hip_cmdline( + name, + exe, + INPUT_FOR_HIP_B, + OUTPUT, + &target_arches, + Vec::new(), + )) + .current_dir(tempdir) + .envs(env_vars.clone()) + .assert() + .success(); + assert!(fs::metadata(&out_file).map(|m| m.len() > 0).unwrap()); + trace!("request stats"); + get_stats(|info| { + assert_eq!(3, info.stats.compile_requests); + assert_eq!(3, info.stats.requests_executed); + assert_eq!(1, info.stats.cache_hits.all()); + assert_eq!(2, info.stats.cache_misses.all()); + assert_eq!(&1, info.stats.cache_hits.get("HIP").unwrap()); + assert_eq!(&2, info.stats.cache_misses.get("HIP").unwrap()); + let adv_hip_key = adv_key_kind("hip", compiler.name); + assert_eq!(&1, info.stats.cache_hits.get_adv(&adv_hip_key).unwrap()); + assert_eq!(&2, info.stats.cache_misses.get_adv(&adv_hip_key).unwrap()); + }); +} + +fn run_sccache_hip_command_tests(compiler: Compiler, tempdir: &Path) { + zero_stats(); + test_hip_compiles(&compiler, tempdir); + zero_stats(); + test_hip_compiles_multi_targets(&compiler, tempdir); + // test_proper_lang_stat_tracking(compiler, tempdir); +} + fn test_clang_multicall(compiler: Compiler, tempdir: &Path) { let Compiler { name, @@ -843,6 +1078,46 @@ fn find_cuda_compilers() -> Vec { compilers } +// We detect the HIP Clang compiler through 2 methods: +// 1. If the env var HIP_CLANG_PATH is set, try $HIP_CLANG_PATH/clang. This is the same behavior as +// hipcc, but is rarely know, so we have another option. +// 2. If the env var ROCM_PATH is set, try $ROCM_PATH/llvm/bin/clang. This is the location in +// AMD's official debian packages. +// 3. Otherwise, just bail. +fn find_hip_compiler() -> Option { + let env_vars: Vec<(OsString, OsString)> = env::vars_os().collect(); + + if let Ok(hip_clang_path) = env::var("HIP_CLANG_PATH") { + let hip_clang_path = Path::new(&hip_clang_path).join("clang"); + + match hip_clang_path.try_exists() { + Ok(true) => { + return Some(Compiler { + name: "clang", + exe: hip_clang_path.into_os_string(), + env_vars, + }); + } + _ => {} + } + } + if let Ok(rocm_path) = env::var("ROCM_PATH") { + let hip_clang_path = Path::new(&rocm_path).join("llvm").join("bin").join("clang"); + + match hip_clang_path.try_exists() { + Ok(true) => { + return Some(Compiler { + name: "hip", + exe: hip_clang_path.into_os_string(), + env_vars, + }); + } + _ => {} + } + } + None +} + // TODO: This runs multiple test cases, for multiple compilers. It should be // split up to run them individually. In the current form, it is hard to see // which sub test cases are executed, and if one fails, the remaining tests @@ -926,3 +1201,32 @@ fn test_cuda_sccache_command(preprocessor_cache_mode: bool) { stop_local_daemon(); } } + +#[test_case(true ; "with preprocessor cache")] +#[test_case(false ; "without preprocessor cache")] +#[serial] +#[cfg(any(unix, target_env = "msvc"))] +fn test_hip_sccache_command(preprocessor_cache_mode: bool) { + let _ = env_logger::try_init(); + let tempdir = tempfile::Builder::new() + .prefix("sccache_system_test") + .tempdir() + .unwrap(); + + if let Some(compiler) = find_hip_compiler() { + stop_local_daemon(); + // Create the configurations + let sccache_cfg = sccache_client_cfg(tempdir.path(), preprocessor_cache_mode); + write_json_cfg(tempdir.path(), "sccache-cfg.json", &sccache_cfg); + let sccache_cached_cfg_path = tempdir.path().join("sccache-cached-cfg"); + // Start a server. + trace!("start server"); + start_local_daemon( + &tempdir.path().join("sccache-cfg.json"), + &sccache_cached_cfg_path, + ); + run_sccache_hip_command_tests(compiler, tempdir.path()); + zero_stats(); + stop_local_daemon(); + } +} diff --git a/tests/test_a.hip b/tests/test_a.hip new file mode 100644 index 000000000..d723f3fac --- /dev/null +++ b/tests/test_a.hip @@ -0,0 +1,10 @@ + +#include +#include + +__global__ void cuda_entry_point(int*, int*) {} +__device__ void cuda_device_func(int*, int*) {} + +int main() { + printf("%s says hello world\n", __FILE__); +} diff --git a/tests/test_b.hip b/tests/test_b.hip new file mode 100644 index 000000000..d723f3fac --- /dev/null +++ b/tests/test_b.hip @@ -0,0 +1,10 @@ + +#include +#include + +__global__ void cuda_entry_point(int*, int*) {} +__device__ void cuda_device_func(int*, int*) {} + +int main() { + printf("%s says hello world\n", __FILE__); +} diff --git a/tests/test_c.hip b/tests/test_c.hip new file mode 100644 index 000000000..d723f3fac --- /dev/null +++ b/tests/test_c.hip @@ -0,0 +1,10 @@ + +#include +#include + +__global__ void cuda_entry_point(int*, int*) {} +__device__ void cuda_device_func(int*, int*) {} + +int main() { + printf("%s says hello world\n", __FILE__); +}