Skip to content

Commit

Permalink
[Clang] Automatically enable -fconvergent-functions on GPU targets (#111076)
Browse files Browse the repository at this point in the history

Summary:
This patch causes us to respect the `-fconvergent-functions` and
`-fno-convergent-functions` options correctly. GPU targets should have
this set all the time, but we now offer `-fno-convergent-functions` to
opt out if you want to test the broken behavior. This reworks a lot of
the old, convoluted logic, but I don't think it makes any real changes.
  • Loading branch information
jhuber6 authored Oct 4, 2024
1 parent 8d661fd commit d8f2251
Show file tree
Hide file tree
Showing 13 changed files with 290 additions and 289 deletions.
9 changes: 5 additions & 4 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -1195,10 +1195,11 @@ def cxx_isystem : JoinedOrSeparate<["-"], "cxx-isystem">, Group<clang_i_Group>,
def c : Flag<["-"], "c">, Flags<[NoXarchOption]>,
Visibility<[ClangOption, FlangOption]>, Group<Action_Group>,
HelpText<"Only run preprocess, compile, and assemble steps">;
defm convergent_functions : BoolFOption<"convergent-functions",
LangOpts<"ConvergentFunctions">, DefaultFalse,
NegFlag<SetFalse, [], [ClangOption], "Assume all functions may be convergent.">,
PosFlag<SetTrue, [], [ClangOption, CC1Option]>>;
def fconvergent_functions : Flag<["-"], "fconvergent-functions">,
Visibility<[ClangOption, CC1Option]>,
HelpText< "Assume all functions may be convergent.">;
def fno_convergent_functions : Flag<["-"], "fno-convergent-functions">,
Visibility<[ClangOption, CC1Option]>;

// Common offloading options
let Group = offload_Group in {
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6280,8 +6280,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
options::OPT_fno_unique_internal_linkage_names);
Args.addOptInFlag(CmdArgs, options::OPT_funique_basic_block_section_names,
options::OPT_fno_unique_basic_block_section_names);
Args.addOptInFlag(CmdArgs, options::OPT_fconvergent_functions,
options::OPT_fno_convergent_functions);

if (Arg *A = Args.getLastArg(options::OPT_fsplit_machine_functions,
options::OPT_fno_split_machine_functions)) {
Expand All @@ -6298,6 +6296,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
Args.AddLastArg(CmdArgs, options::OPT_finstrument_functions,
options::OPT_finstrument_functions_after_inlining,
options::OPT_finstrument_function_entry_bare);
Args.AddLastArg(CmdArgs, options::OPT_fconvergent_functions,
options::OPT_fno_convergent_functions);

// NVPTX/AMDGCN doesn't support PGO or coverage. There's no runtime support
// for sampling, overhead of call arc collection is way too high and there's
Expand Down
18 changes: 9 additions & 9 deletions clang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3687,10 +3687,10 @@ void CompilerInvocationBase::GenerateLangArgs(const LangOptions &Opts,
if (Opts.Blocks && !(Opts.OpenCL && Opts.OpenCLVersion == 200))
GenerateArg(Consumer, OPT_fblocks);

if (Opts.ConvergentFunctions &&
!(Opts.OpenCL || (Opts.CUDA && Opts.CUDAIsDevice) || Opts.SYCLIsDevice ||
Opts.HLSL))
if (Opts.ConvergentFunctions)
GenerateArg(Consumer, OPT_fconvergent_functions);
else
GenerateArg(Consumer, OPT_fno_convergent_functions);

if (Opts.NoBuiltin && !Opts.Freestanding)
GenerateArg(Consumer, OPT_fno_builtin);
Expand Down Expand Up @@ -4106,9 +4106,12 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
Opts.Blocks = Args.hasArg(OPT_fblocks) || (Opts.OpenCL
&& Opts.OpenCLVersion == 200);

Opts.ConvergentFunctions = Args.hasArg(OPT_fconvergent_functions) ||
Opts.OpenCL || (Opts.CUDA && Opts.CUDAIsDevice) ||
Opts.SYCLIsDevice || Opts.HLSL;
bool HasConvergentOperations = Opts.OpenMPIsTargetDevice || Opts.OpenCL ||
Opts.CUDAIsDevice || Opts.SYCLIsDevice ||
Opts.HLSL || T.isAMDGPU() || T.isNVPTX();
Opts.ConvergentFunctions =
Args.hasFlag(OPT_fconvergent_functions, OPT_fno_convergent_functions,
HasConvergentOperations);

Opts.NoBuiltin = Args.hasArg(OPT_fno_builtin) || Opts.Freestanding;
if (!Opts.NoBuiltin)
Expand Down Expand Up @@ -4164,9 +4167,6 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
bool IsTargetSpecified =
Opts.OpenMPIsTargetDevice || Args.hasArg(options::OPT_fopenmp_targets_EQ);

Opts.ConvergentFunctions =
Opts.ConvergentFunctions || Opts.OpenMPIsTargetDevice;

if (Opts.OpenMP || Opts.OpenMPSimd) {
if (int Version = getLastArgIntValue(
Args, OPT_fopenmp_version_EQ,
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGen/nvptx_attributes.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_61 -emit-llvm %s -o - | FileCheck %s

// CHECK: Function Attrs: noinline nounwind optnone
// CHECK: Function Attrs: convergent noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@foo
// CHECK-SAME: (ptr noundef [[RET:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
Expand Down
4 changes: 2 additions & 2 deletions clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,9 @@ const B& f(A *a) {


//.
// CHECK: attributes #[[ATTR0]] = { mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// CHECK: attributes #[[ATTR0]] = { convergent mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind willreturn memory(read) }
// CHECK: attributes #[[ATTR2:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// CHECK: attributes #[[ATTR3]] = { nounwind }
// CHECK: attributes #[[ATTR4]] = { noreturn }
//.
Expand Down
4 changes: 2 additions & 2 deletions clang/test/OpenMP/target_parallel_for_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3782,7 +3782,7 @@ int bar(int n){
// CHECK9-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4
// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK9-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4
// CHECK9-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR4:[0-9]+]]
// CHECK9-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv()
// CHECK9-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8
// CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
// CHECK9-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8
Expand Down Expand Up @@ -4561,7 +4561,7 @@ int bar(int n){
// CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4
// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4
// CHECK11-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR4:[0-9]+]]
// CHECK11-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv()
// CHECK11-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8
// CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
// CHECK11-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8
Expand Down
8 changes: 4 additions & 4 deletions clang/test/OpenMP/target_parallel_for_simd_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9131,7 +9131,7 @@ int bar(int n){
// CHECK17-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4
// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4
// CHECK17-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR4:[0-9]+]]
// CHECK17-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv()
// CHECK17-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8
// CHECK17-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
// CHECK17-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8
Expand Down Expand Up @@ -9935,7 +9935,7 @@ int bar(int n){
// CHECK19-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4
// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4
// CHECK19-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR4:[0-9]+]]
// CHECK19-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv()
// CHECK19-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8
// CHECK19-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
// CHECK19-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8
Expand Down Expand Up @@ -10739,7 +10739,7 @@ int bar(int n){
// CHECK21-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4
// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK21-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4
// CHECK21-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR4:[0-9]+]]
// CHECK21-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv()
// CHECK21-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8
// CHECK21-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
// CHECK21-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8
Expand Down Expand Up @@ -11629,7 +11629,7 @@ int bar(int n){
// CHECK23-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4
// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
// CHECK23-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4
// CHECK23-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR4:[0-9]+]]
// CHECK23-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv()
// CHECK23-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8
// CHECK23-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
// CHECK23-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8
Expand Down
Loading

0 comments on commit d8f2251

Please sign in to comment.