Skip to content

Commit

Permalink
[SROA] Propagate no-signed-zeros(nsz) fast-math flag on the phi node …
Browse files Browse the repository at this point in the history
…using function attribute (#83381)

It is expected that the sequence `return X > 0.0 ? X : -X`, compiled with
-Ofast, produces the fabs intrinsic. However, at this point, LLVM is unable
to do so.

The above sequence goes through the following transformation during the
pass pipeline:
1) The SROA pass generates the phi node. Here, it does not infer the
fast-math flags on the phi node, unlike what the clang frontend typically does.
2) The phi node eventually gets translated into a select instruction.
Because the no-signed-zeros (nsz) fast-math flag is missing on the select
instruction, the InstCombine pass fails to fold the sequence into the fabs
intrinsic.

This patch, as a part of SROA, tries to propagate the nsz fast-math flag to
the phi node from the function attribute, which enables this folding.

Closes #51601

Co-authored-by: Sushant Gokhale <sgokhale@nvidia.com>
  • Loading branch information
yashssh and sushgokh authored Jul 2, 2024
1 parent 7ee421d commit cd1e6a5
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 0 deletions.
12 changes: 12 additions & 0 deletions llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/Support/Casting.h"
Expand Down Expand Up @@ -1122,6 +1123,17 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
for (unsigned i = 0; i != NumEdges; ++i)
APN->addIncoming(IncomingVals[AllocaNo], Pred);

// For the sequence `return X > 0.0 ? X : -X`, it is expected that this
// results in fabs intrinsic. However, without no-signed-zeros(nsz) flag
// on the phi node generated at this stage, fabs folding does not
// happen. So, we try to infer nsz flag from the function attributes to
// enable this fabs folding.
if (APN->isComplete() && isa<FPMathOperator>(APN) &&
BB->getParent()
->getFnAttribute("no-signed-zeros-fp-math")
.getValueAsBool())
APN->setHasNoSignedZeros(true);

// The currently active variable for this block is now the PHI.
IncomingVals[AllocaNo] = APN;
AllocaATInfo[AllocaNo].updateForNewPhi(APN, DIB);
Expand Down
29 changes: 29 additions & 0 deletions llvm/test/Transforms/PhaseOrdering/generate-fabs.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes='default<O1>' -S < %s | FileCheck %s

; The following test must generate the fabs intrinsic. It goes through the
; following stages:
; 1. SROA propagates the nsz function attribute onto the phi node.
; 2. The SimplifyCFG pass converts the phi node to a select.
; 3. InstCombine converts the select with the nsz flag into the fabs intrinsic.

; End-to-end input for the fabs fold: computes `%0 < 0.0 ? -%0 : %0` through a
; stack slot. The function is marked "no-signed-zeros-fp-math"="true"; the
; fcmp and fneg already carry `nnan nsz`, but the load/store of %x carry no
; fast-math flags. The second argument (%1) is not used by the body.
; The assertions below expect the whole body to collapse to one llvm.fabs.f64
; call on the first argument.
define double @fabs_fcmp_olt_nsz_func_attr(double %0, double %1) "no-signed-zeros-fp-math"="true" {
; CHECK-LABEL: define double @fabs_fcmp_olt_nsz_func_attr(
; CHECK-SAME: double [[TMP0:%.*]], double [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[X_0:%.*]] = tail call nnan nsz double @llvm.fabs.f64(double [[TMP0]])
; CHECK-NEXT: ret double [[X_0]]
entry:
; %x starts out holding the unmodified argument.
%x = alloca double
store double %0, ptr %x
; Take the negating path only when the argument is (ordered) less than zero.
%cmp = fcmp nnan nsz olt double %0, 0.000000e+00
br i1 %cmp, label %if.then, label %return

if.then: ; preds = %entry
; Overwrite the slot with the negated argument.
%fneg = fneg nnan nsz double %0
store double %fneg, ptr %x
br label %return

return: ; preds = %entry, %if.then
; This load is the merge point of the two stored values.
%ret = load double, ptr %x
ret double %ret
}
79 changes: 79 additions & 0 deletions llvm/test/Transforms/SROA/propagate-fast-math-flags-on-phi.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt < %s -passes='sroa' -S | FileCheck %s
; Scalar case with "no-signed-zeros-fp-math"="true": the phi created when the
; alloca is promoted is expected to carry the `nsz` flag.
; %x.addr is written only on the if.then path, so the expected phi has `undef`
; as the incoming value for the edge from entry.
define double @phi_with_nsz(double %x) "no-signed-zeros-fp-math"="true" {
; CHECK-LABEL: define double @phi_with_nsz(
; CHECK-SAME: double [[X:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[X]], 0.000000e+00
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
; CHECK: if.then:
; CHECK-NEXT: [[FNEG:%.*]] = fneg double [[X]]
; CHECK-NEXT: br label [[RETURN]]
; CHECK: return:
; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi nsz double [ [[FNEG]], [[IF_THEN]] ], [ undef, [[ENTRY:%.*]] ]
; CHECK-NEXT: ret double [[X_ADDR_0]]
entry:
; No store to %x.addr happens in this block.
%x.addr = alloca double
%cmp = fcmp olt double %x, 0.0
br i1 %cmp, label %if.then, label %return

if.then: ; preds = %entry
; The only store to the slot: the negated argument. Note the fneg itself has
; no fast-math flags; the expected nsz appears on the phi only.
%fneg = fneg double %x
store double %fneg, ptr %x.addr
br label %return

return: ; preds = %entry,%if.then
; This load is what gets replaced by the phi.
%retval = load double, ptr %x.addr
ret double %retval
}

; Vector case with "no-signed-zeros-fp-math"="true": the promoted phi has type
; <2 x double> and is still expected to carry `nsz`.
; The slot is stored on both paths (%a in entry, %b in if.then), so both phi
; incoming values are defined. The %x argument is not used by the body.
define <2 x double> @vector_phi_with_nsz(<2 x double> %x, i1 %cmp, <2 x double> %a, <2 x double> %b) "no-signed-zeros-fp-math"="true" {
; CHECK-LABEL: define <2 x double> @vector_phi_with_nsz(
; CHECK-SAME: <2 x double> [[X:%.*]], i1 [[CMP:%.*]], <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
; CHECK: if.then:
; CHECK-NEXT: br label [[RETURN]]
; CHECK: return:
; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi nsz <2 x double> [ [[B]], [[IF_THEN]] ], [ [[A]], [[ENTRY:%.*]] ]
; CHECK-NEXT: ret <2 x double> [[X_ADDR_0]]
entry:
; Value seen at the merge point when the branch skips if.then.
%x.addr = alloca <2 x double>
store <2 x double> %a, ptr %x.addr
br i1 %cmp, label %if.then, label %return

if.then: ; preds = %entry
; Value seen at the merge point when the branch goes through if.then.
store <2 x double> %b, ptr %x.addr
br label %return

return: ; preds = %entry,%if.then
; This load is what gets replaced by the phi.
%retval = load <2 x double>, ptr %x.addr
ret <2 x double> %retval
}

; Negative case: same body as the scalar nsz test, but the function attribute
; is "no-signed-zeros-fp-math"="false". The promoted phi is expected to have
; no fast-math flags.
; %x.addr is written only on the if.then path, so the expected phi has `undef`
; as the incoming value for the edge from entry.
define double @phi_without_nsz(double %x) "no-signed-zeros-fp-math"="false" {
; CHECK-LABEL: define double @phi_without_nsz(
; CHECK-SAME: double [[X:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[X]], 0.000000e+00
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
; CHECK: if.then:
; CHECK-NEXT: [[FNEG:%.*]] = fneg double [[X]]
; CHECK-NEXT: br label [[RETURN]]
; CHECK: return:
; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi double [ [[FNEG]], [[IF_THEN]] ], [ undef, [[ENTRY:%.*]] ]
; CHECK-NEXT: ret double [[X_ADDR_0]]
entry:
; No store to %x.addr happens in this block.
%x.addr = alloca double
%cmp = fcmp olt double %x, 0.0
br i1 %cmp, label %if.then, label %return

if.then: ; preds = %entry
; The only store to the slot: the negated argument.
%fneg = fneg double %x
store double %fneg, ptr %x.addr
br label %return

return: ; preds = %entry,%if.then
; This load is what gets replaced by the phi.
%retval = load double, ptr %x.addr
ret double %retval
}

0 comments on commit cd1e6a5

Please sign in to comment.