Skip to content

Commit

Permalink
[Attributor] Change function internalization to not replace uses in i…
Browse files Browse the repository at this point in the history
…nternalized callers

The current implementation of function internalization creates a copy of each
function and replaces every use. This has the downside that the external
versions of the functions will call into the internalized versions of the
functions. This prevents them from being fully independent of each other. This
patch replaces the current internalization scheme with a method that creates
all the copies of the functions intended to be internalized first and then
replaces the uses as long as their caller is not already internalized.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D106931

(cherry picked from commit adbaa39)
  • Loading branch information
jhuber6 authored and tstellar committed Aug 4, 2021
1 parent 4783a6c commit 6d04cd4
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 44 deletions.
20 changes: 20 additions & 0 deletions llvm/include/llvm/Transforms/IPO/Attributor.h
Original file line number Diff line number Diff line change
Expand Up @@ -1855,6 +1855,10 @@ struct Attributor {
///
static void createShallowWrapper(Function &F);

/// Returns true if the function \p F can be internalized, i.e., it has a
/// compatible linkage.
static bool isInternalizable(Function &F);

/// Make another copy of the function \p F such that the copied version has
/// internal linkage afterwards and can be analysed. Then we replace all uses
/// of the original function with the copied one
Expand All @@ -1870,6 +1874,22 @@ struct Attributor {
/// null pointer.
static Function *internalizeFunction(Function &F, bool Force = false);

/// Make copies of each function in the set \p FnSet such that the copied
/// version has internal linkage afterwards and can be analysed. Then we
/// replace all uses of the original function with the copied one. The map
/// \p FnMap contains a mapping of functions to their internalized versions.
///
/// Only non-locally linked functions that have `linkonce_odr` or `weak_odr`
/// linkage can be internalized because these linkages guarantee that other
/// definitions with the same name have the same semantics as this one.
///
/// This version will internalize all the functions in the set \p FnSet at
/// once and then replace the uses. This prevents internalized functions being
/// called by external functions when there is an internalized version in the
/// module.
static bool internalizeFunctions(SmallPtrSetImpl<Function *> &FnSet,
DenseMap<Function *, Function *> &FnMap);

/// Return the data layout associated with the anchor scope.
const DataLayout &getDataLayout() const { return InfoCache.DL; }

Expand Down
104 changes: 70 additions & 34 deletions llvm/lib/Transforms/IPO/Attributor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1925,49 +1925,85 @@ void Attributor::createShallowWrapper(Function &F) {
NumFnShallowWrappersCreated++;
}

/// Report whether \p F is a candidate for internalization. A function
/// qualifies only if it is a definition, does not already have local linkage,
/// and its linkage is not interposable.
bool Attributor::isInternalizable(Function &F) {
  // A bare declaration cannot be internalized.
  if (F.isDeclaration())
    return false;

  // Functions that already have local linkage are rejected.
  if (F.hasLocalLinkage())
    return false;

  // What remains is decided by the linkage kind: interposable linkages are
  // rejected, everything else is accepted.
  return !GlobalValue::isInterposableLinkage(F.getLinkage());
}

/// Create an internalized copy of \p F and return it.
///
/// Returns nullptr when neither AllowDeepWrapper nor \p Force is set, or when
/// isInternalizable(F) is false. Otherwise the work is delegated to
/// internalizeFunctions() with a one-element set, and the copy recorded for
/// \p F in the resulting map is returned.
///
/// NOTE(review): this text appears to come from a rendered commit diff, so
/// lines of the previous implementation seem to be interleaved with the new
/// ones without +/- markers -- confirm against the actual file before relying
/// on the statement order below.
Function *Attributor::internalizeFunction(Function &F, bool Force) {
if (!AllowDeepWrapper && !Force)
return nullptr;
if (F.isDeclaration() || F.hasLocalLinkage() ||
GlobalValue::isInterposableLinkage(F.getLinkage()))
if (!isInternalizable(F))
return nullptr;

Module &M = *F.getParent();
FunctionType *FnTy = F.getFunctionType();

// Create a copy of the current function
Function *Copied = Function::Create(FnTy, F.getLinkage(), F.getAddressSpace(),
F.getName() + ".internalized");
ValueToValueMapTy VMap;
auto *NewFArgIt = Copied->arg_begin();
for (auto &Arg : F.args()) {
auto ArgName = Arg.getName();
NewFArgIt->setName(ArgName);
VMap[&Arg] = &(*NewFArgIt++);
}
SmallVector<ReturnInst *, 8> Returns;
// Route the single function through the batch interface.
SmallPtrSet<Function *, 2> FnSet = {&F};
DenseMap<Function *, Function *> InternalizedFns;
internalizeFunctions(FnSet, InternalizedFns);

// Copy the body of the original function to the new one
CloneFunctionInto(Copied, &F, VMap, CloneFunctionChangeType::LocalChangesOnly,
Returns);

// Set the linkage and visibility late as CloneFunctionInto has some implicit
// requirements.
Copied->setVisibility(GlobalValue::DefaultVisibility);
Copied->setLinkage(GlobalValue::PrivateLinkage);
// Hand back the copy that internalizeFunctions() recorded for F.
return InternalizedFns[&F];
}

// Copy metadata
SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
F.getAllMetadata(MDs);
for (auto MDIt : MDs)
if (!Copied->hasMetadata())
Copied->addMetadata(MDIt.first, *MDIt.second);
/// Internalize every function in \p FnSet as a batch.
///
/// If any function in \p FnSet is not internalizable, returns false before
/// anything is created or \p FnMap is touched. Otherwise \p FnMap is cleared,
/// a copy named "<name>.internalized" is created for each function and
/// recorded as FnMap[original] = copy, and only afterwards are uses of the
/// originals redirected to the copies. Returns true in that case.
///
/// NOTE(review): this text appears to come from a rendered commit diff; a few
/// statements below (the ones using `F.` and `Copied` outside the loop, and
/// `return Copied;`) look like removed lines of the previous implementation
/// shown without +/- markers -- confirm against the actual file.
bool Attributor::internalizeFunctions(SmallPtrSetImpl<Function *> &FnSet,
DenseMap<Function *, Function *> &FnMap) {
// All-or-nothing: bail out if a single candidate cannot be internalized.
for (Function *F : FnSet)
if (!Attributor::isInternalizable(*F))
return false;

M.getFunctionList().insert(F.getIterator(), Copied);
F.replaceAllUsesWith(Copied);
Copied->setDSOLocal(true);
// Start from an empty mapping; entries from a previous call are discarded.
FnMap.clear();
// Generate the internalized version of each function.
for (Function *F : FnSet) {
Module &M = *F->getParent();
FunctionType *FnTy = F->getFunctionType();

// Create a copy of the current function
Function *Copied =
Function::Create(FnTy, F->getLinkage(), F->getAddressSpace(),
F->getName() + ".internalized");
// Map each original argument to the corresponding argument of the copy and
// carry the argument names over.
ValueToValueMapTy VMap;
auto *NewFArgIt = Copied->arg_begin();
for (auto &Arg : F->args()) {
auto ArgName = Arg.getName();
NewFArgIt->setName(ArgName);
VMap[&Arg] = &(*NewFArgIt++);
}
SmallVector<ReturnInst *, 8> Returns;

// Copy the body of the original function to the new one
CloneFunctionInto(Copied, F, VMap,
CloneFunctionChangeType::LocalChangesOnly, Returns);

// Set the linkage and visibility late as CloneFunctionInto has some
// implicit requirements.
Copied->setVisibility(GlobalValue::DefaultVisibility);
Copied->setLinkage(GlobalValue::PrivateLinkage);

// Copy metadata. Entries are only added while the copy has no metadata
// attached.
SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
F->getAllMetadata(MDs);
for (auto MDIt : MDs)
if (!Copied->hasMetadata())
Copied->addMetadata(MDIt.first, *MDIt.second);

// Place the copy directly before the original in the module's function list
// and remember the original -> copy association.
M.getFunctionList().insert(F->getIterator(), Copied);
Copied->setDSOLocal(true);
FnMap[F] = Copied;
}

// Replace all uses of the old function with the new internalized function
// unless the caller is a function that was just internalized.
for (Function *F : FnSet) {
auto &InternalizedFn = FnMap[F];
// A use is rewritten only when its user is a call-like instruction whose
// enclosing function is not a key of FnMap. Uses whose user is not a
// CallBase are left pointing at the original.
auto IsNotInternalized = [&](Use &U) -> bool {
if (auto *CB = dyn_cast<CallBase>(U.getUser()))
return !FnMap.lookup(CB->getCaller());
return false;
};
F->replaceUsesWithIf(InternalizedFn, IsNotInternalized);
}

return Copied;
return true;
}

bool Attributor::isValidFunctionSignatureRewrite(
Expand Down
16 changes: 10 additions & 6 deletions llvm/lib/Transforms/IPO/OpenMPOpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4176,28 +4176,32 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
ORE.emit([&]() {
OptimizationRemarkAnalysis ORA(DEBUG_TYPE, "OMP140", &F);
return ORA << "Could not internalize function. "
<< "Some optimizations may not be possible.";
<< "Some optimizations may not be possible. [OMP140]";
});
};

// Create internal copies of each function if this is a kernel Module. This
// allows interprocedural passes to see every call edge.
DenseSet<const Function *> InternalizedFuncs;
if (isOpenMPDevice(M))
DenseMap<Function *, Function *> InternalizedMap;
if (isOpenMPDevice(M)) {
SmallPtrSet<Function *, 16> InternalizeFns;
for (Function &F : M)
if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F) &&
!DisableInternalization) {
if (Attributor::internalizeFunction(F, /* Force */ true)) {
InternalizedFuncs.insert(&F);
if (Attributor::isInternalizable(F)) {
InternalizeFns.insert(&F);
} else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) {
EmitRemark(F);
}
}

Attributor::internalizeFunctions(InternalizeFns, InternalizedMap);
}

// Look at every function in the Module unless it was internalized.
SmallVector<Function *, 16> SCC;
for (Function &F : M)
if (!F.isDeclaration() && !InternalizedFuncs.contains(&F))
if (!F.isDeclaration() && !InternalizedMap.lookup(&F))
SCC.push_back(&F);

if (SCC.empty())
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Transforms/OpenMP/custom_state_machines.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1664,15 +1664,15 @@ attributes #10 = { convergent nounwind readonly willreturn }
; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK: if.then:
; CHECK-NEXT: br label [[RETURN:%.*]]
; CHECK: if.end:
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[A]], 1
; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR7]]
; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR7]]
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR8]]
; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR8]]
; CHECK-NEXT: br label [[RETURN]]
; CHECK: return:
; CHECK-NEXT: ret void
Expand Down

0 comments on commit 6d04cd4

Please sign in to comment.