Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Coro] [async] Disable inlining in async coroutine splitting for swifttailcc thunks #87549

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions llvm/lib/Transforms/Coroutines/CoroSplit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -256,16 +256,18 @@ static bool replaceCoroEndAsync(AnyCoroEndInst *End) {
// Insert the return instruction.
Builder.SetInsertPoint(End);
Builder.CreateRetVoid();
InlineFunctionInfo FnInfo;

// Remove the rest of the block, by splitting it into an unreachable block.
auto *BB = End->getParent();
BB->splitBasicBlock(End);
BB->getTerminator()->eraseFromParent();

auto InlineRes = InlineFunction(*MustTailCall, FnInfo);
assert(InlineRes.isSuccess() && "Expected inlining to succeed");
(void)InlineRes;
if (MustTailCallFunc->getCallingConv() != CallingConv::SwiftTail) {
InlineFunctionInfo FnInfo;
auto InlineRes = InlineFunction(*MustTailCall, FnInfo);
assert(InlineRes.isSuccess() && "Expected inlining to succeed");
(void)InlineRes;
}

// We have cleaned up the coro.end block above.
return false;
Expand Down Expand Up @@ -1882,8 +1884,11 @@ static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
auto *TailCall = coro::createMustTailCall(Suspend->getDebugLoc(), Fn, TTI,
FnArgs, Builder);
Builder.CreateRetVoid();
InlineFunctionInfo FnInfo;
(void)InlineFunction(*TailCall, FnInfo);

if (Fn->getCallingConv() != CallingConv::SwiftTail) {
InlineFunctionInfo FnInfo;
(void)InlineFunction(*TailCall, FnInfo);
}

// Replace the lvm.coro.async.resume intrisic call.
replaceAsyncResumeFunction(Suspend, Continuation);
Expand Down
39 changes: 39 additions & 0 deletions llvm/test/Transforms/Coroutines/coro-async-c-cc.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
; RUN: opt < %s -O0 -S | FileCheck --check-prefixes=CHECK %s

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@0 = internal constant { i32, i32 } { i32 trunc (i64 sub (i64 ptrtoint (ptr @craSH to i64), i64 ptrtoint (ptr getelementptr inbounds ({ i32, i32 }, ptr @0, i32 0, i32 1) to i64)) to i32), i32 64 }

define dso_local void @af_suspend_fn(ptr %0, i64 %1, ptr %2) #0 {
ret void
}

; Make sure that this test case does not crash and produce a split function.
; CHECK: craSH.resume.0

define dso_local void @craSH(ptr %0) #0 {
%2 = call token @llvm.coro.id.async(i32 64, i32 8, i32 0, ptr @0)
%3 = call ptr @llvm.coro.begin(token %2, ptr null)
%4 = getelementptr inbounds { ptr, { ptr, ptr }, i64, { ptr, i1 }, i64, i64 }, ptr poison, i32 0, i32 0
%5 = call ptr @llvm.coro.async.resume()
store ptr %5, ptr %4, align 8
%6 = call { ptr, ptr, ptr } (i32, ptr, ptr, ...) @llvm.coro.suspend.async.sl_p0p0p0s(i32 0, ptr %5, ptr @ctxt_proj_fn, ptr @af_suspend_fn, ptr poison, i64 -1, ptr poison)
ret void
}

define dso_local ptr @ctxt_proj_fn(ptr %0) #0 {
ret ptr %0
}

declare { ptr, ptr, ptr } @llvm.coro.suspend.async.sl_p0p0p0s(i32, ptr, ptr, ...) #1

declare token @llvm.coro.id.async(i32, i32, i32, ptr) #2

declare ptr @llvm.coro.begin(token, ptr writeonly) #2

declare ptr @llvm.coro.async.resume() #1

attributes #0 = { "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+clwb,+clzero,+crc32,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+mwaitx,+pclmul,+pku,+popcnt,+prfchw,+rdpid,+rdpru,+rdrnd,+rdseed,+sahf,+sha,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+sse4a,+ssse3,+vaes,+vpclmulqdq,+wbnoinvd,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" }
attributes #1 = { nomerge nounwind }
attributes #2 = { nounwind }
158 changes: 158 additions & 0 deletions llvm/test/Transforms/Coroutines/coro-async-mutually-recursive.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
; RUN: opt < %s -passes='default<O2>' -S | FileCheck --check-prefixes=CHECK %s
; RUN: opt < %s -O0 -S | FileCheck --check-prefixes=CHECK-O0 %s


; CHECK-NOT: llvm.coro.suspend.async
; CHECK-O0-NOT: llvm.coro.suspend.async

; This test used to crash during updating the call graph in coro splitting.

target datalayout = "p:64:64:64"

%swift.async_func_pointer = type <{ i32, i32 }>

@"$s1d3fooyySbYaFTu" = hidden global %swift.async_func_pointer <{ i32 trunc (i64 sub (i64 ptrtoint (ptr @"$s1d3fooyySbYaF" to i64), i64 ptrtoint (ptr @"$s1d3fooyySbYaFTu" to i64)) to i32), i32 16 }>
@"$s1d3baryySbYaFTu" = hidden global %swift.async_func_pointer <{ i32 trunc (i64 sub (i64 ptrtoint (ptr @"$s1d3baryySbYaF" to i64), i64 ptrtoint (ptr @"$s1d3baryySbYaFTu" to i64)) to i32), i32 16 }>

define swifttailcc void @"$s1d3fooyySbYaF"(ptr swiftasync %0, i1 %1) {
entry:
%2 = alloca ptr, align 8
%c.debug = alloca i1, align 8
%3 = call token @llvm.coro.id.async(i32 16, i32 16, i32 0, ptr @"$s1d3fooyySbYaFTu")
%4 = call ptr @llvm.coro.begin(token %3, ptr null)
store ptr %0, ptr %2, align 8
call void @llvm.memset.p0.i64(ptr align 8 %c.debug, i8 0, i64 1, i1 false)
store i1 %1, ptr %c.debug, align 8
call void asm sideeffect "", "r"(ptr %c.debug)
%5 = load i32, ptr getelementptr inbounds (%swift.async_func_pointer, ptr @"$s1d3baryySbYaFTu", i32 0, i32 1), align 8
%6 = zext i32 %5 to i64
%7 = call swiftcc ptr @swift_task_alloc(i64 %6) #4
call void @llvm.lifetime.start.p0(i64 -1, ptr %7)
%8 = load ptr, ptr %2, align 8
%9 = getelementptr inbounds <{ ptr, ptr }>, ptr %7, i32 0, i32 0
store ptr %8, ptr %9, align 8
%10 = call ptr @llvm.coro.async.resume()
%11 = getelementptr inbounds <{ ptr, ptr }>, ptr %7, i32 0, i32 1
store ptr %10, ptr %11, align 8
%12 = call { ptr } (i32, ptr, ptr, ...) @llvm.coro.suspend.async.sl_p0s(i32 0, ptr %10, ptr @__swift_async_resume_project_context, ptr @"$s1d3fooyySbYaF.0", ptr @"$s1d3baryySbYaF", ptr %7, i1 %1)
%13 = extractvalue { ptr } %12, 0
%14 = call ptr @__swift_async_resume_project_context(ptr %13)
store ptr %14, ptr %2, align 8
call swiftcc void @swift_task_dealloc(ptr %7) #4
call void @llvm.lifetime.end.p0(i64 -1, ptr %7)
%15 = load ptr, ptr %2, align 8
%16 = getelementptr inbounds <{ ptr, ptr }>, ptr %15, i32 0, i32 1
%17 = load ptr, ptr %16, align 8
%18 = load ptr, ptr %2, align 8
%19 = call i1 (ptr, i1, ...) @llvm.coro.end.async(ptr %4, i1 false, ptr @"$s1d3fooyySbYaF.0.1", ptr %17, ptr %18)
unreachable
}

declare token @llvm.coro.id.async(i32, i32, i32, ptr) #1

declare void @llvm.trap() #2

declare ptr @llvm.coro.begin(token, ptr) #1

declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1 immarg) #3

define hidden swifttailcc void @"$s1d3baryySbYaF"(ptr swiftasync %0, i1 %1) {
entry:
%2 = alloca ptr, align 8
%c.debug = alloca i1, align 8
%3 = call token @llvm.coro.id.async(i32 16, i32 16, i32 0, ptr @"$s1d3baryySbYaFTu")
%4 = call ptr @llvm.coro.begin(token %3, ptr null)
store ptr %0, ptr %2, align 8
call void @llvm.memset.p0.i64(ptr align 8 %c.debug, i8 0, i64 1, i1 false)
store i1 %1, ptr %c.debug, align 8
call void asm sideeffect "", "r"(ptr %c.debug)
br i1 %1, label %5, label %17

5: ; preds = %entry
%6 = xor i1 %1, true
%7 = load i32, ptr getelementptr inbounds (%swift.async_func_pointer, ptr @"$s1d3fooyySbYaFTu", i32 0, i32 1), align 8
%8 = zext i32 %7 to i64
%9 = call swiftcc ptr @swift_task_alloc(i64 %8) #4
call void @llvm.lifetime.start.p0(i64 -1, ptr %9)
%10 = load ptr, ptr %2, align 8
%11 = getelementptr inbounds <{ ptr, ptr }>, ptr %9, i32 0, i32 0
store ptr %10, ptr %11, align 8
%12 = call ptr @llvm.coro.async.resume()
%13 = getelementptr inbounds <{ ptr, ptr }>, ptr %9, i32 0, i32 1
store ptr %12, ptr %13, align 8
%14 = call { ptr } (i32, ptr, ptr, ...) @llvm.coro.suspend.async.sl_p0s(i32 0, ptr %12, ptr @__swift_async_resume_project_context, ptr @"$s1d3baryySbYaF.0.2", ptr @"$s1d3fooyySbYaF", ptr %9, i1 %6)
%15 = extractvalue { ptr } %14, 0
%16 = call ptr @__swift_async_resume_project_context(ptr %15)
store ptr %16, ptr %2, align 8
call swiftcc void @swift_task_dealloc(ptr %9) #4
call void @llvm.lifetime.end.p0(i64 -1, ptr %9)
br label %18

17: ; preds = %entry
br label %18

18: ; preds = %5, %17
%19 = load ptr, ptr %2, align 8
%20 = getelementptr inbounds <{ ptr, ptr }>, ptr %19, i32 0, i32 1
%21 = load ptr, ptr %20, align 8
%22 = load ptr, ptr %2, align 8
%23 = call i1 (ptr, i1, ...) @llvm.coro.end.async(ptr %4, i1 false, ptr @"$s1d3baryySbYaF.0", ptr %21, ptr %22)
unreachable
}

declare swiftcc ptr @swift_task_alloc(i64) #4

declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #5

declare ptr @llvm.coro.async.resume() #6

define linkonce_odr hidden ptr @__swift_async_resume_project_context(ptr %0) #7 {
entry:
%1 = load ptr, ptr %0, align 8
%2 = call ptr @llvm.swift.async.context.addr()
store ptr %1, ptr %2, align 8
ret ptr %1
}

declare ptr @llvm.swift.async.context.addr() #1

define internal swifttailcc void @"$s1d3fooyySbYaF.0"(ptr %0, ptr %1, i1 %2) #8 {
entry:
musttail call swifttailcc void %0(ptr swiftasync %1, i1 %2)
ret void
}

declare { ptr } @llvm.coro.suspend.async.sl_p0s(i32, ptr, ptr, ...) #6

declare swiftcc void @swift_task_dealloc(ptr) #4

declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #5

define internal swifttailcc void @"$s1d3fooyySbYaF.0.1"(ptr %0, ptr %1) #8 {
entry:
musttail call swifttailcc void %0(ptr swiftasync %1)
ret void
}

declare i1 @llvm.coro.end.async(ptr, i1, ...) #1

define internal swifttailcc void @"$s1d3baryySbYaF.0"(ptr %0, ptr %1) #8 {
entry:
musttail call swifttailcc void %0(ptr swiftasync %1)
ret void
}

define internal swifttailcc void @"$s1d3baryySbYaF.0.2"(ptr %0, ptr %1, i1 %2) #8 {
entry:
musttail call swifttailcc void %0(ptr swiftasync %1, i1 %2)
ret void
}

attributes #1 = { nounwind }
attributes #2 = { cold noreturn nounwind }
attributes #3 = { nocallback nofree nounwind willreturn}
attributes #4 = { nounwind }
attributes #5 = { nocallback nofree nosync nounwind willreturn }
attributes #6 = { nomerge nounwind }
attributes #7 = { alwaysinline nounwind }
attributes #8 = { alwaysinline nounwind }
30 changes: 15 additions & 15 deletions llvm/test/Transforms/Coroutines/swift-async-dbg.ll
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
; RUN: opt -mtriple='arm64-' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg)' -o - | FileCheck %s
; RUN: opt -mtriple='x86_64' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg)' -o - | FileCheck %s
; RUN: opt -mtriple='i386-' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg)' -o - | FileCheck %s --check-prefix=NOENTRY
; RUN: opt -mtriple='armv7-' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg)' -o - | FileCheck %s --check-prefix=NOENTRY
; RUN: opt -mtriple='arm64-' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg),always-inline' -o - | FileCheck %s
; RUN: opt -mtriple='x86_64' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg),always-inline' -o - | FileCheck %s
; RUN: opt -mtriple='i386-' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg),always-inline' -o - | FileCheck %s --check-prefix=NOENTRY
; RUN: opt -mtriple='armv7-' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg),always-inline' -o - | FileCheck %s --check-prefix=NOENTRY

;; Replicate those tests with non-instruction debug markers.
; RUN: opt --try-experimental-debuginfo-iterators -mtriple='arm64-' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg)' -o - | FileCheck %s
; RUN: opt --try-experimental-debuginfo-iterators -mtriple='x86_64' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg)' -o - | FileCheck %s
; RUN: opt --try-experimental-debuginfo-iterators -mtriple='i386-' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg)' -o - | FileCheck %s --check-prefix=NOENTRY
; RUN: opt --try-experimental-debuginfo-iterators -mtriple='armv7-' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg)' -o - | FileCheck %s --check-prefix=NOENTRY
; RUN: opt --try-experimental-debuginfo-iterators -mtriple='arm64-' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg),always-inline' -o - | FileCheck %s
; RUN: opt --try-experimental-debuginfo-iterators -mtriple='x86_64' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg),always-inline' -o - | FileCheck %s
; RUN: opt --try-experimental-debuginfo-iterators -mtriple='i386-' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg),always-inline' -o - | FileCheck %s --check-prefix=NOENTRY
; RUN: opt --try-experimental-debuginfo-iterators -mtriple='armv7-' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg),always-inline' -o - | FileCheck %s --check-prefix=NOENTRY

; NOENTRY-NOT: OP_llvm_entry_value

Expand Down Expand Up @@ -93,37 +93,37 @@ define swifttailcc void @coroutineA(ptr swiftasync %arg) !dbg !48 {
@coroutineBTu = global <{i32, i32}> <{ i32 trunc (i64 sub (i64 ptrtoint (ptr @"coroutineB" to i64), i64 ptrtoint (ptr @"coroutineBTu" to i64)) to i32), i32 16 }>, align 8
@coroutineATu = global <{i32, i32}> <{ i32 trunc (i64 sub (i64 ptrtoint (ptr @"coroutineA" to i64), i64 ptrtoint (ptr @"coroutineATu" to i64)) to i32), i32 16 }>, align 8

define weak_odr hidden ptr @__swift_async_resume_get_context(ptr %arg) !dbg !64 {
define weak_odr hidden ptr @__swift_async_resume_get_context(ptr %arg) alwaysinline !dbg !64 {
ret ptr %arg, !dbg !65
}
define hidden swifttailcc void @coroutineA.1(ptr %arg, i64 %arg1, i64 %arg2, ptr %arg3) !dbg !66 {
define hidden swifttailcc void @coroutineA.1(ptr %arg, i64 %arg1, i64 %arg2, ptr %arg3) alwaysinline !dbg !66 {
musttail call swifttailcc void @swift_task_switch(ptr swiftasync %arg3, ptr %arg, i64 %arg1, i64 %arg2), !dbg !67
ret void, !dbg !67
}

define weak_odr hidden ptr @__swift_async_resume_project_context(ptr %arg) !dbg !68 {
define weak_odr hidden ptr @__swift_async_resume_project_context(ptr %arg) alwaysinline !dbg !68 {
%i1 = load ptr, ptr %arg, align 8, !dbg !69
%i2 = call ptr @llvm.swift.async.context.addr(), !dbg !69
store ptr %i1, ptr %i2, align 8, !dbg !69
ret ptr %i1, !dbg !69
}
define hidden swifttailcc void @coroutineA.0(ptr %arg, ptr %arg1) !dbg !70 {
define hidden swifttailcc void @coroutineA.0(ptr %arg, ptr %arg1) alwaysinline !dbg !70 {
musttail call swifttailcc void %arg(ptr swiftasync %arg1), !dbg !71
ret void, !dbg !71
}
define hidden swifttailcc void @coroutineA.0.1(ptr %arg, ptr %arg1) !dbg !72 {
define hidden swifttailcc void @coroutineA.0.1(ptr %arg, ptr %arg1) alwaysinline !dbg !72 {
musttail call swifttailcc void %arg(ptr swiftasync %arg1), !dbg !73
ret void, !dbg !73
}
define swifttailcc void @coroutineB(ptr swiftasync %arg) !dbg !37 {
define swifttailcc void @coroutineB(ptr swiftasync %arg) alwaysinline !dbg !37 {
%i2 = call token @llvm.coro.id.async(i32 16, i32 16, i32 0, ptr nonnull @coroutineBTu)
%i3 = call ptr @llvm.coro.begin(token %i2, ptr null)
%i6 = getelementptr inbounds <{ ptr, ptr }>, ptr %arg, i64 0, i32 1, !dbg !42
%i712 = load ptr, ptr %i6, align 8, !dbg !42
%i10 = call i1 (ptr, i1, ...) @llvm.coro.end.async(ptr %i3, i1 false, ptr nonnull @coroutineB.0, ptr %i712, ptr %arg), !dbg !42
unreachable, !dbg !42
}
define hidden swifttailcc void @coroutineB.0(ptr %arg, ptr %arg1) !dbg !44 {
define hidden swifttailcc void @coroutineB.0(ptr %arg, ptr %arg1) alwaysinline !dbg !44 {
musttail call swifttailcc void %arg(ptr swiftasync %arg1), !dbg !47
ret void, !dbg !47
}
Expand Down
Loading