diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index 3a43b1edcaba37..c965945d88b79a 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -256,16 +256,18 @@ static bool replaceCoroEndAsync(AnyCoroEndInst *End) { // Insert the return instruction. Builder.SetInsertPoint(End); Builder.CreateRetVoid(); - InlineFunctionInfo FnInfo; // Remove the rest of the block, by splitting it into an unreachable block. auto *BB = End->getParent(); BB->splitBasicBlock(End); BB->getTerminator()->eraseFromParent(); - auto InlineRes = InlineFunction(*MustTailCall, FnInfo); - assert(InlineRes.isSuccess() && "Expected inlining to succeed"); - (void)InlineRes; + if (MustTailCallFunc->getCallingConv() != CallingConv::SwiftTail) { + InlineFunctionInfo FnInfo; + auto InlineRes = InlineFunction(*MustTailCall, FnInfo); + assert(InlineRes.isSuccess() && "Expected inlining to succeed"); + (void)InlineRes; + } // We have cleaned up the coro.end block above. return false; @@ -1882,8 +1884,11 @@ static void splitAsyncCoroutine(Function &F, coro::Shape &Shape, auto *TailCall = coro::createMustTailCall(Suspend->getDebugLoc(), Fn, TTI, FnArgs, Builder); Builder.CreateRetVoid(); - InlineFunctionInfo FnInfo; - (void)InlineFunction(*TailCall, FnInfo); + + if (Fn->getCallingConv() != CallingConv::SwiftTail) { + InlineFunctionInfo FnInfo; + (void)InlineFunction(*TailCall, FnInfo); + } // Replace the lvm.coro.async.resume intrisic call. 
replaceAsyncResumeFunction(Suspend, Continuation); diff --git a/llvm/test/Transforms/Coroutines/coro-async-c-cc.ll b/llvm/test/Transforms/Coroutines/coro-async-c-cc.ll new file mode 100644 index 00000000000000..ca10181fabd029 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-async-c-cc.ll @@ -0,0 +1,39 @@ +; RUN: opt < %s -O0 -S | FileCheck --check-prefixes=CHECK %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@0 = internal constant { i32, i32 } { i32 trunc (i64 sub (i64 ptrtoint (ptr @craSH to i64), i64 ptrtoint (ptr getelementptr inbounds ({ i32, i32 }, ptr @0, i32 0, i32 1) to i64)) to i32), i32 64 } + +define dso_local void @af_suspend_fn(ptr %0, i64 %1, ptr %2) #0 { + ret void +} + +; Make sure that this test case does not crash and produces a split function. +; CHECK: craSH.resume.0 + +define dso_local void @craSH(ptr %0) #0 { + %2 = call token @llvm.coro.id.async(i32 64, i32 8, i32 0, ptr @0) + %3 = call ptr @llvm.coro.begin(token %2, ptr null) + %4 = getelementptr inbounds { ptr, { ptr, ptr }, i64, { ptr, i1 }, i64, i64 }, ptr poison, i32 0, i32 0 + %5 = call ptr @llvm.coro.async.resume() + store ptr %5, ptr %4, align 8 + %6 = call { ptr, ptr, ptr } (i32, ptr, ptr, ...) @llvm.coro.suspend.async.sl_p0p0p0s(i32 0, ptr %5, ptr @ctxt_proj_fn, ptr @af_suspend_fn, ptr poison, i64 -1, ptr poison) + ret void +} + +define dso_local ptr @ctxt_proj_fn(ptr %0) #0 { + ret ptr %0 +} + +declare { ptr, ptr, ptr } @llvm.coro.suspend.async.sl_p0p0p0s(i32, ptr, ptr, ...) 
#1 + +declare token @llvm.coro.id.async(i32, i32, i32, ptr) #2 + +declare ptr @llvm.coro.begin(token, ptr writeonly) #2 + +declare ptr @llvm.coro.async.resume() #1 + +attributes #0 = { "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+clwb,+clzero,+crc32,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+mwaitx,+pclmul,+pku,+popcnt,+prfchw,+rdpid,+rdpru,+rdrnd,+rdseed,+sahf,+sha,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+sse4a,+ssse3,+vaes,+vpclmulqdq,+wbnoinvd,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" } +attributes #1 = { nomerge nounwind } +attributes #2 = { nounwind } diff --git a/llvm/test/Transforms/Coroutines/coro-async-mutually-recursive.ll b/llvm/test/Transforms/Coroutines/coro-async-mutually-recursive.ll new file mode 100644 index 00000000000000..4931fe998daa60 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-async-mutually-recursive.ll @@ -0,0 +1,158 @@ +; RUN: opt < %s -passes='default<O2>' -S | FileCheck --check-prefixes=CHECK %s +; RUN: opt < %s -O0 -S | FileCheck --check-prefixes=CHECK-O0 %s + + +; CHECK-NOT: llvm.coro.suspend.async +; CHECK-O0-NOT: llvm.coro.suspend.async + +; This test used to crash during updating the call graph in coro splitting. 
+ +target datalayout = "p:64:64:64" + +%swift.async_func_pointer = type <{ i32, i32 }> + +@"$s1d3fooyySbYaFTu" = hidden global %swift.async_func_pointer <{ i32 trunc (i64 sub (i64 ptrtoint (ptr @"$s1d3fooyySbYaF" to i64), i64 ptrtoint (ptr @"$s1d3fooyySbYaFTu" to i64)) to i32), i32 16 }> +@"$s1d3baryySbYaFTu" = hidden global %swift.async_func_pointer <{ i32 trunc (i64 sub (i64 ptrtoint (ptr @"$s1d3baryySbYaF" to i64), i64 ptrtoint (ptr @"$s1d3baryySbYaFTu" to i64)) to i32), i32 16 }> + +define swifttailcc void @"$s1d3fooyySbYaF"(ptr swiftasync %0, i1 %1) { +entry: + %2 = alloca ptr, align 8 + %c.debug = alloca i1, align 8 + %3 = call token @llvm.coro.id.async(i32 16, i32 16, i32 0, ptr @"$s1d3fooyySbYaFTu") + %4 = call ptr @llvm.coro.begin(token %3, ptr null) + store ptr %0, ptr %2, align 8 + call void @llvm.memset.p0.i64(ptr align 8 %c.debug, i8 0, i64 1, i1 false) + store i1 %1, ptr %c.debug, align 8 + call void asm sideeffect "", "r"(ptr %c.debug) + %5 = load i32, ptr getelementptr inbounds (%swift.async_func_pointer, ptr @"$s1d3baryySbYaFTu", i32 0, i32 1), align 8 + %6 = zext i32 %5 to i64 + %7 = call swiftcc ptr @swift_task_alloc(i64 %6) #4 + call void @llvm.lifetime.start.p0(i64 -1, ptr %7) + %8 = load ptr, ptr %2, align 8 + %9 = getelementptr inbounds <{ ptr, ptr }>, ptr %7, i32 0, i32 0 + store ptr %8, ptr %9, align 8 + %10 = call ptr @llvm.coro.async.resume() + %11 = getelementptr inbounds <{ ptr, ptr }>, ptr %7, i32 0, i32 1 + store ptr %10, ptr %11, align 8 + %12 = call { ptr } (i32, ptr, ptr, ...) 
@llvm.coro.suspend.async.sl_p0s(i32 0, ptr %10, ptr @__swift_async_resume_project_context, ptr @"$s1d3fooyySbYaF.0", ptr @"$s1d3baryySbYaF", ptr %7, i1 %1) + %13 = extractvalue { ptr } %12, 0 + %14 = call ptr @__swift_async_resume_project_context(ptr %13) + store ptr %14, ptr %2, align 8 + call swiftcc void @swift_task_dealloc(ptr %7) #4 + call void @llvm.lifetime.end.p0(i64 -1, ptr %7) + %15 = load ptr, ptr %2, align 8 + %16 = getelementptr inbounds <{ ptr, ptr }>, ptr %15, i32 0, i32 1 + %17 = load ptr, ptr %16, align 8 + %18 = load ptr, ptr %2, align 8 + %19 = call i1 (ptr, i1, ...) @llvm.coro.end.async(ptr %4, i1 false, ptr @"$s1d3fooyySbYaF.0.1", ptr %17, ptr %18) + unreachable +} + +declare token @llvm.coro.id.async(i32, i32, i32, ptr) #1 + +declare void @llvm.trap() #2 + +declare ptr @llvm.coro.begin(token, ptr) #1 + +declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1 immarg) #3 + +define hidden swifttailcc void @"$s1d3baryySbYaF"(ptr swiftasync %0, i1 %1) { +entry: + %2 = alloca ptr, align 8 + %c.debug = alloca i1, align 8 + %3 = call token @llvm.coro.id.async(i32 16, i32 16, i32 0, ptr @"$s1d3baryySbYaFTu") + %4 = call ptr @llvm.coro.begin(token %3, ptr null) + store ptr %0, ptr %2, align 8 + call void @llvm.memset.p0.i64(ptr align 8 %c.debug, i8 0, i64 1, i1 false) + store i1 %1, ptr %c.debug, align 8 + call void asm sideeffect "", "r"(ptr %c.debug) + br i1 %1, label %5, label %17 + +5: ; preds = %entry + %6 = xor i1 %1, true + %7 = load i32, ptr getelementptr inbounds (%swift.async_func_pointer, ptr @"$s1d3fooyySbYaFTu", i32 0, i32 1), align 8 + %8 = zext i32 %7 to i64 + %9 = call swiftcc ptr @swift_task_alloc(i64 %8) #4 + call void @llvm.lifetime.start.p0(i64 -1, ptr %9) + %10 = load ptr, ptr %2, align 8 + %11 = getelementptr inbounds <{ ptr, ptr }>, ptr %9, i32 0, i32 0 + store ptr %10, ptr %11, align 8 + %12 = call ptr @llvm.coro.async.resume() + %13 = getelementptr inbounds <{ ptr, ptr }>, ptr %9, i32 0, i32 1 + store ptr %12, ptr %13, align 
8 + %14 = call { ptr } (i32, ptr, ptr, ...) @llvm.coro.suspend.async.sl_p0s(i32 0, ptr %12, ptr @__swift_async_resume_project_context, ptr @"$s1d3baryySbYaF.0.2", ptr @"$s1d3fooyySbYaF", ptr %9, i1 %6) + %15 = extractvalue { ptr } %14, 0 + %16 = call ptr @__swift_async_resume_project_context(ptr %15) + store ptr %16, ptr %2, align 8 + call swiftcc void @swift_task_dealloc(ptr %9) #4 + call void @llvm.lifetime.end.p0(i64 -1, ptr %9) + br label %18 + +17: ; preds = %entry + br label %18 + +18: ; preds = %5, %17 + %19 = load ptr, ptr %2, align 8 + %20 = getelementptr inbounds <{ ptr, ptr }>, ptr %19, i32 0, i32 1 + %21 = load ptr, ptr %20, align 8 + %22 = load ptr, ptr %2, align 8 + %23 = call i1 (ptr, i1, ...) @llvm.coro.end.async(ptr %4, i1 false, ptr @"$s1d3baryySbYaF.0", ptr %21, ptr %22) + unreachable +} + +declare swiftcc ptr @swift_task_alloc(i64) #4 + +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #5 + +declare ptr @llvm.coro.async.resume() #6 + +define linkonce_odr hidden ptr @__swift_async_resume_project_context(ptr %0) #7 { +entry: + %1 = load ptr, ptr %0, align 8 + %2 = call ptr @llvm.swift.async.context.addr() + store ptr %1, ptr %2, align 8 + ret ptr %1 +} + +declare ptr @llvm.swift.async.context.addr() #1 + +define internal swifttailcc void @"$s1d3fooyySbYaF.0"(ptr %0, ptr %1, i1 %2) #8 { +entry: + musttail call swifttailcc void %0(ptr swiftasync %1, i1 %2) + ret void +} + +declare { ptr } @llvm.coro.suspend.async.sl_p0s(i32, ptr, ptr, ...) #6 + +declare swiftcc void @swift_task_dealloc(ptr) #4 + +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #5 + +define internal swifttailcc void @"$s1d3fooyySbYaF.0.1"(ptr %0, ptr %1) #8 { +entry: + musttail call swifttailcc void %0(ptr swiftasync %1) + ret void +} + +declare i1 @llvm.coro.end.async(ptr, i1, ...) 
#1 + +define internal swifttailcc void @"$s1d3baryySbYaF.0"(ptr %0, ptr %1) #8 { +entry: + musttail call swifttailcc void %0(ptr swiftasync %1) + ret void +} + +define internal swifttailcc void @"$s1d3baryySbYaF.0.2"(ptr %0, ptr %1, i1 %2) #8 { +entry: + musttail call swifttailcc void %0(ptr swiftasync %1, i1 %2) + ret void +} + +attributes #1 = { nounwind } +attributes #2 = { cold noreturn nounwind } +attributes #3 = { nocallback nofree nounwind willreturn} +attributes #4 = { nounwind } +attributes #5 = { nocallback nofree nosync nounwind willreturn } +attributes #6 = { nomerge nounwind } +attributes #7 = { alwaysinline nounwind } +attributes #8 = { alwaysinline nounwind } diff --git a/llvm/test/Transforms/Coroutines/swift-async-dbg.ll b/llvm/test/Transforms/Coroutines/swift-async-dbg.ll index 74edf7a3f3a540..a78bcdf0ddee23 100644 --- a/llvm/test/Transforms/Coroutines/swift-async-dbg.ll +++ b/llvm/test/Transforms/Coroutines/swift-async-dbg.ll @@ -1,13 +1,13 @@ -; RUN: opt -mtriple='arm64-' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg)' -o - | FileCheck %s -; RUN: opt -mtriple='x86_64' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg)' -o - | FileCheck %s -; RUN: opt -mtriple='i386-' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg)' -o - | FileCheck %s --check-prefix=NOENTRY -; RUN: opt -mtriple='armv7-' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg)' -o - | FileCheck %s --check-prefix=NOENTRY +; RUN: opt -mtriple='arm64-' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg),always-inline' -o - | FileCheck %s +; RUN: opt -mtriple='x86_64' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg),always-inline' -o - | FileCheck %s +; RUN: opt -mtriple='i386-' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg),always-inline' -o - | FileCheck %s --check-prefix=NOENTRY +; RUN: opt -mtriple='armv7-' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg),always-inline' 
-o - | FileCheck %s --check-prefix=NOENTRY ;; Replicate those tests with non-instruction debug markers. -; RUN: opt --try-experimental-debuginfo-iterators -mtriple='arm64-' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg)' -o - | FileCheck %s -; RUN: opt --try-experimental-debuginfo-iterators -mtriple='x86_64' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg)' -o - | FileCheck %s -; RUN: opt --try-experimental-debuginfo-iterators -mtriple='i386-' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg)' -o - | FileCheck %s --check-prefix=NOENTRY -; RUN: opt --try-experimental-debuginfo-iterators -mtriple='armv7-' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg)' -o - | FileCheck %s --check-prefix=NOENTRY +; RUN: opt --try-experimental-debuginfo-iterators -mtriple='arm64-' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg),always-inline' -o - | FileCheck %s +; RUN: opt --try-experimental-debuginfo-iterators -mtriple='x86_64' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg),always-inline' -o - | FileCheck %s +; RUN: opt --try-experimental-debuginfo-iterators -mtriple='i386-' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg),always-inline' -o - | FileCheck %s --check-prefix=NOENTRY +; RUN: opt --try-experimental-debuginfo-iterators -mtriple='armv7-' %s -S -passes='module(coro-early),cgscc(coro-split,simplifycfg),always-inline' -o - | FileCheck %s --check-prefix=NOENTRY ; NOENTRY-NOT: OP_llvm_entry_value @@ -93,29 +93,29 @@ define swifttailcc void @coroutineA(ptr swiftasync %arg) !dbg !48 { @coroutineBTu = global <{i32, i32}> <{ i32 trunc (i64 sub (i64 ptrtoint (ptr @"coroutineB" to i64), i64 ptrtoint (ptr @"coroutineBTu" to i64)) to i32), i32 16 }>, align 8 @coroutineATu = global <{i32, i32}> <{ i32 trunc (i64 sub (i64 ptrtoint (ptr @"coroutineA" to i64), i64 ptrtoint (ptr @"coroutineATu" to i64)) to i32), i32 16 }>, align 8 -define weak_odr hidden ptr 
@__swift_async_resume_get_context(ptr %arg) !dbg !64 { +define weak_odr hidden ptr @__swift_async_resume_get_context(ptr %arg) alwaysinline !dbg !64 { ret ptr %arg, !dbg !65 } -define hidden swifttailcc void @coroutineA.1(ptr %arg, i64 %arg1, i64 %arg2, ptr %arg3) !dbg !66 { +define hidden swifttailcc void @coroutineA.1(ptr %arg, i64 %arg1, i64 %arg2, ptr %arg3) alwaysinline !dbg !66 { musttail call swifttailcc void @swift_task_switch(ptr swiftasync %arg3, ptr %arg, i64 %arg1, i64 %arg2), !dbg !67 ret void, !dbg !67 } -define weak_odr hidden ptr @__swift_async_resume_project_context(ptr %arg) !dbg !68 { +define weak_odr hidden ptr @__swift_async_resume_project_context(ptr %arg) alwaysinline !dbg !68 { %i1 = load ptr, ptr %arg, align 8, !dbg !69 %i2 = call ptr @llvm.swift.async.context.addr(), !dbg !69 store ptr %i1, ptr %i2, align 8, !dbg !69 ret ptr %i1, !dbg !69 } -define hidden swifttailcc void @coroutineA.0(ptr %arg, ptr %arg1) !dbg !70 { +define hidden swifttailcc void @coroutineA.0(ptr %arg, ptr %arg1) alwaysinline !dbg !70 { musttail call swifttailcc void %arg(ptr swiftasync %arg1), !dbg !71 ret void, !dbg !71 } -define hidden swifttailcc void @coroutineA.0.1(ptr %arg, ptr %arg1) !dbg !72 { +define hidden swifttailcc void @coroutineA.0.1(ptr %arg, ptr %arg1) alwaysinline !dbg !72 { musttail call swifttailcc void %arg(ptr swiftasync %arg1), !dbg !73 ret void, !dbg !73 } -define swifttailcc void @coroutineB(ptr swiftasync %arg) !dbg !37 { +define swifttailcc void @coroutineB(ptr swiftasync %arg) alwaysinline !dbg !37 { %i2 = call token @llvm.coro.id.async(i32 16, i32 16, i32 0, ptr nonnull @coroutineBTu) %i3 = call ptr @llvm.coro.begin(token %i2, ptr null) %i6 = getelementptr inbounds <{ ptr, ptr }>, ptr %arg, i64 0, i32 1, !dbg !42 @@ -123,7 +123,7 @@ define swifttailcc void @coroutineB(ptr swiftasync %arg) !dbg !37 { %i10 = call i1 (ptr, i1, ...) 
@llvm.coro.end.async(ptr %i3, i1 false, ptr nonnull @coroutineB.0, ptr %i712, ptr %arg), !dbg !42 unreachable, !dbg !42 } -define hidden swifttailcc void @coroutineB.0(ptr %arg, ptr %arg1) !dbg !44 { +define hidden swifttailcc void @coroutineB.0(ptr %arg, ptr %arg1) alwaysinline !dbg !44 { musttail call swifttailcc void %arg(ptr swiftasync %arg1), !dbg !47 ret void, !dbg !47 }