From b7e5597491a7880934f927edb506605d3368794e Mon Sep 17 00:00:00 2001 From: Erik Desjardins Date: Fri, 18 Feb 2022 15:57:10 -0500 Subject: [PATCH 1/5] Use undef for partially-uninit constants up to 1024 bytes There needs to be some limit to avoid perf regressions on large arrays with undef in each element (see comment in the code). --- compiler/rustc_codegen_llvm/src/consts.rs | 6 ++-- compiler/rustc_session/src/options.rs | 4 +-- src/test/codegen/consts.rs | 4 +-- .../uninit-consts-allow-partially-uninit.rs | 35 ------------------- src/test/codegen/uninit-consts.rs | 17 +++++++-- 5 files changed, 23 insertions(+), 43 deletions(-) delete mode 100644 src/test/codegen/uninit-consts-allow-partially-uninit.rs diff --git a/compiler/rustc_codegen_llvm/src/consts.rs b/compiler/rustc_codegen_llvm/src/consts.rs index 6707de933522b..ddd87c5b24d55 100644 --- a/compiler/rustc_codegen_llvm/src/consts.rs +++ b/compiler/rustc_codegen_llvm/src/consts.rs @@ -53,8 +53,10 @@ pub fn const_alloc_to_llvm<'ll>(cx: &CodegenCx<'ll, '_>, alloc: &Allocation) -> } }; - // Generating partially-uninit consts inhibits optimizations, so it is disabled by default. - // See https://github.com/rust-lang/rust/issues/84565. + // Generating partially-uninit consts is limited to small allocations, + // to avoid the cost of generating large complex const expressions. + // For example, `[(u32, u8); 1024 * 1024]` contains uninit padding in each element, + // and would result in `{ [5 x i8] zeroinitializer, [3 x i8] undef, ...repeat 1M times... }`. let allow_partially_uninit = match cx.sess().opts.debugging_opts.partially_uninit_const_threshold { Some(max) => range.len() <= max, diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs index c069144fa9f1c..59ce7cdb60362 100644 --- a/compiler/rustc_session/src/options.rs +++ b/compiler/rustc_session/src/options.rs @@ -1341,9 +1341,9 @@ options! { "panic strategy for panics in drops"), parse_only: bool = (false, parse_bool, [UNTRACKED], "parse only; do not compile, assemble, or link (default: no)"), - partially_uninit_const_threshold: Option = (None, parse_opt_number, [TRACKED], + partially_uninit_const_threshold: Option = (Some(1024), parse_opt_number, [TRACKED], "allow generating const initializers with mixed init/uninit bytes, \ - and set the maximum total size of a const allocation for which this is allowed (default: never)"), + and set the maximum total size of a const allocation for which this is allowed (default: 1024 bytes)"), perf_stats: bool = (false, parse_bool, [UNTRACKED], "print some performance-related statistics (default: no)"), pick_stable_methods_before_any_unstable: bool = (true, parse_bool, [TRACKED], diff --git a/src/test/codegen/consts.rs b/src/test/codegen/consts.rs index 7f945299c22a6..e47a9f9ee2011 100644 --- a/src/test/codegen/consts.rs +++ b/src/test/codegen/consts.rs @@ -43,7 +43,7 @@ pub fn inline_enum_const() -> E { #[no_mangle] pub fn low_align_const() -> E { // Check that low_align_const and high_align_const use the same constant - // CHECK: memcpy.p0i8.p0i8.i{{(32|64)}}(i8* align 2 %1, i8* align 2 getelementptr inbounds (<{ [8 x i8] }>, <{ [8 x i8] }>* [[LOW_HIGH]], i32 0, i32 0, i32 0), i{{(32|64)}} 8, i1 false) + // CHECK: memcpy.p0i8.p0i8.i{{(32|64)}}(i8* align 2 %1, i8* align 2 getelementptr inbounds (<{ [4 x i8], [4 x i8] }>, <{ [4 x i8], [4 x i8] }>* [[LOW_HIGH]], i32 0, i32 0, i32 0), i{{(32|64)}} 8, i1 false) *&E::A(0) } @@ -51,6 +51,6 @@ pub fn low_align_const() -> E { #[no_mangle] pub fn high_align_const() -> E { // Check that low_align_const and high_align_const use the same constant - // CHECK: memcpy.p0i8.p0i8.i{{(32|64)}}(i8* align 4 %1, i8* align 4 getelementptr inbounds (<{ [8 x i8] }>, <{ [8 x i8] }>* [[LOW_HIGH]], i32 0, i32 0, i32 0), i{{(32|64)}} 8, i1 false) + // CHECK: memcpy.p0i8.p0i8.i{{(32|64)}}(i8* align 4 %1, i8* align 4 getelementptr inbounds (<{ [4 x i8], [4 x i8] }>, <{ [4 x i8], [4 x i8] }>* [[LOW_HIGH]], i32 0, i32 0, i32 0), i{{(32|64)}} 8, i1 false) *&E::A(0) } diff --git a/src/test/codegen/uninit-consts-allow-partially-uninit.rs b/src/test/codegen/uninit-consts-allow-partially-uninit.rs deleted file mode 100644 index f7420e4126ed0..0000000000000 --- a/src/test/codegen/uninit-consts-allow-partially-uninit.rs +++ /dev/null @@ -1,35 +0,0 @@ -// compile-flags: -C no-prepopulate-passes -Z partially_uninit_const_threshold=1024 - -// Like uninit-consts.rs, but tests that we correctly generate partially-uninit consts -// when the (disabled by default) partially_uninit_const_threshold flag is used. - -#![crate_type = "lib"] - -use std::mem::MaybeUninit; - -pub struct PartiallyUninit { - x: u32, - y: MaybeUninit<[u8; 10]> -} - -// This should be partially undef. -// CHECK: [[PARTIALLY_UNINIT:@[0-9]+]] = private unnamed_addr constant <{ [4 x i8], [12 x i8] }> <{ [4 x i8] c"\EF\BE\AD\DE", [12 x i8] undef }>, align 4 - -// This shouldn't contain undef, since it's larger than the 1024 byte limit. -// CHECK: [[UNINIT_PADDING_HUGE:@[0-9]+]] = private unnamed_addr constant <{ [32768 x i8] }> <{ [32768 x i8] c"{{.+}}" }>, align 4 - -// CHECK-LABEL: @partially_uninit -#[no_mangle] -pub const fn partially_uninit() -> PartiallyUninit { - const X: PartiallyUninit = PartiallyUninit { x: 0xdeadbeef, y: MaybeUninit::uninit() }; - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i{{(32|64)}}(i8* align 4 %1, i8* align 4 getelementptr inbounds (<{ [4 x i8], [12 x i8] }>, <{ [4 x i8], [12 x i8] }>* [[PARTIALLY_UNINIT]], i32 0, i32 0, i32 0), i{{(32|64)}} 16, i1 false) - X -} - -// CHECK-LABEL: @uninit_padding_huge -#[no_mangle] -pub const fn uninit_padding_huge() -> [(u32, u8); 4096] { - const X: [(u32, u8); 4096] = [(123, 45); 4096]; - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i{{(32|64)}}(i8* align 4 %1, i8* align 4 getelementptr inbounds (<{ [32768 x i8] }>, <{ [32768 x i8] }>* [[UNINIT_PADDING_HUGE]], i32 0, i32 0, i32 0), i{{(32|64)}} 32768, i1 false) - X -} diff --git a/src/test/codegen/uninit-consts.rs b/src/test/codegen/uninit-consts.rs index c4c21e03f1676..0ca2197ecbcc7 100644 --- a/src/test/codegen/uninit-consts.rs +++ b/src/test/codegen/uninit-consts.rs @@ -12,7 +12,12 @@ pub struct PartiallyUninit { } // CHECK: [[FULLY_UNINIT:@[0-9]+]] = private unnamed_addr constant <{ [10 x i8] }> undef -// CHECK: [[PARTIALLY_UNINIT:@[0-9]+]] = private unnamed_addr constant <{ [16 x i8] }> <{ [16 x i8] c"\EF\BE\AD\DE\00\00\00\00\00\00\00\00\00\00\00\00" }>, align 4 + +// CHECK: [[PARTIALLY_UNINIT:@[0-9]+]] = private unnamed_addr constant <{ [4 x i8], [12 x i8] }> <{ [4 x i8] c"\EF\BE\AD\DE", [12 x i8] undef }>, align 4 + +// This shouldn't contain undef, since it's larger than the 1024 byte limit. +// CHECK: [[UNINIT_PADDING_HUGE:@[0-9]+]] = private unnamed_addr constant <{ [32768 x i8] }> <{ [32768 x i8] c"{{.+}}" }>, align 4 + // CHECK: [[FULLY_UNINIT_HUGE:@[0-9]+]] = private unnamed_addr constant <{ [16384 x i8] }> undef // CHECK-LABEL: @fully_uninit @@ -27,7 +32,15 @@ pub const fn fully_uninit() -> MaybeUninit<[u8; 10]> { #[no_mangle] pub const fn partially_uninit() -> PartiallyUninit { const X: PartiallyUninit = PartiallyUninit { x: 0xdeadbeef, y: MaybeUninit::uninit() }; - // CHECK: call void @llvm.memcpy.p0i8.p0i8.i{{(32|64)}}(i8* align 4 %1, i8* align 4 getelementptr inbounds (<{ [16 x i8] }>, <{ [16 x i8] }>* [[PARTIALLY_UNINIT]], i32 0, i32 0, i32 0), i{{(32|64)}} 16, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i{{(32|64)}}(i8* align 4 %1, i8* align 4 getelementptr inbounds (<{ [4 x i8], [12 x i8] }>, <{ [4 x i8], [12 x i8] }>* [[PARTIALLY_UNINIT]], i32 0, i32 0, i32 0), i{{(32|64)}} 16, i1 false) + X +} + +// CHECK-LABEL: @uninit_padding_huge +#[no_mangle] +pub const fn uninit_padding_huge() -> [(u32, u8); 4096] { + const X: [(u32, u8); 4096] = [(123, 45); 4096]; + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i{{(32|64)}}(i8* align 4 %1, i8* align 4 getelementptr inbounds (<{ [32768 x i8] }>, <{ [32768 x i8] }>* [[UNINIT_PADDING_HUGE]], i32 0, i32 0, i32 0), i{{(32|64)}} 32768, i1 false) X } From d5769e9843dede495551a781cf947b631984868f Mon Sep 17 00:00:00 2001 From: Erik Desjardins Date: Sat, 19 Feb 2022 01:17:31 -0500 Subject: [PATCH 2/5] switch to limiting the number of init/uninit chunks --- compiler/rustc_codegen_llvm/src/consts.rs | 30 ++++++------------- compiler/rustc_interface/src/tests.rs | 2 +- .../src/mir/interpret/allocation.rs | 1 + compiler/rustc_session/src/options.rs | 6 ++-- src/test/codegen/uninit-consts.rs | 3 +- 5 files changed, 16 insertions(+), 26 deletions(-) diff --git a/compiler/rustc_codegen_llvm/src/consts.rs b/compiler/rustc_codegen_llvm/src/consts.rs index ddd87c5b24d55..14de6c5986a33 100644 --- a/compiler/rustc_codegen_llvm/src/consts.rs +++ b/compiler/rustc_codegen_llvm/src/consts.rs @@ -37,7 +37,7 @@ pub fn const_alloc_to_llvm<'ll>(cx: &CodegenCx<'ll, '_>, alloc: &Allocation) -> alloc: &'a Allocation, range: Range, ) { - let mut chunks = alloc + let chunks = alloc .init_mask() .range_as_init_chunks(Size::from_bytes(range.start), Size::from_bytes(range.end)); @@ -53,32 +53,20 @@ pub fn const_alloc_to_llvm<'ll>(cx: &CodegenCx<'ll, '_>, alloc: &Allocation) -> } }; - // Generating partially-uninit consts is limited to small allocations, + // Generating partially-uninit consts is limited to small numbers of chunks, // to avoid the cost of generating large complex const expressions. // For example, `[(u32, u8); 1024 * 1024]` contains uninit padding in each element, // and would result in `{ [5 x i8] zeroinitializer, [3 x i8] undef, ...repeat 1M times... }`. - let allow_partially_uninit = - match cx.sess().opts.debugging_opts.partially_uninit_const_threshold { - Some(max) => range.len() <= max, - None => false, - }; + let max = cx.sess().opts.debugging_opts.uninit_const_chunk_threshold; + let allow_uninit_chunks = chunks.clone().take(max.saturating_add(1)).count() <= max; - if allow_partially_uninit { + if allow_uninit_chunks { llvals.extend(chunks.map(chunk_to_llval)); } else { - let llval = match (chunks.next(), chunks.next()) { - (Some(chunk), None) => { - // exactly one chunk, either fully init or fully uninit - chunk_to_llval(chunk) - } - _ => { - // partially uninit, codegen as if it was initialized - // (using some arbitrary value for uninit bytes) - let bytes = alloc.inspect_with_uninit_and_ptr_outside_interpreter(range); - cx.const_bytes(bytes) - } - }; - llvals.push(llval); + // If this allocation contains any uninit bytes, codegen as if it was initialized + // (using some arbitrary value for uninit bytes). + let bytes = alloc.inspect_with_uninit_and_ptr_outside_interpreter(range); + llvals.push(cx.const_bytes(bytes)); } } diff --git a/compiler/rustc_interface/src/tests.rs b/compiler/rustc_interface/src/tests.rs index 9ab138c1b12a5..def5c30411b84 100644 --- a/compiler/rustc_interface/src/tests.rs +++ b/compiler/rustc_interface/src/tests.rs @@ -758,7 +758,6 @@ fn test_debugging_options_tracking_hash() { tracked!(osx_rpath_install_name, true); tracked!(panic_abort_tests, true); tracked!(panic_in_drop, PanicStrategy::Abort); - tracked!(partially_uninit_const_threshold, Some(123)); tracked!(pick_stable_methods_before_any_unstable, false); tracked!(plt, Some(true)); tracked!(polonius, true); @@ -789,6 +788,7 @@ fn test_debugging_options_tracking_hash() { tracked!(trap_unreachable, Some(false)); tracked!(treat_err_as_bug, NonZeroUsize::new(1)); tracked!(tune_cpu, Some(String::from("abc"))); + tracked!(uninit_const_chunk_threshold, 123); tracked!(unleash_the_miri_inside_of_you, true); tracked!(use_ctors_section, Some(true)); tracked!(verify_llvm_ir, true); diff --git a/compiler/rustc_middle/src/mir/interpret/allocation.rs b/compiler/rustc_middle/src/mir/interpret/allocation.rs index a36c9b6ed7304..5de119f956282 100644 --- a/compiler/rustc_middle/src/mir/interpret/allocation.rs +++ b/compiler/rustc_middle/src/mir/interpret/allocation.rs @@ -957,6 +957,7 @@ impl InitMask { } /// Yields [`InitChunk`]s. See [`InitMask::range_as_init_chunks`]. +#[derive(Clone)] pub struct InitChunkIter<'a> { init_mask: &'a InitMask, /// Whether the next chunk we will return is initialized. diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs index 59ce7cdb60362..26bb268a039bf 100644 --- a/compiler/rustc_session/src/options.rs +++ b/compiler/rustc_session/src/options.rs @@ -1341,9 +1341,6 @@ options! { "panic strategy for panics in drops"), parse_only: bool = (false, parse_bool, [UNTRACKED], "parse only; do not compile, assemble, or link (default: no)"), - partially_uninit_const_threshold: Option = (Some(1024), parse_opt_number, [TRACKED], - "allow generating const initializers with mixed init/uninit bytes, \ - and set the maximum total size of a const allocation for which this is allowed (default: 1024 bytes)"), perf_stats: bool = (false, parse_bool, [UNTRACKED], "print some performance-related statistics (default: no)"), pick_stable_methods_before_any_unstable: bool = (true, parse_bool, [TRACKED], @@ -1488,6 +1485,9 @@ options! { "in diagnostics, use heuristics to shorten paths referring to items"), ui_testing: bool = (false, parse_bool, [UNTRACKED], "emit compiler diagnostics in a form suitable for UI testing (default: no)"), + uninit_const_chunk_threshold: usize = (256, parse_number, [TRACKED], + "allow generating const initializers with mixed init/uninit chunks, \ + and set the maximum number of chunks for which this is allowed (default: 256)"), unleash_the_miri_inside_of_you: bool = (false, parse_bool, [TRACKED], "take the brakes off const evaluation. NOTE: this is unsound (default: no)"), unpretty: Option = (None, parse_unpretty, [UNTRACKED], diff --git a/src/test/codegen/uninit-consts.rs b/src/test/codegen/uninit-consts.rs index 0ca2197ecbcc7..d8c8fce105fe4 100644 --- a/src/test/codegen/uninit-consts.rs +++ b/src/test/codegen/uninit-consts.rs @@ -15,7 +15,8 @@ pub struct PartiallyUninit { // CHECK: [[PARTIALLY_UNINIT:@[0-9]+]] = private unnamed_addr constant <{ [4 x i8], [12 x i8] }> <{ [4 x i8] c"\EF\BE\AD\DE", [12 x i8] undef }>, align 4 -// This shouldn't contain undef, since it's larger than the 1024 byte limit. +// This shouldn't contain undef, since it contains more than 256 chunks +// (the default value of uninit_const_chunk_threshold). // CHECK: [[UNINIT_PADDING_HUGE:@[0-9]+]] = private unnamed_addr constant <{ [32768 x i8] }> <{ [32768 x i8] c"{{.+}}" }>, align 4 // CHECK: [[FULLY_UNINIT_HUGE:@[0-9]+]] = private unnamed_addr constant <{ [16384 x i8] }> undef From c2e84fa5fc4788a8bcb97fbcdc2fa78359e44121 Mon Sep 17 00:00:00 2001 From: Erik Desjardins Date: Sat, 19 Feb 2022 10:35:44 -0500 Subject: [PATCH 3/5] reduce default uninit_const_chunk_threshold to 16 (from 256) --- compiler/rustc_session/src/options.rs | 4 ++-- src/test/codegen/uninit-consts.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs index 26bb268a039bf..11204df7aeaa0 100644 --- a/compiler/rustc_session/src/options.rs +++ b/compiler/rustc_session/src/options.rs @@ -1485,9 +1485,9 @@ options! { "in diagnostics, use heuristics to shorten paths referring to items"), ui_testing: bool = (false, parse_bool, [UNTRACKED], "emit compiler diagnostics in a form suitable for UI testing (default: no)"), - uninit_const_chunk_threshold: usize = (256, parse_number, [TRACKED], + uninit_const_chunk_threshold: usize = (16, parse_number, [TRACKED], "allow generating const initializers with mixed init/uninit chunks, \ - and set the maximum number of chunks for which this is allowed (default: 256)"), + and set the maximum number of chunks for which this is allowed (default: 16)"), unleash_the_miri_inside_of_you: bool = (false, parse_bool, [TRACKED], "take the brakes off const evaluation. NOTE: this is unsound (default: no)"), unpretty: Option = (None, parse_unpretty, [UNTRACKED], diff --git a/src/test/codegen/uninit-consts.rs b/src/test/codegen/uninit-consts.rs index d8c8fce105fe4..17457bc628b66 100644 --- a/src/test/codegen/uninit-consts.rs +++ b/src/test/codegen/uninit-consts.rs @@ -15,8 +15,8 @@ pub struct PartiallyUninit { // CHECK: [[PARTIALLY_UNINIT:@[0-9]+]] = private unnamed_addr constant <{ [4 x i8], [12 x i8] }> <{ [4 x i8] c"\EF\BE\AD\DE", [12 x i8] undef }>, align 4 -// This shouldn't contain undef, since it contains more than 256 chunks -// (the default value of uninit_const_chunk_threshold). +// This shouldn't contain undef, since it contains more chunks +// than the default value of uninit_const_chunk_threshold. // CHECK: [[UNINIT_PADDING_HUGE:@[0-9]+]] = private unnamed_addr constant <{ [32768 x i8] }> <{ [32768 x i8] c"{{.+}}" }>, align 4 // CHECK: [[FULLY_UNINIT_HUGE:@[0-9]+]] = private unnamed_addr constant <{ [16384 x i8] }> undef From 067f628286216cd3a78524da28fab29bfb0f516a Mon Sep 17 00:00:00 2001 From: Erik Desjardins Date: Sun, 20 Feb 2022 11:37:22 -0500 Subject: [PATCH 4/5] add check for llvm 14 --- compiler/rustc_codegen_llvm/src/consts.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/compiler/rustc_codegen_llvm/src/consts.rs b/compiler/rustc_codegen_llvm/src/consts.rs index 14de6c5986a33..9ab822f01f1ec 100644 --- a/compiler/rustc_codegen_llvm/src/consts.rs +++ b/compiler/rustc_codegen_llvm/src/consts.rs @@ -2,6 +2,7 @@ use crate::base; use crate::common::CodegenCx; use crate::debuginfo; use crate::llvm::{self, True}; +use crate::llvm_util; use crate::type_::Type; use crate::type_of::LayoutLlvmExt; use crate::value::Value; @@ -57,7 +58,13 @@ pub fn const_alloc_to_llvm<'ll>(cx: &CodegenCx<'ll, '_>, alloc: &Allocation) -> // to avoid the cost of generating large complex const expressions. // For example, `[(u32, u8); 1024 * 1024]` contains uninit padding in each element, // and would result in `{ [5 x i8] zeroinitializer, [3 x i8] undef, ...repeat 1M times... }`. - let max = cx.sess().opts.debugging_opts.uninit_const_chunk_threshold; + let max = if llvm_util::get_version() < (14, 0, 0) { + // Generating partially-uninit consts inhibits optimizations in LLVM < 14. + // See https://github.com/rust-lang/rust/issues/84565. + 1 + } else { + cx.sess().opts.debugging_opts.uninit_const_chunk_threshold + }; let allow_uninit_chunks = chunks.clone().take(max.saturating_add(1)).count() <= max; if allow_uninit_chunks { From 5bf8303bbc0f7ad6f68c1c7311ca538ddcb23ebe Mon Sep 17 00:00:00 2001 From: Erik Desjardins Date: Sun, 20 Feb 2022 12:57:22 -0500 Subject: [PATCH 5/5] limit tests to llvm 14+ --- src/test/codegen/consts.rs | 2 +- src/test/codegen/uninit-consts.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test/codegen/consts.rs b/src/test/codegen/consts.rs index e47a9f9ee2011..1a84f1a44795b 100644 --- a/src/test/codegen/consts.rs +++ b/src/test/codegen/consts.rs @@ -1,5 +1,5 @@ // compile-flags: -C no-prepopulate-passes -// +// min-llvm-version: 14.0 #![crate_type = "lib"] diff --git a/src/test/codegen/uninit-consts.rs b/src/test/codegen/uninit-consts.rs index 17457bc628b66..3e370c7ba64f8 100644 --- a/src/test/codegen/uninit-consts.rs +++ b/src/test/codegen/uninit-consts.rs @@ -1,4 +1,5 @@ // compile-flags: -C no-prepopulate-passes +// min-llvm-version: 14.0 // Check that we use undef (and not zero) for uninitialized bytes in constants.