From 6e5566cf03340fba678c10c17282b1fcc536c3e1 Mon Sep 17 00:00:00 2001 From: Augie Fackler Date: Thu, 7 Sep 2023 09:48:50 -0400 Subject: [PATCH 1/2] lto: load bitcode sections by name Upstream change llvm/llvm-project@6b539f5eb8ef1d3a3c87873caa2dbd5147e1adbd changed `isSectionBitcode` works and it now only respects `.llvm.lto` sections instead of also `.llvmbc`, which it says was never intended to be used for LTO. We instead load sections by name, and sniff for raw bitcode by hand. r? @nikic @rustbot label: +llvm-main --- compiler/rustc_codegen_llvm/src/back/lto.rs | 27 +++++++++++--- compiler/rustc_codegen_llvm/src/back/write.rs | 36 ++++++++++++------- compiler/rustc_codegen_llvm/src/llvm/ffi.rs | 6 ++++ .../rustc_llvm/llvm-wrapper/PassWrapper.cpp | 33 +++++++++++++++++ 4 files changed, 86 insertions(+), 16 deletions(-) diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs index c1d3392386c..3964039ac2d 100644 --- a/compiler/rustc_codegen_llvm/src/back/lto.rs +++ b/compiler/rustc_codegen_llvm/src/back/lto.rs @@ -1,4 +1,6 @@ -use crate::back::write::{self, save_temp_bitcode, CodegenDiagnosticsStage, DiagnosticHandlers}; +use crate::back::write::{ + self, bitcode_section_name, save_temp_bitcode, CodegenDiagnosticsStage, DiagnosticHandlers, +}; use crate::errors::{ DynamicLinkingWithLTO, LlvmError, LtoBitcodeFromRlib, LtoDisallowed, LtoDylib, }; @@ -120,6 +122,7 @@ fn prepare_lto( info!("adding bitcode from {}", name); match get_bitcode_slice_from_object_data( child.data(&*archive_data).expect("corrupt rlib"), + cgcx, ) { Ok(data) => { let module = SerializedModule::FromRlib(data.to_vec()); @@ -141,10 +144,26 @@ fn prepare_lto( Ok((symbols_below_threshold, upstream_modules)) } -fn get_bitcode_slice_from_object_data(obj: &[u8]) -> Result<&[u8], LtoBitcodeFromRlib> { +fn get_bitcode_slice_from_object_data<'a>( + obj: &'a [u8], + cgcx: &CodegenContext, +) -> Result<&'a [u8], LtoBitcodeFromRlib> { + // We're about to assume the data here is an object file with sections, but if it's raw LLVM IR that + // won't work. Fortunately, if that's what we have we can just return the object directly, so we sniff + // the relevant magic strings here and return. + if obj.starts_with(b"\xDE\xC0\x17\x0B") || obj.starts_with(b"BC\xC0\xDE") { + return Ok(obj); + } + let section_name = bitcode_section_name(cgcx); let mut len = 0; - let data = - unsafe { llvm::LLVMRustGetBitcodeSliceFromObjectData(obj.as_ptr(), obj.len(), &mut len) }; + let data = unsafe { + llvm::LLVMRustGetSliceFromObjectDataByName( + obj.as_ptr(), + obj.len(), + section_name.as_ptr(), + &mut len, + ) + }; if !data.is_null() { assert!(len != 0); let bc = unsafe { slice::from_raw_parts(data, len) }; diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs index 47cc5bd52e2..d73714ef357 100644 --- a/compiler/rustc_codegen_llvm/src/back/write.rs +++ b/compiler/rustc_codegen_llvm/src/back/write.rs @@ -853,6 +853,27 @@ fn create_section_with_flags_asm(section_name: &str, section_flags: &str, data: asm } +fn target_is_apple(cgcx: &CodegenContext) -> bool { + cgcx.opts.target_triple.triple().contains("-ios") + || cgcx.opts.target_triple.triple().contains("-darwin") + || cgcx.opts.target_triple.triple().contains("-tvos") + || cgcx.opts.target_triple.triple().contains("-watchos") +} + +fn target_is_aix(cgcx: &CodegenContext) -> bool { + cgcx.opts.target_triple.triple().contains("-aix") +} + +pub(crate) fn bitcode_section_name(cgcx: &CodegenContext) -> &'static str { + if target_is_apple(cgcx) { + "__LLVM,__bitcode\0" + } else if target_is_aix(cgcx) { + ".ipa\0" + } else { + ".llvmbc\0" + } +} + /// Embed the bitcode of an LLVM module in the LLVM module itself. /// /// This is done primarily for iOS where it appears to be standard to compile C @@ -913,11 +934,8 @@ unsafe fn embed_bitcode( // Unfortunately, LLVM provides no way to set custom section flags. For ELF // and COFF we emit the sections using module level inline assembly for that // reason (see issue #90326 for historical background). - let is_aix = cgcx.opts.target_triple.triple().contains("-aix"); - let is_apple = cgcx.opts.target_triple.triple().contains("-ios") - || cgcx.opts.target_triple.triple().contains("-darwin") - || cgcx.opts.target_triple.triple().contains("-tvos") - || cgcx.opts.target_triple.triple().contains("-watchos"); + let is_aix = target_is_aix(cgcx); + let is_apple = target_is_apple(cgcx); if is_apple || is_aix || cgcx.opts.target_triple.triple().starts_with("wasm") @@ -932,13 +950,7 @@ unsafe fn embed_bitcode( ); llvm::LLVMSetInitializer(llglobal, llconst); - let section = if is_apple { - "__LLVM,__bitcode\0" - } else if is_aix { - ".ipa\0" - } else { - ".llvmbc\0" - }; + let section = bitcode_section_name(cgcx); llvm::LLVMSetSection(llglobal, section.as_ptr().cast()); llvm::LLVMRustSetLinkage(llglobal, llvm::Linkage::PrivateLinkage); llvm::LLVMSetGlobalConstant(llglobal, llvm::True); diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index 01cbf7d3b11..9f16889b953 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -2319,6 +2319,12 @@ extern "C" { len: usize, out_len: &mut usize, ) -> *const u8; + pub fn LLVMRustGetSliceFromObjectDataByName( + data: *const u8, + len: usize, + name: *const u8, + out_len: &mut usize, + ) -> *const u8; pub fn LLVMRustLinkerNew(M: &Module) -> &mut Linker<'_>; pub fn LLVMRustLinkerAdd( diff --git a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp index 8f8f1d8c04f..bc5163ab138 100644 --- a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp +++ b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -1511,6 +1512,38 @@ LLVMRustGetBitcodeSliceFromObjectData(const char *data, return BitcodeOrError->getBufferStart(); } +// Find a section of an object file by name. Fail if the section is missing or +// empty. +extern "C" const char *LLVMRustGetSliceFromObjectDataByName(const char *data, + size_t len, + const char *name, + size_t *out_len) { + *out_len = 0; + StringRef Data(data, len); + MemoryBufferRef Buffer(Data, ""); // The id is unused. + file_magic Type = identify_magic(Buffer.getBuffer()); + Expected> ObjFileOrError = + object::ObjectFile::createObjectFile(Buffer, Type); + if (!ObjFileOrError) { + LLVMRustSetLastError(toString(ObjFileOrError.takeError()).c_str()); + return nullptr; + } + for (const object::SectionRef &Sec : (*ObjFileOrError)->sections()) { + Expected Name = Sec.getName(); + if (Name && *Name == name) { + Expected SectionOrError = Sec.getContents(); + if (!SectionOrError) { + LLVMRustSetLastError(toString(SectionOrError.takeError()).c_str()); + return nullptr; + } + *out_len = SectionOrError->size(); + return SectionOrError->data(); + } + } + LLVMRustSetLastError("could not find requested section"); + return nullptr; +} + // Computes the LTO cache key for the provided 'ModId' in the given 'Data', // storing the result in 'KeyOut'. // Currently, this cache key is a SHA-1 hash of anything that could affect From 0db66022b1e0f057362be1ca8593c3ff1fee1c85 Mon Sep 17 00:00:00 2001 From: Augie Fackler Date: Thu, 7 Sep 2023 11:56:25 -0400 Subject: [PATCH 2/2] lto: handle Apple platforms correctly by eliding __LLVM, from section name --- compiler/rustc_codegen_llvm/src/back/lto.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs index 3964039ac2d..5cf83b1accb 100644 --- a/compiler/rustc_codegen_llvm/src/back/lto.rs +++ b/compiler/rustc_codegen_llvm/src/back/lto.rs @@ -154,7 +154,10 @@ fn get_bitcode_slice_from_object_data<'a>( if obj.starts_with(b"\xDE\xC0\x17\x0B") || obj.starts_with(b"BC\xC0\xDE") { return Ok(obj); } - let section_name = bitcode_section_name(cgcx); + // We drop the "__LLVM," prefix here because on Apple platforms there's a notion of "segment name" + // which in the public API for sections gets treated as part of the section name, but internally + // in MachOObjectFile.cpp gets treated separately. + let section_name = bitcode_section_name(cgcx).trim_start_matches("__LLVM,"); let mut len = 0; let data = unsafe { llvm::LLVMRustGetSliceFromObjectDataByName(