From 773f533eae25129cea7241b74e54f26ce5eebb62 Mon Sep 17 00:00:00 2001 From: Gary Guo Date: Sat, 2 Apr 2022 22:54:51 +0100 Subject: [PATCH] Synthesis object file for `#[used]` and exported symbols --- compiler/rustc_codegen_ssa/src/back/link.rs | 69 +++++++++++++++++++ compiler/rustc_codegen_ssa/src/back/linker.rs | 46 +++++++++++++ .../rustc_codegen_ssa/src/back/metadata.rs | 2 +- .../src/back/symbol_export.rs | 54 +++++++++++++-- compiler/rustc_codegen_ssa/src/base.rs | 7 ++ compiler/rustc_codegen_ssa/src/lib.rs | 2 + .../src/middle/exported_symbols.rs | 10 +++ 7 files changed, 182 insertions(+), 8 deletions(-) diff --git a/compiler/rustc_codegen_ssa/src/back/link.rs b/compiler/rustc_codegen_ssa/src/back/link.rs index 548ae0e411d..45ae5ffc513 100644 --- a/compiler/rustc_codegen_ssa/src/back/link.rs +++ b/compiler/rustc_codegen_ssa/src/back/link.rs @@ -7,6 +7,7 @@ use rustc_errors::{ErrorGuaranteed, Handler}; use rustc_fs_util::fix_windows_verbatim_for_gcc; use rustc_hir::def_id::CrateNum; use rustc_middle::middle::dependency_format::Linkage; +use rustc_middle::middle::exported_symbols::SymbolExportKind; use rustc_session::config::{self, CFGuard, CrateType, DebugInfo, LdImpl, Strip}; use rustc_session::config::{OutputFilenames, OutputType, PrintRequest, SplitDwarfKind}; use rustc_session::cstore::DllImport; @@ -1654,6 +1655,67 @@ fn add_post_link_args(cmd: &mut dyn Linker, sess: &Session, flavor: LinkerFlavor } } +/// Add a synthetic object file that contains reference to all symbols that we want to expose to +/// the linker. +/// +/// Background: we implement rlibs as static library (archives). Linkers treat archives +/// differently from object files: all object files participate in linking, while archives will +/// only participate in linking if they can satisfy at least one undefined reference (version +/// scripts doesn't count). This causes `#[no_mangle]` or `#[used]` items to be ignored by the +/// linker, and since they never participate in the linking, using `KEEP` in the linker scripts +/// can't keep them either. This causes #47384. +/// +/// To keep them around, we could use `--whole-archive` and equivalents to force rlib to +/// participate in linking like object files, but this proves to be expensive (#93791). Therefore +/// we instead just introduce an undefined reference to them. This could be done by `-u` command +/// line option to the linker or `EXTERN(...)` in linker scripts, however they does not only +/// introduce an undefined reference, but also make them the GC roots, preventing `--gc-sections` +/// from removing them, and this is especially problematic for embedded programming where every +/// byte counts. +/// +/// This method creates a synthetic object file, which contains undefined references to all symbols +/// that are necessary for the linking. They are only present in symbol table but not actually +/// used in any sections, so the linker will therefore pick relevant rlibs for linking, but +/// unused `#[no_mangle]` or `#[used]` can still be discard by GC sections. +fn add_linked_symbol_object( + cmd: &mut dyn Linker, + sess: &Session, + tmpdir: &Path, + symbols: &[(String, SymbolExportKind)], +) { + if symbols.is_empty() { + return; + } + + let Some(mut file) = super::metadata::create_object_file(sess) else { + return; + }; + + for (sym, kind) in symbols.iter() { + file.add_symbol(object::write::Symbol { + name: sym.clone().into(), + value: 0, + size: 0, + kind: match kind { + SymbolExportKind::Text => object::SymbolKind::Text, + SymbolExportKind::Data => object::SymbolKind::Data, + SymbolExportKind::Tls => object::SymbolKind::Tls, + }, + scope: object::SymbolScope::Unknown, + weak: false, + section: object::write::SymbolSection::Undefined, + flags: object::SymbolFlags::None, + }); + } + + let path = tmpdir.join("symbols.o"); + let result = std::fs::write(&path, file.write().unwrap()); + if let Err(e) = result { + sess.fatal(&format!("failed to write {}: {}", path.display(), e)); + } + cmd.add_object(&path); +} + /// Add object files containing code from the current crate. fn add_local_crate_regular_objects(cmd: &mut dyn Linker, codegen_results: &CodegenResults) { for obj in codegen_results.modules.iter().filter_map(|m| m.object.as_ref()) { @@ -1794,6 +1856,13 @@ fn linker_with_args<'a, B: ArchiveBuilder<'a>>( // Pre-link CRT objects. add_pre_link_objects(cmd, sess, link_output_kind, crt_objects_fallback); + add_linked_symbol_object( + cmd, + sess, + tmpdir, + &codegen_results.crate_info.linked_symbols[&crate_type], + ); + // Sanitizer libraries. add_sanitizer_libraries(sess, crate_type, cmd); diff --git a/compiler/rustc_codegen_ssa/src/back/linker.rs b/compiler/rustc_codegen_ssa/src/back/linker.rs index a90b8a62c62..044dd9556fe 100644 --- a/compiler/rustc_codegen_ssa/src/back/linker.rs +++ b/compiler/rustc_codegen_ssa/src/back/linker.rs @@ -12,6 +12,7 @@ use std::{env, mem, str}; use rustc_hir::def_id::{CrateNum, LOCAL_CRATE}; use rustc_middle::middle::dependency_format::Linkage; +use rustc_middle::middle::exported_symbols::SymbolExportKind; use rustc_middle::ty::TyCtxt; use rustc_serialize::{json, Encoder}; use rustc_session::config::{self, CrateType, DebugInfo, LinkerPluginLto, Lto, OptLevel, Strip}; @@ -1557,6 +1558,51 @@ pub(crate) fn exported_symbols(tcx: TyCtxt<'_>, crate_type: CrateType) -> Vec, + crate_type: CrateType, +) -> Vec<(String, SymbolExportKind)> { + match crate_type { + CrateType::Executable | CrateType::Cdylib => (), + CrateType::Staticlib | CrateType::ProcMacro | CrateType::Rlib | CrateType::Dylib => { + return Vec::new(); + } + } + + let mut symbols = Vec::new(); + + let export_threshold = symbol_export::crates_export_threshold(&[crate_type]); + for &(symbol, info) in tcx.exported_symbols(LOCAL_CRATE).iter() { + if info.level.is_below_threshold(export_threshold) || info.used { + symbols.push(( + symbol_export::symbol_name_for_instance_in_crate(tcx, symbol, LOCAL_CRATE), + info.kind, + )); + } + } + + let formats = tcx.dependency_formats(()); + let deps = formats.iter().find_map(|(t, list)| (*t == crate_type).then_some(list)).unwrap(); + + for (index, dep_format) in deps.iter().enumerate() { + let cnum = CrateNum::new(index + 1); + // For each dependency that we are linking to statically ... + if *dep_format == Linkage::Static { + // ... we add its symbol list to our export list. + for &(symbol, info) in tcx.exported_symbols(cnum).iter() { + if info.level.is_below_threshold(export_threshold) || info.used { + symbols.push(( + symbol_export::symbol_name_for_instance_in_crate(tcx, symbol, cnum), + info.kind, + )); + } + } + } + } + + symbols +} + /// Much simplified and explicit CLI for the NVPTX linker. The linker operates /// with bitcode and uses LLVM backend to generate a PTX assembly. pub struct PtxLinker<'a> { diff --git a/compiler/rustc_codegen_ssa/src/back/metadata.rs b/compiler/rustc_codegen_ssa/src/back/metadata.rs index c52269805c4..2e422728056 100644 --- a/compiler/rustc_codegen_ssa/src/back/metadata.rs +++ b/compiler/rustc_codegen_ssa/src/back/metadata.rs @@ -94,7 +94,7 @@ fn search_for_metadata<'a>( .map_err(|e| format!("failed to read {} section in '{}': {}", section, path.display(), e)) } -fn create_object_file(sess: &Session) -> Option> { +pub(crate) fn create_object_file(sess: &Session) -> Option> { let endianness = match sess.target.options.endian { Endian::Little => Endianness::Little, Endian::Big => Endianness::Big, diff --git a/compiler/rustc_codegen_ssa/src/back/symbol_export.rs b/compiler/rustc_codegen_ssa/src/back/symbol_export.rs index e14ebe3ae48..56159cc2e08 100644 --- a/compiler/rustc_codegen_ssa/src/back/symbol_export.rs +++ b/compiler/rustc_codegen_ssa/src/back/symbol_export.rs @@ -9,7 +9,7 @@ use rustc_hir::Node; use rustc_index::vec::IndexVec; use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags; use rustc_middle::middle::exported_symbols::{ - metadata_symbol_name, ExportedSymbol, SymbolExportInfo, SymbolExportLevel, + metadata_symbol_name, ExportedSymbol, SymbolExportInfo, SymbolExportKind, SymbolExportLevel, }; use rustc_middle::ty::query::{ExternProviders, Providers}; use rustc_middle::ty::subst::{GenericArgKind, SubstsRef}; @@ -124,6 +124,7 @@ fn reachable_non_generics_provider(tcx: TyCtxt<'_>, cnum: CrateNum) -> DefIdMap< } else { symbol_export_level(tcx, def_id.to_def_id()) }; + let codegen_attrs = tcx.codegen_fn_attrs(def_id.to_def_id()); debug!( "EXPORTED SYMBOL (local): {} ({:?})", tcx.symbol_name(Instance::mono(tcx, def_id.to_def_id())), @@ -131,6 +132,17 @@ fn reachable_non_generics_provider(tcx: TyCtxt<'_>, cnum: CrateNum) -> DefIdMap< ); (def_id.to_def_id(), SymbolExportInfo { level: export_level, + kind: if tcx.is_static(def_id.to_def_id()) { + if codegen_attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL) { + SymbolExportKind::Tls + } else { + SymbolExportKind::Data + } + } else { + SymbolExportKind::Text + }, + used: codegen_attrs.flags.contains(CodegenFnAttrFlags::USED) + || codegen_attrs.flags.contains(CodegenFnAttrFlags::USED_LINKER), }) }) .collect(); @@ -138,7 +150,11 @@ fn reachable_non_generics_provider(tcx: TyCtxt<'_>, cnum: CrateNum) -> DefIdMap< if let Some(id) = tcx.proc_macro_decls_static(()) { reachable_non_generics.insert( id.to_def_id(), - SymbolExportInfo { level: SymbolExportLevel::C }, + SymbolExportInfo { + level: SymbolExportLevel::C, + kind: SymbolExportKind::Data, + used: false, + }, ); } @@ -180,7 +196,11 @@ fn exported_symbols_provider_local<'tcx>( symbols.push(( exported_symbol, - SymbolExportInfo { level: SymbolExportLevel::C }, + SymbolExportInfo { + level: SymbolExportLevel::C, + kind: SymbolExportKind::Text, + used: false, + }, )); } @@ -191,7 +211,11 @@ fn exported_symbols_provider_local<'tcx>( symbols.push(( exported_symbol, - SymbolExportInfo { level: SymbolExportLevel::Rust }, + SymbolExportInfo { + level: SymbolExportLevel::Rust, + kind: SymbolExportKind::Text, + used: false, + }, )); } } @@ -207,7 +231,11 @@ fn exported_symbols_provider_local<'tcx>( let exported_symbol = ExportedSymbol::NoDefId(SymbolName::new(tcx, sym)); ( exported_symbol, - SymbolExportInfo { level: SymbolExportLevel::C }, + SymbolExportInfo { + level: SymbolExportLevel::C, + kind: SymbolExportKind::Data, + used: false, + }, ) })); } @@ -220,7 +248,11 @@ fn exported_symbols_provider_local<'tcx>( let exported_symbol = ExportedSymbol::NoDefId(SymbolName::new(tcx, sym)); ( exported_symbol, - SymbolExportInfo { level: SymbolExportLevel::C }, + SymbolExportInfo { + level: SymbolExportLevel::C, + kind: SymbolExportKind::Data, + used: false, + }, ) })); } @@ -231,7 +263,11 @@ fn exported_symbols_provider_local<'tcx>( symbols.push(( exported_symbol, - SymbolExportInfo { level: SymbolExportLevel::Rust }, + SymbolExportInfo { + level: SymbolExportLevel::Rust, + kind: SymbolExportKind::Data, + used: false, + }, )); } @@ -269,6 +305,8 @@ fn exported_symbols_provider_local<'tcx>( symbol, SymbolExportInfo { level: SymbolExportLevel::Rust, + kind: SymbolExportKind::Text, + used: false, }, )); } @@ -283,6 +321,8 @@ fn exported_symbols_provider_local<'tcx>( ExportedSymbol::DropGlue(ty), SymbolExportInfo { level: SymbolExportLevel::Rust, + kind: SymbolExportKind::Text, + used: false, }, )); } diff --git a/compiler/rustc_codegen_ssa/src/base.rs b/compiler/rustc_codegen_ssa/src/base.rs index 01056024805..5a1d0208842 100644 --- a/compiler/rustc_codegen_ssa/src/base.rs +++ b/compiler/rustc_codegen_ssa/src/base.rs @@ -801,6 +801,12 @@ impl CrateInfo { .iter() .map(|&c| (c, crate::back::linker::exported_symbols(tcx, c))) .collect(); + let linked_symbols = tcx + .sess + .crate_types() + .iter() + .map(|&c| (c, crate::back::linker::linked_symbols(tcx, c))) + .collect(); let local_crate_name = tcx.crate_name(LOCAL_CRATE); let crate_attrs = tcx.hir().attrs(rustc_hir::CRATE_HIR_ID); let subsystem = tcx.sess.first_attr_value_str_by_name(crate_attrs, sym::windows_subsystem); @@ -834,6 +840,7 @@ impl CrateInfo { let mut info = CrateInfo { target_cpu, exported_symbols, + linked_symbols, local_crate_name, compiler_builtins: None, profiler_runtime: None, diff --git a/compiler/rustc_codegen_ssa/src/lib.rs b/compiler/rustc_codegen_ssa/src/lib.rs index 6cf6be79a86..f3d5bb4bc62 100644 --- a/compiler/rustc_codegen_ssa/src/lib.rs +++ b/compiler/rustc_codegen_ssa/src/lib.rs @@ -28,6 +28,7 @@ use rustc_hir::def_id::CrateNum; use rustc_hir::LangItem; use rustc_middle::dep_graph::WorkProduct; use rustc_middle::middle::dependency_format::Dependencies; +use rustc_middle::middle::exported_symbols::SymbolExportKind; use rustc_middle::ty::query::{ExternProviders, Providers}; use rustc_session::config::{CrateType, OutputFilenames, OutputType, RUST_CGU_EXT}; use rustc_session::cstore::{self, CrateSource}; @@ -140,6 +141,7 @@ impl From<&cstore::NativeLib> for NativeLib { pub struct CrateInfo { pub target_cpu: String, pub exported_symbols: FxHashMap>, + pub linked_symbols: FxHashMap>, pub local_crate_name: Symbol, pub compiler_builtins: Option, pub profiler_runtime: Option, diff --git a/compiler/rustc_middle/src/middle/exported_symbols.rs b/compiler/rustc_middle/src/middle/exported_symbols.rs index a605e6dfdc1..631fd09ec4c 100644 --- a/compiler/rustc_middle/src/middle/exported_symbols.rs +++ b/compiler/rustc_middle/src/middle/exported_symbols.rs @@ -21,11 +21,21 @@ impl SymbolExportLevel { } } +/// Kind of exported symbols. +#[derive(Eq, PartialEq, Debug, Copy, Clone, Encodable, Decodable, HashStable)] +pub enum SymbolExportKind { + Text, + Data, + Tls, +} + /// The `SymbolExportInfo` of a symbols specifies symbol-related information /// that is relevant to code generation and linking. #[derive(Eq, PartialEq, Debug, Copy, Clone, TyEncodable, TyDecodable, HashStable)] pub struct SymbolExportInfo { pub level: SymbolExportLevel, + pub kind: SymbolExportKind, + pub used: bool, } #[derive(Eq, PartialEq, Debug, Copy, Clone, TyEncodable, TyDecodable, HashStable)]