rust/src/link_copied.rs

637 lines
27 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! All functions here are copied from https://github.com/rust-lang/rust/blob/942864a000efd74b73e36bda5606b2cdb55ecf39/src/librustc_codegen_llvm/back/link.rs
use std::fmt;
use std::fs;
use std::io;
use std::iter;
use std::path::{Path, PathBuf};
use std::process::{Output, Stdio};
use log::info;
use rustc::middle::cstore::{NativeLibrary, NativeLibraryKind};
use rustc::middle::dependency_format::Linkage;
use rustc::session::config::{self, OutputType, RUST_CGU_EXT};
use rustc::session::search_paths::PathKind;
use rustc::session::Session;
use rustc::util::common::time;
use rustc_codegen_ssa::back::command::Command;
use rustc_codegen_ssa::back::linker::*;
use rustc_codegen_ssa::back::link::*;
use rustc_data_structures::fx::FxHashSet;
use rustc_fs_util::fix_windows_verbatim_for_gcc;
use syntax::attr;
use crate::prelude::*;
use crate::archive::{ArchiveBuilder, ArchiveConfig};
use crate::metadata::METADATA_FILENAME;
// cg_clif doesn't have bytecode, so this is just a dummy
const RLIB_BYTECODE_EXTENSION: &str = ".cg_clif_bytecode_dummy";
fn archive_search_paths(sess: &Session) -> Vec<PathBuf> {
sess.target_filesearch(PathKind::Native).search_path_dirs()
}
fn archive_config<'a>(sess: &'a Session,
output: &Path,
input: Option<&Path>) -> ArchiveConfig<'a> {
ArchiveConfig {
sess,
dst: output.to_path_buf(),
src: input.map(|p| p.to_path_buf()),
lib_search_paths: archive_search_paths(sess),
}
}
pub fn exec_linker(sess: &Session, cmd: &mut Command, out_filename: &Path, tmpdir: &Path)
-> io::Result<Output>
{
// When attempting to spawn the linker we run a risk of blowing out the
// size limits for spawning a new process with respect to the arguments
// we pass on the command line.
//
// Here we attempt to handle errors from the OS saying "your list of
// arguments is too big" by reinvoking the linker again with an `@`-file
// that contains all the arguments. The theory is that this is then
// accepted on all linkers and the linker will read all its options out of
// there instead of looking at the command line.
if !cmd.very_likely_to_exceed_some_spawn_limit() {
match cmd.command().stdout(Stdio::piped()).stderr(Stdio::piped()).spawn() {
Ok(child) => {
let output = child.wait_with_output();
flush_linked_file(&output, out_filename)?;
return output;
}
Err(ref e) if command_line_too_big(e) => {
info!("command line to linker was too big: {}", e);
}
Err(e) => return Err(e)
}
}
info!("falling back to passing arguments to linker via an @-file");
let mut cmd2 = cmd.clone();
let mut args = String::new();
for arg in cmd2.take_args() {
args.push_str(&Escape {
arg: arg.to_str().unwrap(),
is_like_msvc: sess.target.target.options.is_like_msvc,
}.to_string());
args.push_str("\n");
}
let file = tmpdir.join("linker-arguments");
let bytes = if sess.target.target.options.is_like_msvc {
let mut out = Vec::with_capacity((1 + args.len()) * 2);
// start the stream with a UTF-16 BOM
for c in iter::once(0xFEFF).chain(args.encode_utf16()) {
// encode in little endian
out.push(c as u8);
out.push((c >> 8) as u8);
}
out
} else {
args.into_bytes()
};
fs::write(&file, &bytes)?;
cmd2.arg(format!("@{}", file.display()));
info!("invoking linker {:?}", cmd2);
let output = cmd2.output();
flush_linked_file(&output, out_filename)?;
return output;
#[cfg(unix)]
fn flush_linked_file(_: &io::Result<Output>, _: &Path) -> io::Result<()> {
Ok(())
}
#[cfg(windows)]
fn flush_linked_file(command_output: &io::Result<Output>, out_filename: &Path)
-> io::Result<()>
{
// On Windows, under high I/O load, output buffers are sometimes not flushed,
// even long after process exit, causing nasty, non-reproducible output bugs.
//
// File::sync_all() calls FlushFileBuffers() down the line, which solves the problem.
//
// А full writeup of the original Chrome bug can be found at
// randomascii.wordpress.com/2018/02/25/compiler-bug-linker-bug-windows-kernel-bug/amp
if let &Ok(ref out) = command_output {
if out.status.success() {
if let Ok(of) = fs::OpenOptions::new().write(true).open(out_filename) {
of.sync_all()?;
}
}
}
Ok(())
}
#[cfg(unix)]
fn command_line_too_big(err: &io::Error) -> bool {
err.raw_os_error() == Some(::libc::E2BIG)
}
#[cfg(windows)]
fn command_line_too_big(err: &io::Error) -> bool {
const ERROR_FILENAME_EXCED_RANGE: i32 = 206;
err.raw_os_error() == Some(ERROR_FILENAME_EXCED_RANGE)
}
struct Escape<'a> {
arg: &'a str,
is_like_msvc: bool,
}
impl<'a> fmt::Display for Escape<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if self.is_like_msvc {
// This is "documented" at
// https://msdn.microsoft.com/en-us/library/4xdcbak7.aspx
//
// Unfortunately there's not a great specification of the
// syntax I could find online (at least) but some local
// testing showed that this seemed sufficient-ish to catch
// at least a few edge cases.
write!(f, "\"")?;
for c in self.arg.chars() {
match c {
'"' => write!(f, "\\{}", c)?,
c => write!(f, "{}", c)?,
}
}
write!(f, "\"")?;
} else {
// This is documented at https://linux.die.net/man/1/ld, namely:
//
// > Options in file are separated by whitespace. A whitespace
// > character may be included in an option by surrounding the
// > entire option in either single or double quotes. Any
// > character (including a backslash) may be included by
// > prefixing the character to be included with a backslash.
//
// We put an argument on each line, so all we need to do is
// ensure the line is interpreted as one whole argument.
for c in self.arg.chars() {
match c {
'\\' |
' ' => write!(f, "\\{}", c)?,
c => write!(f, "{}", c)?,
}
}
}
Ok(())
}
}
}
// # Rust Crate linking
//
// Rust crates are not considered at all when creating an rlib output. All
// dependencies will be linked when producing the final output (instead of
// the intermediate rlib version)
pub fn add_upstream_rust_crates(cmd: &mut dyn Linker,
sess: &Session,
codegen_results: &CodegenResults,
crate_type: config::CrateType,
tmpdir: &Path) {
// All of the heavy lifting has previously been accomplished by the
// dependency_format module of the compiler. This is just crawling the
// output of that module, adding crates as necessary.
//
// Linking to a rlib involves just passing it to the linker (the linker
// will slurp up the object files inside), and linking to a dynamic library
// involves just passing the right -l flag.
let formats = sess.dependency_formats.borrow();
let data = formats.get(&crate_type).unwrap();
// Invoke get_used_crates to ensure that we get a topological sorting of
// crates.
let deps = &codegen_results.crate_info.used_crates_dynamic;
// There's a few internal crates in the standard library (aka libcore and
// libstd) which actually have a circular dependence upon one another. This
// currently arises through "weak lang items" where libcore requires things
// like `rust_begin_unwind` but libstd ends up defining it. To get this
// circular dependence to work correctly in all situations we'll need to be
// sure to correctly apply the `--start-group` and `--end-group` options to
// GNU linkers, otherwise if we don't use any other symbol from the standard
// library it'll get discarded and the whole application won't link.
//
// In this loop we're calculating the `group_end`, after which crate to
// pass `--end-group` and `group_start`, before which crate to pass
// `--start-group`. We currently do this by passing `--end-group` after
// the first crate (when iterating backwards) that requires a lang item
// defined somewhere else. Once that's set then when we've defined all the
// necessary lang items we'll pass `--start-group`.
//
// Note that this isn't amazing logic for now but it should do the trick
// for the current implementation of the standard library.
let mut group_end = None;
let mut group_start = None;
let mut end_with = FxHashSet::default();
let info = &codegen_results.crate_info;
for &(cnum, _) in deps.iter().rev() {
if let Some(missing) = info.missing_lang_items.get(&cnum) {
end_with.extend(missing.iter().cloned());
if end_with.len() > 0 && group_end.is_none() {
group_end = Some(cnum);
}
}
end_with.retain(|item| info.lang_item_to_crate.get(item) != Some(&cnum));
if end_with.len() == 0 && group_end.is_some() {
group_start = Some(cnum);
break
}
}
// If we didn't end up filling in all lang items from upstream crates then
// we'll be filling it in with our crate. This probably means we're the
// standard library itself, so skip this for now.
if group_end.is_some() && group_start.is_none() {
group_end = None;
}
let mut compiler_builtins = None;
for &(cnum, _) in deps.iter() {
if group_start == Some(cnum) {
cmd.group_start();
}
// We may not pass all crates through to the linker. Some crates may
// appear statically in an existing dylib, meaning we'll pick up all the
// symbols from the dylib.
let src = &codegen_results.crate_info.used_crate_source[&cnum];
match data[cnum.as_usize() - 1] {
_ if codegen_results.crate_info.profiler_runtime == Some(cnum) => {
add_static_crate(cmd, sess, codegen_results, tmpdir, crate_type, cnum);
}
_ if codegen_results.crate_info.sanitizer_runtime == Some(cnum) => {
link_sanitizer_runtime(cmd, sess, codegen_results, tmpdir, cnum);
}
// compiler-builtins are always placed last to ensure that they're
// linked correctly.
_ if codegen_results.crate_info.compiler_builtins == Some(cnum) => {
assert!(compiler_builtins.is_none());
compiler_builtins = Some(cnum);
}
Linkage::NotLinked |
Linkage::IncludedFromDylib => {}
Linkage::Static => {
add_static_crate(cmd, sess, codegen_results, tmpdir, crate_type, cnum);
}
Linkage::Dynamic => {
add_dynamic_crate(cmd, sess, &src.dylib.as_ref().unwrap().0)
}
}
if group_end == Some(cnum) {
cmd.group_end();
}
}
// compiler-builtins are always placed last to ensure that they're
// linked correctly.
// We must always link the `compiler_builtins` crate statically. Even if it
// was already "included" in a dylib (e.g. `libstd` when `-C prefer-dynamic`
// is used)
if let Some(cnum) = compiler_builtins {
add_static_crate(cmd, sess, codegen_results, tmpdir, crate_type, cnum);
}
// Converts a library file-stem into a cc -l argument
fn unlib<'a>(config: &config::Config, stem: &'a str) -> &'a str {
if stem.starts_with("lib") && !config.target.options.is_like_windows {
&stem[3..]
} else {
stem
}
}
// We must link the sanitizer runtime using -Wl,--whole-archive but since
// it's packed in a .rlib, it contains stuff that are not objects that will
// make the linker error. So we must remove those bits from the .rlib before
// linking it.
fn link_sanitizer_runtime(cmd: &mut dyn Linker,
sess: &Session,
codegen_results: &CodegenResults,
tmpdir: &Path,
cnum: CrateNum) {
let src = &codegen_results.crate_info.used_crate_source[&cnum];
let cratepath = &src.rlib.as_ref().unwrap().0;
if sess.target.target.options.is_like_osx {
// On Apple platforms, the sanitizer is always built as a dylib, and
// LLVM will link to `@rpath/*.dylib`, so we need to specify an
// rpath to the library as well (the rpath should be absolute, see
// PR #41352 for details).
//
// FIXME: Remove this logic into librustc_*san once Cargo supports it
let rpath = cratepath.parent().unwrap();
let rpath = rpath.to_str().expect("non-utf8 component in path");
cmd.args(&["-Wl,-rpath".into(), "-Xlinker".into(), rpath.into()]);
}
let dst = tmpdir.join(cratepath.file_name().unwrap());
let cfg = archive_config(sess, &dst, Some(cratepath));
let mut archive = ArchiveBuilder::new(cfg);
archive.update_symbols();
for f in archive.src_files() {
if f.ends_with(RLIB_BYTECODE_EXTENSION) || f == METADATA_FILENAME {
archive.remove_file(&f);
continue
}
}
archive.build();
cmd.link_whole_rlib(&dst);
}
// Adds the static "rlib" versions of all crates to the command line.
// There's a bit of magic which happens here specifically related to LTO and
// dynamic libraries. Specifically:
//
// * For LTO, we remove upstream object files.
// * For dylibs we remove metadata and bytecode from upstream rlibs
//
// When performing LTO, almost(*) all of the bytecode from the upstream
// libraries has already been included in our object file output. As a
// result we need to remove the object files in the upstream libraries so
// the linker doesn't try to include them twice (or whine about duplicate
// symbols). We must continue to include the rest of the rlib, however, as
// it may contain static native libraries which must be linked in.
//
// (*) Crates marked with `#![no_builtins]` don't participate in LTO and
// their bytecode wasn't included. The object files in those libraries must
// still be passed to the linker.
//
// When making a dynamic library, linkers by default don't include any
// object files in an archive if they're not necessary to resolve the link.
// We basically want to convert the archive (rlib) to a dylib, though, so we
// *do* want everything included in the output, regardless of whether the
// linker thinks it's needed or not. As a result we must use the
// --whole-archive option (or the platform equivalent). When using this
// option the linker will fail if there are non-objects in the archive (such
// as our own metadata and/or bytecode). All in all, for rlibs to be
// entirely included in dylibs, we need to remove all non-object files.
//
// Note, however, that if we're not doing LTO or we're not producing a dylib
// (aka we're making an executable), we can just pass the rlib blindly to
// the linker (fast) because it's fine if it's not actually included as
// we're at the end of the dependency chain.
fn add_static_crate(cmd: &mut dyn Linker,
sess: &Session,
codegen_results: &CodegenResults,
tmpdir: &Path,
crate_type: config::CrateType,
cnum: CrateNum) {
let src = &codegen_results.crate_info.used_crate_source[&cnum];
let cratepath = &src.rlib.as_ref().unwrap().0;
// See the comment above in `link_staticlib` and `link_rlib` for why if
// there's a static library that's not relevant we skip all object
// files.
let native_libs = &codegen_results.crate_info.native_libraries[&cnum];
let skip_native = native_libs.iter().any(|lib| {
lib.kind == NativeLibraryKind::NativeStatic && !relevant_lib(sess, lib)
});
if (!are_upstream_rust_objects_already_included(sess) ||
ignored_for_lto(sess, &codegen_results.crate_info, cnum)) &&
crate_type != config::CrateType::Dylib &&
!skip_native {
cmd.link_rlib(&fix_windows_verbatim_for_gcc(cratepath));
return
}
let dst = tmpdir.join(cratepath.file_name().unwrap());
let name = cratepath.file_name().unwrap().to_str().unwrap();
let name = &name[3..name.len() - 5]; // chop off lib/.rlib
time(sess, &format!("altering {}.rlib", name), || {
let cfg = archive_config(sess, &dst, Some(cratepath));
let mut archive = ArchiveBuilder::new(cfg);
archive.update_symbols();
let mut any_objects = false;
for f in archive.src_files() {
if f.ends_with(RLIB_BYTECODE_EXTENSION) || f == METADATA_FILENAME {
archive.remove_file(&f);
continue
}
let canonical = f.replace("-", "_");
let canonical_name = name.replace("-", "_");
// Look for `.rcgu.o` at the end of the filename to conclude
// that this is a Rust-related object file.
fn looks_like_rust(s: &str) -> bool {
let path = Path::new(s);
let ext = path.extension().and_then(|s| s.to_str());
if ext != Some(OutputType::Object.extension()) {
return false
}
let ext2 = path.file_stem()
.and_then(|s| Path::new(s).extension())
.and_then(|s| s.to_str());
ext2 == Some(RUST_CGU_EXT)
}
let is_rust_object =
canonical.starts_with(&canonical_name) &&
looks_like_rust(&f);
// If we've been requested to skip all native object files
// (those not generated by the rust compiler) then we can skip
// this file. See above for why we may want to do this.
let skip_because_cfg_say_so = skip_native && !is_rust_object;
// If we're performing LTO and this is a rust-generated object
// file, then we don't need the object file as it's part of the
// LTO module. Note that `#![no_builtins]` is excluded from LTO,
// though, so we let that object file slide.
let skip_because_lto = are_upstream_rust_objects_already_included(sess) &&
is_rust_object &&
(sess.target.target.options.no_builtins ||
!codegen_results.crate_info.is_no_builtins.contains(&cnum));
if skip_because_cfg_say_so || skip_because_lto {
archive.remove_file(&f);
} else {
any_objects = true;
}
}
if !any_objects {
return
}
archive.build();
// If we're creating a dylib, then we need to include the
// whole of each object in our archive into that artifact. This is
// because a `dylib` can be reused as an intermediate artifact.
//
// Note, though, that we don't want to include the whole of a
// compiler-builtins crate (e.g. compiler-rt) because it'll get
// repeatedly linked anyway.
if crate_type == config::CrateType::Dylib &&
codegen_results.crate_info.compiler_builtins != Some(cnum) {
cmd.link_whole_rlib(&fix_windows_verbatim_for_gcc(&dst));
} else {
cmd.link_rlib(&fix_windows_verbatim_for_gcc(&dst));
}
});
}
// Same thing as above, but for dynamic crates instead of static crates.
fn add_dynamic_crate(cmd: &mut dyn Linker, sess: &Session, cratepath: &Path) {
// If we're performing LTO, then it should have been previously required
// that all upstream rust dependencies were available in an rlib format.
assert!(!are_upstream_rust_objects_already_included(sess));
// Just need to tell the linker about where the library lives and
// what its name is
let parent = cratepath.parent();
if let Some(dir) = parent {
cmd.include_path(&fix_windows_verbatim_for_gcc(dir));
}
let filestem = cratepath.file_stem().unwrap().to_str().unwrap();
cmd.link_rust_dylib(&unlib(&sess.target, filestem),
parent.unwrap_or(Path::new("")));
}
}
// # Native library linking
//
// User-supplied library search paths (-L on the command line). These are
// the same paths used to find Rust crates, so some of them may have been
// added already by the previous crate linking code. This only allows them
// to be found at compile time so it is still entirely up to outside
// forces to make sure that library can be found at runtime.
//
// Also note that the native libraries linked here are only the ones located
// in the current crate. Upstream crates with native library dependencies
// may have their native library pulled in above.
pub fn add_local_native_libraries(cmd: &mut dyn Linker,
sess: &Session,
codegen_results: &CodegenResults) {
let filesearch = sess.target_filesearch(PathKind::All);
for search_path in filesearch.search_paths() {
match search_path.kind {
PathKind::Framework => { cmd.framework_path(&search_path.dir); }
_ => { cmd.include_path(&fix_windows_verbatim_for_gcc(&search_path.dir)); }
}
}
let relevant_libs = codegen_results.crate_info.used_libraries.iter().filter(|l| {
relevant_lib(sess, l)
});
let search_path = archive_search_paths(sess);
for lib in relevant_libs {
let name = match lib.name {
Some(ref l) => l,
None => continue,
};
match lib.kind {
NativeLibraryKind::NativeUnknown => cmd.link_dylib(&name.as_str()),
NativeLibraryKind::NativeFramework => cmd.link_framework(&name.as_str()),
NativeLibraryKind::NativeStaticNobundle => cmd.link_staticlib(&name.as_str()),
NativeLibraryKind::NativeStatic => cmd.link_whole_staticlib(&name.as_str(),
&search_path)
}
}
}
// Link in all of our upstream crates' native dependencies. Remember that
// all of these upstream native dependencies are all non-static
// dependencies. We've got two cases then:
//
// 1. The upstream crate is an rlib. In this case we *must* link in the
// native dependency because the rlib is just an archive.
//
// 2. The upstream crate is a dylib. In order to use the dylib, we have to
// have the dependency present on the system somewhere. Thus, we don't
// gain a whole lot from not linking in the dynamic dependency to this
// crate as well.
//
// The use case for this is a little subtle. In theory the native
// dependencies of a crate are purely an implementation detail of the crate
// itself, but the problem arises with generic and inlined functions. If a
// generic function calls a native function, then the generic function must
// be instantiated in the target crate, meaning that the native symbol must
// also be resolved in the target crate.
pub fn add_upstream_native_libraries(cmd: &mut dyn Linker,
sess: &Session,
codegen_results: &CodegenResults,
crate_type: config::CrateType) {
// Be sure to use a topological sorting of crates because there may be
// interdependencies between native libraries. When passing -nodefaultlibs,
// for example, almost all native libraries depend on libc, so we have to
// make sure that's all the way at the right (liblibc is near the base of
// the dependency chain).
//
// This passes RequireStatic, but the actual requirement doesn't matter,
// we're just getting an ordering of crate numbers, we're not worried about
// the paths.
let formats = sess.dependency_formats.borrow();
let data = formats.get(&crate_type).unwrap();
let crates = &codegen_results.crate_info.used_crates_static;
for &(cnum, _) in crates {
for lib in codegen_results.crate_info.native_libraries[&cnum].iter() {
let name = match lib.name {
Some(ref l) => l,
None => continue,
};
if !relevant_lib(sess, &lib) {
continue
}
match lib.kind {
NativeLibraryKind::NativeUnknown => cmd.link_dylib(&name.as_str()),
NativeLibraryKind::NativeFramework => cmd.link_framework(&name.as_str()),
NativeLibraryKind::NativeStaticNobundle => {
// Link "static-nobundle" native libs only if the crate they originate from
// is being linked statically to the current crate. If it's linked dynamically
// or is an rlib already included via some other dylib crate, the symbols from
// native libs will have already been included in that dylib.
if data[cnum.as_usize() - 1] == Linkage::Static {
cmd.link_staticlib(&name.as_str())
}
},
// ignore statically included native libraries here as we've
// already included them when we included the rust library
// previously
NativeLibraryKind::NativeStatic => {}
}
}
}
}
fn relevant_lib(sess: &Session, lib: &NativeLibrary) -> bool {
match lib.cfg {
Some(ref cfg) => attr::cfg_matches(cfg, &sess.parse_sess, None),
None => true,
}
}
fn are_upstream_rust_objects_already_included(sess: &Session) -> bool {
match sess.lto() {
Lto::Fat => true,
Lto::Thin => {
// If we defer LTO to the linker, we haven't run LTO ourselves, so
// any upstream object files have not been copied yet.
!sess.opts.debugging_opts.cross_lang_lto.enabled()
}
Lto::No |
Lto::ThinLocal => false,
}
}