//! Set and unset common attributes on LLVM values.
use std::ffi::CString;
use cstr::cstr;
use rustc_codegen_ssa::traits::*;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::small_c_str::SmallCStr;
use rustc_hir::def_id::DefId;
use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
use rustc_middle::ty::layout::HasTyCtxt;
use rustc_middle::ty::query::Providers;
use rustc_middle::ty::{self, TyCtxt};
use rustc_session::config::{OptLevel, SanitizerSet};
use rustc_session::Session;
use rustc_target::spec::StackProbeType;
use crate::attributes;
use crate::llvm::AttributePlace::Function;
use crate::llvm::{self, Attribute};
use crate::llvm_util;
pub use rustc_attr::{InlineAttr, InstructionSetAttr, OptimizeAttr};
use crate::context::CodegenCx;
use crate::value::Value;
/// Mark an LLVM function to use the provided inline heuristic.
#[inline]
fn inline(cx: &CodegenCx<'ll, '_>, val: &'ll Value, inline: InlineAttr) {
use self::InlineAttr::*;
match inline {
Hint => Attribute::InlineHint.apply_llfn(Function, val),
Always => Attribute::AlwaysInline.apply_llfn(Function, val),
Never => {
if cx.tcx().sess.target.arch != "amdgpu" {
Attribute::NoInline.apply_llfn(Function, val);
}
}
None => {}
};
}
/// Apply LLVM sanitize attributes.
#[inline]
pub fn sanitize(cx: &CodegenCx<'ll, '_>, no_sanitize: SanitizerSet, llfn: &'ll Value) {
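// The sanitizers enabled for this function are the globally enabled ones minus
// any the function opted out of via `#[no_sanitize(...)]`.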
let enabled = cx.tcx.sess.opts.debugging_opts.sanitizer - no_sanitize;
if enabled.contains(SanitizerSet::ADDRESS) {
llvm::Attribute::SanitizeAddress.apply_llfn(Function, llfn);
}
if enabled.contains(SanitizerSet::MEMORY) {
llvm::Attribute::SanitizeMemory.apply_llfn(Function, llfn);
}
if enabled.contains(SanitizerSet::THREAD) {
llvm::Attribute::SanitizeThread.apply_llfn(Function, llfn);
}
if enabled.contains(SanitizerSet::HWADDRESS) {
llvm::Attribute::SanitizeHWAddress.apply_llfn(Function, llfn);
}
}
/// Tell LLVM whether or not to emit the information necessary to unwind the stack for the function.
#[inline]
pub fn emit_uwtable(val: &'ll Value, emit: bool) {
Attribute::UWTable.toggle_llfn(Function, val, emit);
}
/// Tell LLVM if this function should be 'naked', i.e., skip the prologue and epilogue.
#[inline]
fn naked(val: &'ll Value, is_naked: bool) {
Attribute::Naked.toggle_llfn(Function, val, is_naked);
}
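/// Tell LLVM to keep the frame pointer for this function when the session
/// requires it, by setting the `"frame-pointer"="all"` string attribute.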
pub fn set_frame_pointer_elimination(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
if cx.sess().must_not_eliminate_frame_pointers() {
llvm::AddFunctionAttrStringValue(
llfn,
llvm::AttributePlace::Function,
cstr!("frame-pointer"),
cstr!("all"),
);
}
}
/// Tell LLVM what instrumentation function to insert.
#[inline]
fn set_instrument_function(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
if cx.sess().instrument_mcount() {
// Similar to `clang -pg` behavior. Handled by the
// `post-inline-ee-instrument` LLVM pass.
// The function name varies across platforms.
// See test/CodeGen/mcount.c in clang.
let mcount_name = CString::new(cx.sess().target.mcount.as_str().as_bytes()).unwrap();
llvm::AddFunctionAttrStringValue(
llfn,
llvm::AttributePlace::Function,
cstr!("instrument-function-entry-inlined"),
&mcount_name,
);
}
}
fn set_probestack(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
// Currently stack probes seem somewhat incompatible with the address
// sanitizer and thread sanitizer. With asan we're already protected from
// stack overflow anyway so we don't really need stack probes regardless.
if cx
.sess()
.opts
.debugging_opts
.sanitizer
.intersects(SanitizerSet::ADDRESS | SanitizerSet::THREAD)
{
return;
}
// probestack doesn't play nice with `-C profile-generate` either.
if cx.sess().opts.cg.profile_generate.enabled() {
return;
}
// probestack doesn't play nice with gcov profiling either.
if cx.sess().opts.debugging_opts.profile {
return;
}
let attr_value = match cx.sess().target.stack_probes {
StackProbeType::None => None,
// Request LLVM to generate the probes inline. If the given LLVM version does not support
// this, no probe is generated at all (even if the attribute is specified).
StackProbeType::Inline => Some(cstr!("inline-asm")),
// Flag our internal `__rust_probestack` function as the stack probe symbol.
// This is defined in the `compiler-builtins` crate for each architecture.
StackProbeType::Call => Some(cstr!("__rust_probestack")),
// Pick from the two above based on the LLVM version.
StackProbeType::InlineOrCall { min_llvm_version_for_inline } => {
if llvm_util::get_version() < min_llvm_version_for_inline {
Some(cstr!("__rust_probestack"))
} else {
Some(cstr!("inline-asm"))
}
}
};
if let Some(attr_value) = attr_value {
llvm::AddFunctionAttrStringValue(
llfn,
llvm::AttributePlace::Function,
cstr!("probe-stack"),
attr_value,
);
}
}
pub fn apply_target_cpu_attr(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
let target_cpu = SmallCStr::new(llvm_util::target_cpu(cx.tcx.sess));
llvm::AddFunctionAttrStringValue(
llfn,
llvm::AttributePlace::Function,
cstr!("target-cpu"),
target_cpu.as_c_str(),
);
}
pub fn apply_tune_cpu_attr(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
if let Some(tune) = llvm_util::tune_cpu(cx.tcx.sess) {
let tune_cpu = SmallCStr::new(tune);
llvm::AddFunctionAttrStringValue(
llfn,
llvm::AttributePlace::Function,
cstr!("tune-cpu"),
tune_cpu.as_c_str(),
);
}
}
/// Sets the `NonLazyBind` LLVM attribute on a given function,
/// assuming the codegen options allow skipping the PLT.
pub fn non_lazy_bind(sess: &Session, llfn: &'ll Value) {
// Don't generate calls through the PLT if it's not necessary.
if !sess.needs_plt() {
Attribute::NonLazyBind.apply_llfn(Function, llfn);
}
}
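/// Apply the optimization level selected for the session as LLVM function
/// attributes. Used when a function carries no `#[optimize]` override of its own.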
pub(crate) fn default_optimisation_attrs(sess: &Session, llfn: &'ll Value) {
match sess.opts.optimize {
OptLevel::Size => {
llvm::Attribute::MinSize.unapply_llfn(Function, llfn);
llvm::Attribute::OptimizeForSize.apply_llfn(Function, llfn);
llvm::Attribute::OptimizeNone.unapply_llfn(Function, llfn);
}
OptLevel::SizeMin => {
llvm::Attribute::MinSize.apply_llfn(Function, llfn);
llvm::Attribute::OptimizeForSize.apply_llfn(Function, llfn);
llvm::Attribute::OptimizeNone.unapply_llfn(Function, llfn);
}
OptLevel::No => {
llvm::Attribute::MinSize.unapply_llfn(Function, llfn);
llvm::Attribute::OptimizeForSize.unapply_llfn(Function, llfn);
llvm::Attribute::OptimizeNone.unapply_llfn(Function, llfn);
}
_ => {}
}
}
/// Composite function which sets LLVM attributes for a function depending on its AST (`#[attribute]`)
/// attributes.
pub fn from_fn_attrs(cx: &CodegenCx<'ll, 'tcx>, llfn: &'ll Value, instance: ty::Instance<'tcx>) {
let codegen_fn_attrs = cx.tcx.codegen_fn_attrs(instance.def_id());
match codegen_fn_attrs.optimize {
OptimizeAttr::None => {
default_optimisation_attrs(cx.tcx.sess, llfn);
}
OptimizeAttr::Speed => {
llvm::Attribute::MinSize.unapply_llfn(Function, llfn);
llvm::Attribute::OptimizeForSize.unapply_llfn(Function, llfn);
llvm::Attribute::OptimizeNone.unapply_llfn(Function, llfn);
}
OptimizeAttr::Size => {
llvm::Attribute::MinSize.apply_llfn(Function, llfn);
llvm::Attribute::OptimizeForSize.apply_llfn(Function, llfn);
llvm::Attribute::OptimizeNone.unapply_llfn(Function, llfn);
}
}
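// Naked functions must never be inlined; otherwise honour the inline attribute
// from the source, upgrading it to a hint when the instance itself requires
// inlining.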
let inline_attr = if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NAKED) {
InlineAttr::Never
} else if codegen_fn_attrs.inline == InlineAttr::None && instance.def.requires_inline(cx.tcx) {
InlineAttr::Hint
} else {
codegen_fn_attrs.inline
};
inline(cx, llfn, inline_attr);
// The `uwtable` attribute according to LLVM is:
//
// This attribute indicates that the ABI being targeted requires that an
// unwind table entry be produced for this function even if we can show
// that no exceptions pass by it. This is normally the case for the
// ELF x86-64 abi, but it can be disabled for some compilation units.
//
// Typically when we're compiling with `-C panic=abort` (which implies this
// `no_landing_pads` check) we don't need `uwtable` because we can't
// generate any exceptions! On Windows, however, exceptions include other
// events such as illegal instructions, segfaults, etc. This means that on
// Windows we end up still needing the `uwtable` attribute even if the `-C
// panic=abort` flag is passed.
//
// You can also find more info on why Windows always requires uwtables here:
// https://bugzilla.mozilla.org/show_bug.cgi?id=1302078
if cx.sess().must_emit_unwind_tables() {
attributes::emit_uwtable(llfn, true);
}
set_frame_pointer_elimination(cx, llfn);
set_instrument_function(cx, llfn);
set_probestack(cx, llfn);
if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::COLD) {
Attribute::Cold.apply_llfn(Function, llfn);
}
if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::FFI_RETURNS_TWICE) {
Attribute::ReturnsTwice.apply_llfn(Function, llfn);
}
if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::FFI_PURE) {
Attribute::ReadOnly.apply_llfn(Function, llfn);
}
if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::FFI_CONST) {
Attribute::ReadNone.apply_llfn(Function, llfn);
}
if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NAKED) {
naked(llfn, true);
}
if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::ALLOCATOR) {
Attribute::NoAlias.apply_llfn(llvm::AttributePlace::ReturnValue, llfn);
}
if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::CMSE_NONSECURE_ENTRY) {
llvm::AddFunctionAttrString(llfn, Function, cstr!("cmse_nonsecure_entry"));
}
sanitize(cx, codegen_fn_attrs.no_sanitize, llfn);
// Always annotate functions with the target-cpu they are compiled for.
// Without this, ThinLTO won't inline Rust functions into Clang generated
// functions (because Clang annotates functions this way too).
apply_target_cpu_attr(cx, llfn);
// tune-cpu is only conveyed through this attribute for our purposes.
// The target doesn't care; only the subtarget reads the attribute.
apply_tune_cpu_attr(cx, llfn);
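// Assemble the per-function `target-features` string. Features requested via
// `#[target_feature]` and `#[instruction_set]` are appended after the globally
// enabled features so that the function-specific ones take precedence.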
let function_features = codegen_fn_attrs
.target_features
.iter()
.map(|f| {
let feature = &f.as_str();
format!("+{}", llvm_util::to_llvm_feature(cx.tcx.sess, feature))
})
.chain(codegen_fn_attrs.instruction_set.iter().map(|x| match x {
InstructionSetAttr::ArmA32 => "-thumb-mode".to_string(),
InstructionSetAttr::ArmT32 => "+thumb-mode".to_string(),
}))
.collect::<Vec<String>>();
if !function_features.is_empty() {
let mut global_features = llvm_util::llvm_global_features(cx.tcx.sess);
global_features.extend(function_features.into_iter());
let features = global_features.join(",");
let val = CString::new(features).unwrap();
llvm::AddFunctionAttrStringValue(
llfn,
llvm::AttributePlace::Function,
cstr!("target-features"),
&val,
);
}
// Note that currently the `wasm-import-module` attribute doesn't do anything,
// but eventually LLVM 7 should read it and ferry the appropriate import
// module to the output file.
if cx.tcx.sess.target.arch == "wasm32" {
if let Some(module) = wasm_import_module(cx.tcx, instance.def_id()) {
llvm::AddFunctionAttrStringValue(
llfn,
llvm::AttributePlace::Function,
cstr!("wasm-import-module"),
&module,
);
let name =
codegen_fn_attrs.link_name.unwrap_or_else(|| cx.tcx.item_name(instance.def_id()));
let name = CString::new(&name.as_str()[..]).unwrap();
llvm::AddFunctionAttrStringValue(
llfn,
llvm::AttributePlace::Function,
cstr!("wasm-import-name"),
&name,
);
}
}
}
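/// Install the `wasm_import_module_map` query provider (registered for both
/// local and extern crates, hence the name).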
pub fn provide_both(providers: &mut Providers) {
providers.wasm_import_module_map = |tcx, cnum| {
// Build up a map from DefId to a `NativeLib` structure, where
// `NativeLib` internally contains information about
// `#[link(wasm_import_module = "...")]` for example.
let native_libs = tcx.native_libraries(cnum);
let def_id_to_native_lib = native_libs
.iter()
.filter_map(|lib| lib.foreign_module.map(|id| (id, lib)))
.collect::<FxHashMap<_, _>>();
let mut ret = FxHashMap::default();
for (def_id, lib) in tcx.foreign_modules(cnum).iter() {
let module = def_id_to_native_lib.get(&def_id).and_then(|s| s.wasm_import_module);
let module = match module {
Some(s) => s,
None => continue,
};
ret.extend(lib.foreign_items.iter().map(|id| {
assert_eq!(id.krate, cnum);
(*id, module.to_string())
}));
}
ret
};
}
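/// Look up the wasm import module recorded for `id`, if its foreign module was
/// annotated with `#[link(wasm_import_module = "...")]`.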
fn wasm_import_module(tcx: TyCtxt<'_>, id: DefId) -> Option<CString> {
tcx.wasm_import_module_map(id.krate).get(&id).map(|s| CString::new(&s[..]).unwrap())
}