// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use back::{link, abi};
use lib::llvm::{Pointer, ValueRef};
use lib;
use middle::trans::base::*;
use middle::trans::cabi;
use middle::trans::cabi_x86;
use middle::trans::cabi_x86_64;
use middle::trans::cabi_arm;
use middle::trans::cabi_mips;
use middle::trans::build::*;
use middle::trans::callee::*;
use middle::trans::common::*;
use middle::trans::datum::*;
use middle::trans::expr::Ignore;
use middle::trans::machine::llsize_of;
use middle::trans::glue;
use middle::trans::machine;
use middle::trans::type_of::*;
use middle::trans::type_of;
use middle::ty;
use middle::ty::FnSig;
use util::ppaux::ty_to_str;

use std::cell::Cell;
use std::uint;
use std::vec;
use syntax::codemap::span;
use syntax::{ast, ast_util};
use syntax::{attr, ast_map};
use syntax::opt_vec;
use syntax::parse::token::special_idents;
use syntax::parse::token;
use syntax::abi::{X86, X86_64, Arm, Mips};
use syntax::abi::{RustIntrinsic, Rust, Stdcall, Fastcall,
                  Cdecl, Aapcs, C};
use middle::trans::type_::Type;

fn abi_info(ccx: @mut CrateContext) -> @cabi::ABIInfo {
    return match ccx.sess.targ_cfg.arch {
        X86 => cabi_x86::abi_info(ccx),
        X86_64 => cabi_x86_64::abi_info(),
        Arm => cabi_arm::abi_info(),
        Mips => cabi_mips::abi_info(),
    }
}

pub fn link_name(ccx: &CrateContext, i: &ast::foreign_item) -> @str {
    match attr::first_attr_value_str_by_name(i.attrs, "link_name") {
        None => ccx.sess.str_of(i.ident),
        Some(ln) => ln,
    }
}
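
// Illustrative sketch (not in the original source): `link_name` selects the
// symbol that actually gets linked. For a hypothetical declaration such as
//
//     extern {
//         #[link_name = "puts"]
//         fn c_puts(s: *libc::c_char) -> libc::c_int;
//     }
//
// it returns "puts"; without the attribute it would return "c_puts".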

struct ShimTypes {
    fn_sig: ty::FnSig,

    /// LLVM types that will appear on the foreign function
    llsig: LlvmSignature,

    /// True if there is a return value (not bottom, not unit)
    ret_def: bool,

    /// Type of the struct we will use to shuttle values back and forth.
    /// This is always derived from the llsig.
    bundle_ty: Type,

    /// Type of the shim function itself.
    shim_fn_ty: Type,

    /// Adapter object for handling native ABI rules (trust me, you
    /// don't want to know).
    fn_ty: cabi::FnType
}

struct LlvmSignature {
    llarg_tys: ~[Type],
    llret_ty: Type,
    sret: bool,
}

fn foreign_signature(ccx: &mut CrateContext, fn_sig: &ty::FnSig)
                     -> LlvmSignature {
    /*!
     * The LlvmSignature is the LLVM types of the arguments/return type
     * of a function. Note that these LLVM types are not quite the same
     * as the LLVM types would be for a native Rust function, because
     * foreign functions just plain ignore modes. They also don't pass
     * aggregate values by pointer like we do.
     */

    let llarg_tys = fn_sig.inputs.map(|arg_ty| type_of(ccx, *arg_ty));
    let llret_ty = type_of::type_of(ccx, fn_sig.output);
    LlvmSignature {
        llarg_tys: llarg_tys,
        llret_ty: llret_ty,
        sret: !ty::type_is_immediate(ccx.tcx, fn_sig.output),
    }
}
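
// Illustrative sketch (not in the original source): for a hypothetical
// foreign fn(x: i32, y: f64) -> f64, the signature built above would be
// roughly llarg_tys = [i32, double], llret_ty = double, sret = false.
// A function returning a (non-immediate) struct would instead get
// sret = true, since its result must travel through a return pointer.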

fn shim_types(ccx: @mut CrateContext, id: ast::node_id) -> ShimTypes {
    let fn_sig = match ty::get(ty::node_id_to_type(ccx.tcx, id)).sty {
        ty::ty_bare_fn(ref fn_ty) => fn_ty.sig.clone(),
        _ => ccx.sess.bug("c_arg_and_ret_lltys called on non-function type")
    };
    let llsig = foreign_signature(ccx, &fn_sig);
    let bundle_ty = Type::struct_(llsig.llarg_tys + &[llsig.llret_ty.ptr_to()], false);
    let ret_def = !ty::type_is_bot(fn_sig.output) &&
                  !ty::type_is_nil(fn_sig.output);
    let fn_ty = abi_info(ccx).compute_info(llsig.llarg_tys, llsig.llret_ty, ret_def);
    ShimTypes {
        fn_sig: fn_sig,
        llsig: llsig,
        ret_def: ret_def,
        bundle_ty: bundle_ty,
        shim_fn_ty: Type::func([bundle_ty.ptr_to()], &Type::void()),
        fn_ty: fn_ty
    }
}
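
// Illustrative sketch (not in the original source): continuing the
// hypothetical fn(x: i32, y: f64) -> f64 example, bundle_ty is the LLVM
// struct { i32, double, double* } -- every argument plus a pointer to the
// return slot -- and shim_fn_ty is void ({ i32, double, double* }*), i.e.
// the shim takes only a pointer to that bundle and returns nothing.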

type shim_arg_builder<'self> =
    &'self fn(bcx: @mut Block, tys: &ShimTypes,
              llargbundle: ValueRef) -> ~[ValueRef];

type shim_ret_builder<'self> =
    &'self fn(bcx: @mut Block, tys: &ShimTypes,
              llargbundle: ValueRef,
              llretval: ValueRef);

fn build_shim_fn_(ccx: @mut CrateContext,
                  shim_name: &str,
                  llbasefn: ValueRef,
                  tys: &ShimTypes,
                  cc: lib::llvm::CallConv,
                  arg_builder: shim_arg_builder,
                  ret_builder: shim_ret_builder)
                  -> ValueRef {
    let llshimfn = decl_internal_cdecl_fn(
        ccx.llmod, shim_name, tys.shim_fn_ty);

    // Declare the body of the shim function:
    let fcx = new_fn_ctxt(ccx, ~[], llshimfn, tys.fn_sig.output, None);
    let bcx = fcx.entry_bcx.get();

    let llargbundle = get_param(llshimfn, 0u);
    let llargvals = arg_builder(bcx, tys, llargbundle);

    // Create the call itself and store the return value:
    let llretval = CallWithConv(bcx, llbasefn, llargvals, cc);

    ret_builder(bcx, tys, llargbundle, llretval);

    // Don't finish up the function in the usual way, because this doesn't
    // follow the normal Rust calling conventions.
    let ret_cx = match fcx.llreturn {
        Some(llreturn) => raw_block(fcx, false, llreturn),
        None => bcx
    };
    RetVoid(ret_cx);
    fcx.cleanup();

    return llshimfn;
}

type wrap_arg_builder<'self> = &'self fn(bcx: @mut Block,
                                         tys: &ShimTypes,
                                         llwrapfn: ValueRef,
                                         llargbundle: ValueRef);

type wrap_ret_builder<'self> = &'self fn(bcx: @mut Block,
                                         tys: &ShimTypes,
                                         llargbundle: ValueRef);

fn build_wrap_fn_(ccx: @mut CrateContext,
                  tys: &ShimTypes,
                  llshimfn: ValueRef,
                  llwrapfn: ValueRef,
                  shim_upcall: ValueRef,
                  needs_c_return: bool,
                  arg_builder: wrap_arg_builder,
                  ret_builder: wrap_ret_builder) {
    let _icx = push_ctxt("foreign::build_wrap_fn_");
    let fcx = new_fn_ctxt(ccx, ~[], llwrapfn, tys.fn_sig.output, None);
    let bcx = fcx.entry_bcx.get();

    // Patch up the return type if it's not immediate and we're returning via
    // the C ABI.
    if needs_c_return && !ty::type_is_immediate(ccx.tcx, tys.fn_sig.output) {
        let lloutputtype = type_of::type_of(fcx.ccx, tys.fn_sig.output);
        fcx.llretptr = Some(alloca(bcx, lloutputtype, ""));
    }

    // Allocate the struct and write the arguments into it.
    let llargbundle = alloca(bcx, tys.bundle_ty, "__llargbundle");
    arg_builder(bcx, tys, llwrapfn, llargbundle);

    // Create the call itself.
    let llshimfnptr = PointerCast(bcx, llshimfn, Type::i8p());
    let llrawargbundle = PointerCast(bcx, llargbundle, Type::i8p());
    Call(bcx, shim_upcall, [llrawargbundle, llshimfnptr]);
    ret_builder(bcx, tys, llargbundle);

    // Then return according to the C ABI.
    let return_context = match fcx.llreturn {
        Some(llreturn) => raw_block(fcx, false, llreturn),
        None => bcx
    };

    let llfunctiontype = val_ty(llwrapfn);
    let llfunctiontype = llfunctiontype.element_type();
    let return_type = llfunctiontype.return_type();
    if return_type.kind() == ::lib::llvm::Void {
        // XXX: This might be wrong if there are any functions for which
        // the C ABI specifies a void output pointer and the Rust ABI
        // does not.
        RetVoid(return_context);
    } else {
        // Cast if we have to...
        // XXX: This is ugly.
        let llretptr = BitCast(return_context, fcx.llretptr.get(), return_type.ptr_to());
        Ret(return_context, Load(return_context, llretptr));
    }
    fcx.cleanup();
}

// For each foreign function F, we generate a wrapper function W and a shim
// function S that all work together. The wrapper function W is the function
// that other Rust code actually invokes. Its job is to marshal the
// arguments into a struct. It then uses a small bit of assembly to switch
// over to the C stack and invoke the shim function. The shim function S then
// unpacks the arguments from the struct and invokes the actual function F
// according to its specified calling convention.
//
// Example: Given a foreign c-stack function F(x: X, y: Y) -> Z,
// we generate a wrapper function W that looks like:
//
//     void W(Z* dest, void *env, X x, Y y) {
//         struct { X x; Y y; Z *z; } args = { x, y, dest };
//         call_on_c_stack_shim(S, &args);
//     }
//
// The shim function S then looks something like:
//
//     void S(struct { X x; Y y; Z *z; } *args) {
//         *args->z = F(args->x, args->y);
//     }
//
// However, if the return type of F is dynamically sized or of aggregate type,
// the shim function looks like:
//
//     void S(struct { X x; Y y; Z *z; } *args) {
//         F(args->z, args->x, args->y);
//     }
//
// Note: on i386, the layout of the args struct is generally the same
// as the desired layout of the arguments on the C stack. Therefore,
// we could use upcall_alloc_c_stack() to allocate the `args`
// structure and switch the stack pointer appropriately to avoid a
// round of copies. (In fact, the shim function itself is
// unnecessary.) We used to do this, in fact, and will perhaps do so
// in the future.
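//
// Illustrative sketch (not in the original source): the kind of item that
// reaches this path is an ordinary foreign declaration, e.g. the
// hypothetical
//
//     extern {
//         fn write(fd: libc::c_int, buf: *u8, count: libc::size_t)
//                  -> libc::ssize_t;
//     }
//
// Rust callers see and call the generated wrapper W for `write`; only the
// shim S runs on the C stack and performs the real call.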
pub fn trans_foreign_mod(ccx: @mut CrateContext,
                         path: &ast_map::path,
                         foreign_mod: &ast::foreign_mod) {
    let _icx = push_ctxt("foreign::trans_foreign_mod");

    let arch = ccx.sess.targ_cfg.arch;
    let abi = match foreign_mod.abis.for_arch(arch) {
        None => {
            ccx.sess.fatal(
                fmt!("No suitable ABI for target architecture \
                      in module %s",
                     ast_map::path_to_str(*path,
                                          ccx.sess.intr())));
        }

        Some(abi) => abi,
    };

    for foreign_mod.items.iter().advance |&foreign_item| {
        match foreign_item.node {
            ast::foreign_item_fn(*) => {
                let id = foreign_item.id;
                match abi {
                    RustIntrinsic => {
                        // Intrinsics are emitted when the monomorphic
                        // instance is translated; nothing to do here.
                    }

                    Rust => {
                        // FIXME(#3678) Implement linking to foreign fns with Rust ABI
                        ccx.sess.unimpl(
                            fmt!("Foreign functions with Rust ABI"));
                    }

                    Stdcall => {
                        build_foreign_fn(ccx, id, foreign_item,
                                         lib::llvm::X86StdcallCallConv);
                    }

                    Fastcall => {
                        build_foreign_fn(ccx, id, foreign_item,
                                         lib::llvm::X86FastcallCallConv);
                    }

                    Cdecl => {
                        // FIXME(#3678) should really be more specific
                        build_foreign_fn(ccx, id, foreign_item,
                                         lib::llvm::CCallConv);
                    }

                    Aapcs => {
                        // FIXME(#3678) should really be more specific
                        build_foreign_fn(ccx, id, foreign_item,
                                         lib::llvm::CCallConv);
                    }

                    C => {
                        build_foreign_fn(ccx, id, foreign_item,
                                         lib::llvm::CCallConv);
                    }
                }
            }
            ast::foreign_item_static(*) => {
                let ident = token::ident_to_str(&foreign_item.ident);
                ccx.item_symbols.insert(foreign_item.id, /* bad */ident.to_owned());
            }
        }
    }

    fn build_foreign_fn(ccx: @mut CrateContext,
                        id: ast::node_id,
                        foreign_item: @ast::foreign_item,
                        cc: lib::llvm::CallConv) {
        let llwrapfn = get_item_val(ccx, id);
        let tys = shim_types(ccx, id);
        if attr::contains_name(foreign_item.attrs, "rust_stack") {
            build_direct_fn(ccx, llwrapfn, foreign_item,
                            &tys, cc);
        } else if attr::contains_name(foreign_item.attrs, "fast_ffi") {
            build_fast_ffi_fn(ccx, llwrapfn, foreign_item, &tys, cc);
        } else {
            let llshimfn = build_shim_fn(ccx, foreign_item, &tys, cc);
            build_wrap_fn(ccx, &tys, llshimfn, llwrapfn);
        }
    }
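
    // Illustrative sketch (not in the original source): the dispatch above is
    // driven by per-item attributes. A hypothetical declaration such as
    //
    //     extern {
    //         #[rust_stack]
    //         fn cheap_c_helper(x: uint) -> uint;
    //     }
    //
    // takes the `build_direct_fn` path (called directly on the Rust stack),
    // `#[fast_ffi]` takes the fixed-stack-segment fast path, and an
    // unannotated item gets the full shim + wrapper treatment.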

    fn build_shim_fn(ccx: @mut CrateContext,
                     foreign_item: &ast::foreign_item,
                     tys: &ShimTypes,
                     cc: lib::llvm::CallConv)
                     -> ValueRef {
        /*!
         * Build S, from comment above:
         *
         *     void S(struct { X x; Y y; Z *z; } *args) {
         *         F(args->z, args->x, args->y);
         *     }
         */

        let _icx = push_ctxt("foreign::build_shim_fn");

        fn build_args(bcx: @mut Block, tys: &ShimTypes, llargbundle: ValueRef)
                      -> ~[ValueRef] {
            let _icx = push_ctxt("foreign::shim::build_args");
            tys.fn_ty.build_shim_args(bcx, tys.llsig.llarg_tys, llargbundle)
        }

        fn build_ret(bcx: @mut Block,
                     tys: &ShimTypes,
                     llargbundle: ValueRef,
                     llretval: ValueRef) {
            let _icx = push_ctxt("foreign::shim::build_ret");
            tys.fn_ty.build_shim_ret(bcx,
                                     tys.llsig.llarg_tys,
                                     tys.ret_def,
                                     llargbundle,
                                     llretval);
        }

        let lname = link_name(ccx, foreign_item);
        let llbasefn = base_fn(ccx, lname, tys, cc);
        // Name the shim function
        let shim_name = fmt!("%s__c_stack_shim", lname);
        build_shim_fn_(ccx,
                       shim_name,
                       llbasefn,
                       tys,
                       cc,
                       build_args,
                       build_ret)
    }

    fn base_fn(ccx: &CrateContext,
               lname: &str,
               tys: &ShimTypes,
               cc: lib::llvm::CallConv)
               -> ValueRef {
        // Declare the "prototype" for the base function F:
        do tys.fn_ty.decl_fn |fnty| {
            decl_fn(ccx.llmod, lname, cc, fnty)
        }
    }

    // FIXME (#2535): this is very shaky and probably gets ABIs wrong all
    // over the place
    fn build_direct_fn(ccx: @mut CrateContext,
                       decl: ValueRef,
                       item: &ast::foreign_item,
                       tys: &ShimTypes,
                       cc: lib::llvm::CallConv) {
        debug!("build_direct_fn(%s)", link_name(ccx, item));

        let fcx = new_fn_ctxt(ccx, ~[], decl, tys.fn_sig.output, None);
        let bcx = fcx.entry_bcx.get();
        let llbasefn = base_fn(ccx, link_name(ccx, item), tys, cc);
        let ty = ty::lookup_item_type(ccx.tcx,
                                      ast_util::local_def(item.id)).ty;
        let ret_ty = ty::ty_fn_ret(ty);
        let args = vec::from_fn(ty::ty_fn_args(ty).len(), |i| {
            get_param(decl, fcx.arg_pos(i))
        });
        let retval = Call(bcx, llbasefn, args);
        if !ty::type_is_nil(ret_ty) && !ty::type_is_bot(ret_ty) {
            Store(bcx, retval, fcx.llretptr.get());
        }
        finish_fn(fcx, bcx);
    }

    // FIXME (#2535): this is very shaky and probably gets ABIs wrong all
    // over the place
    fn build_fast_ffi_fn(ccx: @mut CrateContext,
                         decl: ValueRef,
                         item: &ast::foreign_item,
                         tys: &ShimTypes,
                         cc: lib::llvm::CallConv) {
        debug!("build_fast_ffi_fn(%s)", link_name(ccx, item));

        let fcx = new_fn_ctxt(ccx, ~[], decl, tys.fn_sig.output, None);
        let bcx = fcx.entry_bcx.get();
        let llbasefn = base_fn(ccx, link_name(ccx, item), tys, cc);
        set_no_inline(fcx.llfn);
        set_fixed_stack_segment(fcx.llfn);
        let ty = ty::lookup_item_type(ccx.tcx,
                                      ast_util::local_def(item.id)).ty;
        let ret_ty = ty::ty_fn_ret(ty);
        let args = vec::from_fn(ty::ty_fn_args(ty).len(), |i| {
            get_param(decl, fcx.arg_pos(i))
        });
        let retval = Call(bcx, llbasefn, args);
        if !ty::type_is_nil(ret_ty) && !ty::type_is_bot(ret_ty) {
            Store(bcx, retval, fcx.llretptr.get());
        }
        finish_fn(fcx, bcx);
    }

    fn build_wrap_fn(ccx: @mut CrateContext,
                     tys: &ShimTypes,
                     llshimfn: ValueRef,
                     llwrapfn: ValueRef) {
        /*!
         * Build W, from comment above:
         *
         *     void W(Z* dest, void *env, X x, Y y) {
         *         struct { X x; Y y; Z *z; } args = { x, y, dest };
         *         call_on_c_stack_shim(S, &args);
         *     }
         *
         * One thing we have to be very careful of is to
         * account for the Rust modes.
         */

        let _icx = push_ctxt("foreign::build_wrap_fn");

        build_wrap_fn_(ccx,
                       tys,
                       llshimfn,
                       llwrapfn,
                       ccx.upcalls.call_shim_on_c_stack,
                       false,
                       build_args,
                       build_ret);

        fn build_args(bcx: @mut Block,
                      tys: &ShimTypes,
                      llwrapfn: ValueRef,
                      llargbundle: ValueRef) {
            let _icx = push_ctxt("foreign::wrap::build_args");
            let ccx = bcx.ccx();
            let n = tys.llsig.llarg_tys.len();
            for uint::range(0, n) |i| {
                let arg_i = bcx.fcx.arg_pos(i);
                let mut llargval = get_param(llwrapfn, arg_i);

                // In some cases, Rust will pass a pointer which the
                // native C type doesn't have. In that case, just
                // load the value from the pointer.
                if type_of::arg_is_indirect(ccx, &tys.fn_sig.inputs[i]) {
                    llargval = Load(bcx, llargval);
                }

                store_inbounds(bcx, llargval, llargbundle, [0u, i]);
            }

            for bcx.fcx.llretptr.iter().advance |&retptr| {
                store_inbounds(bcx, retptr, llargbundle, [0u, n]);
            }
        }

        fn build_ret(bcx: @mut Block,
                     shim_types: &ShimTypes,
                     llargbundle: ValueRef) {
            let _icx = push_ctxt("foreign::wrap::build_ret");
            let arg_count = shim_types.fn_sig.inputs.len();
            for bcx.fcx.llretptr.iter().advance |&retptr| {
                let llretptr = load_inbounds(bcx, llargbundle, [0, arg_count]);
                Store(bcx, Load(bcx, llretptr), retptr);
            }
        }
    }
}

pub fn trans_intrinsic(ccx: @mut CrateContext,
                       decl: ValueRef,
                       item: &ast::foreign_item,
                       path: ast_map::path,
                       substs: @param_substs,
                       attributes: &[ast::Attribute],
                       ref_id: Option<ast::node_id>) {
    debug!("trans_intrinsic(item.ident=%s)", ccx.sess.str_of(item.ident));

    fn simple_llvm_intrinsic(bcx: @mut Block, name: &'static str, num_args: uint) {
        assert!(num_args <= 4);
        let mut args = [0 as ValueRef, ..4];
        let first_real_arg = bcx.fcx.arg_pos(0u);
        for uint::range(0, num_args) |i| {
            args[i] = get_param(bcx.fcx.llfn, first_real_arg + i);
        }
        let llfn = bcx.ccx().intrinsics.get_copy(&name);
        Ret(bcx, Call(bcx, llfn, args.slice(0, num_args)));
    }
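
    // Illustrative sketch (not in the original source): this helper covers
    // intrinsics that are a straight pass-through to an LLVM intrinsic.
    // For example, a call like simple_llvm_intrinsic(bcx, "llvm.sqrt.f32", 1)
    // (assuming that key is present in the ccx.intrinsics table) forwards the
    // single Rust argument and returns the LLVM call's result directly, with
    // no alloca or extra branch in the generated code.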

    fn memcpy_intrinsic(bcx: @mut Block, name: &'static str, tp_ty: ty::t, sizebits: u8) {
        let ccx = bcx.ccx();
        let lltp_ty = type_of::type_of(ccx, tp_ty);
        let align = C_i32(machine::llalign_of_min(ccx, lltp_ty) as i32);
        let size = match sizebits {
            32 => C_i32(machine::llsize_of_real(ccx, lltp_ty) as i32),
            64 => C_i64(machine::llsize_of_real(ccx, lltp_ty) as i64),
            _ => ccx.sess.fatal("Invalid value for sizebits")
        };

        let decl = bcx.fcx.llfn;
        let first_real_arg = bcx.fcx.arg_pos(0u);
        let dst_ptr = PointerCast(bcx, get_param(decl, first_real_arg), Type::i8p());
        let src_ptr = PointerCast(bcx, get_param(decl, first_real_arg + 1), Type::i8p());
        let count = get_param(decl, first_real_arg + 2);
        let volatile = C_i1(false);
        let llfn = bcx.ccx().intrinsics.get_copy(&name);
        Call(bcx, llfn, [dst_ptr, src_ptr, Mul(bcx, size, count), align, volatile]);
        RetVoid(bcx);
    }
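
    // Illustrative note (not in the original source): the byte count passed to
    // the LLVM memcpy intrinsic is size_of::<T>() * count, e.g. copying 16
    // elements of a 4-byte type emits a memcpy of 64 bytes, with the element
    // type's minimal alignment and the volatile flag set to false.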

    fn memset_intrinsic(bcx: @mut Block, name: &'static str, tp_ty: ty::t, sizebits: u8) {
        let ccx = bcx.ccx();
        let lltp_ty = type_of::type_of(ccx, tp_ty);
        let align = C_i32(machine::llalign_of_min(ccx, lltp_ty) as i32);
        let size = match sizebits {
            32 => C_i32(machine::llsize_of_real(ccx, lltp_ty) as i32),
            64 => C_i64(machine::llsize_of_real(ccx, lltp_ty) as i64),
            _ => ccx.sess.fatal("Invalid value for sizebits")
        };

        let decl = bcx.fcx.llfn;
        let first_real_arg = bcx.fcx.arg_pos(0u);
        let dst_ptr = PointerCast(bcx, get_param(decl, first_real_arg), Type::i8p());
        let val = get_param(decl, first_real_arg + 1);
        let count = get_param(decl, first_real_arg + 2);
        let volatile = C_i1(false);
        let llfn = bcx.ccx().intrinsics.get_copy(&name);
        Call(bcx, llfn, [dst_ptr, val, Mul(bcx, size, count), align, volatile]);
        RetVoid(bcx);
    }

    fn count_zeros_intrinsic(bcx: @mut Block, name: &'static str) {
        let x = get_param(bcx.fcx.llfn, bcx.fcx.arg_pos(0u));
        let y = C_i1(false);
        let llfn = bcx.ccx().intrinsics.get_copy(&name);
        Ret(bcx, Call(bcx, llfn, [x, y]));
    }
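
    // Illustrative note (not in the original source): the constant i1 false is
    // the LLVM ctlz/cttz "is_zero_undef" flag, so a zero input yields the full
    // bit width of the operand (e.g. 32 for a 32-bit value) rather than an
    // undefined result.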

    let output_type = ty::ty_fn_ret(ty::node_id_to_type(ccx.tcx, item.id));

    let fcx = new_fn_ctxt_w_id(ccx,
                               path,
                               decl,
                               item.id,
                               output_type,
                               true,
                               Some(substs),
                               None,
                               Some(item.span));

    set_always_inline(fcx.llfn);

    // Set the fixed stack segment flag if necessary.
    if attr::contains_name(attributes, "fixed_stack_segment") {
        set_fixed_stack_segment(fcx.llfn);
    }

    let mut bcx = fcx.entry_bcx.get();
    let first_real_arg = fcx.arg_pos(0u);

    let nm = ccx.sess.str_of(item.ident);
    let name = nm.as_slice();

    // This requires that atomic intrinsics follow a specific naming pattern:
    // "atomic_<operation>[_<ordering>]"; when no ordering is given, SeqCst
    // is used.
    if name.starts_with("atomic_") {
        let split : ~[&str] = name.split_iter('_').collect();
        assert!(split.len() >= 2, "Atomic intrinsic not in correct format");
        let order = if split.len() == 2 {
            lib::llvm::SequentiallyConsistent
        } else {
            match split[2] {
                "relaxed" => lib::llvm::Monotonic,
                "acq" => lib::llvm::Acquire,
                "rel" => lib::llvm::Release,
                "acqrel" => lib::llvm::AcquireRelease,
                _ => ccx.sess.fatal("Unknown ordering in atomic intrinsic")
            }
        };

        match split[1] {
            "cxchg" => {
                let old = AtomicCmpXchg(bcx, get_param(decl, first_real_arg),
                                        get_param(decl, first_real_arg + 1u),
                                        get_param(decl, first_real_arg + 2u),
                                        order);
                Ret(bcx, old);
            }
            "load" => {
                let old = AtomicLoad(bcx, get_param(decl, first_real_arg),
                                     order);
                Ret(bcx, old);
            }
            "store" => {
                AtomicStore(bcx, get_param(decl, first_real_arg + 1u),
                            get_param(decl, first_real_arg),
                            order);
                RetVoid(bcx);
            }
            "fence" => {
                AtomicFence(bcx, order);
                RetVoid(bcx);
            }
            op => {
                // These are all AtomicRMW ops
                let atom_op = match op {
                    "xchg" => lib::llvm::Xchg,
                    "xadd" => lib::llvm::Add,
                    "xsub" => lib::llvm::Sub,
                    "and"  => lib::llvm::And,
                    "nand" => lib::llvm::Nand,
                    "or"   => lib::llvm::Or,
                    "xor"  => lib::llvm::Xor,
                    "max"  => lib::llvm::Max,
                    "min"  => lib::llvm::Min,
                    "umax" => lib::llvm::UMax,
                    "umin" => lib::llvm::UMin,
                    _ => ccx.sess.fatal("Unknown atomic operation")
                };

                let old = AtomicRMW(bcx, atom_op, get_param(decl, first_real_arg),
                                    get_param(decl, first_real_arg + 1u),
                                    order);
                Ret(bcx, old);
            }
        }

        fcx.cleanup();
        return;
    }
|
|
|
|
|
|
|
|
match name {
|
2013-06-12 12:02:55 -05:00
|
|
|
"size_of" => {
|
2012-08-28 17:54:45 -05:00
|
|
|
let tp_ty = substs.tys[0];
|
|
|
|
let lltp_ty = type_of::type_of(ccx, tp_ty);
|
Generate better code for intrinsics
Currently, our intrinsics are generated as functions that have the
usual setup, which means an alloca, and therefore also a jump, for
those intrinsics that return an immediate value. This is especially bad
for unoptimized builds because it means that an intrinsic like
"contains_managed" that should be just "ret 0" or "ret 1" actually ends
up allocating stack space, doing a jump and a store/load sequence
before it finally returns the value.
To fix that, we need a way to stop the generic function declaration
mechanism from allocating stack space for the return value. This
implicitly also kills the jump, because the block for static allocas
isn't required anymore.
Additionally, trans_intrinsic needs to build the return itself instead
of calling finish_fn, because the latter relies on the availability of
the return value pointer.
With these changes, we get the bare minimum code required for our
intrinsics, which makes them small enough that inlining them makes the
resulting code smaller, so we can mark them as "always inline" to get
better performing unoptimized builds.
Optimized builds also benefit slightly from this change as there's less
code for LLVM to translate and the smaller intrinsics help it to make
better inlining decisions for a few code paths.
Building stage2 librustc gets ~1% faster for the optimized version and 5% for
the unoptimized version.
2013-07-16 12:25:06 -05:00
|
|
|
Ret(bcx, C_uint(ccx, machine::llsize_of_real(ccx, lltp_ty)));
|
2012-08-28 17:54:45 -05:00
|
|
|
}
|
2013-06-12 12:02:55 -05:00
|
|
|
"move_val" => {
|
2013-05-29 14:49:23 -05:00
|
|
|
// Create a datum reflecting the value being moved.
|
|
|
|
// Use `appropriate_mode` so that the datum is by ref
|
|
|
|
// if the value is non-immediate. Note that, with
|
|
|
|
// intrinsics, there are no argument cleanups to
|
|
|
|
// concern ourselves with.
|
2012-08-28 17:54:45 -05:00
|
|
|
let tp_ty = substs.tys[0];
|
2013-06-30 23:02:14 -05:00
|
|
|
let mode = appropriate_mode(ccx.tcx, tp_ty);
|
2012-08-28 17:54:45 -05:00
|
|
|
let src = Datum {val: get_param(decl, first_real_arg + 1u),
|
2013-05-29 14:49:23 -05:00
|
|
|
ty: tp_ty, mode: mode};
|
2012-08-28 17:54:45 -05:00
|
|
|
bcx = src.move_to(bcx, DROP_EXISTING,
|
|
|
|
get_param(decl, first_real_arg));
|
Generate better code for intrinsics
Currently, our intrinsics are generated as functions that have the
usual setup, which means an alloca, and therefore also a jump, for
those intrinsics that return an immediate value. This is especially bad
for unoptimized builds because it means that an intrinsic like
"contains_managed" that should be just "ret 0" or "ret 1" actually ends
up allocating stack space, doing a jump and a store/load sequence
before it finally returns the value.
To fix that, we need a way to stop the generic function declaration
mechanism from allocating stack space for the return value. This
implicitly also kills the jump, because the block for static allocas
isn't required anymore.
Additionally, trans_intrinsic needs to build the return itself instead
of calling finish_fn, because the latter relies on the availability of
the return value pointer.
With these changes, we get the bare minimum code required for our
intrinsics, which makes them small enough that inlining them makes the
resulting code smaller, so we can mark them as "always inline" to get
better performing unoptimized builds.
Optimized builds also benefit slightly from this change as there's less
code for LLVM to translate and the smaller intrinsics help it to make
better inlining decisions for a few code paths.
Building stage2 librustc gets ~1% faster for the optimized version and 5% for
            RetVoid(bcx);
        }
        "move_val_init" => {
            // See comments for `"move_val"`.
            let tp_ty = substs.tys[0];
            let mode = appropriate_mode(ccx.tcx, tp_ty);
            let src = Datum {val: get_param(decl, first_real_arg + 1u),
                             ty: tp_ty, mode: mode};
            bcx = src.move_to(bcx, INIT, get_param(decl, first_real_arg));
            RetVoid(bcx);
        }
        "min_align_of" => {
            let tp_ty = substs.tys[0];
            let lltp_ty = type_of::type_of(ccx, tp_ty);
            Ret(bcx, C_uint(ccx, machine::llalign_of_min(ccx, lltp_ty)));
        }
        "pref_align_of" => {
            let tp_ty = substs.tys[0];
            let lltp_ty = type_of::type_of(ccx, tp_ty);
            Ret(bcx, C_uint(ccx, machine::llalign_of_pref(ccx, lltp_ty)));
        }
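        // Hedged note (illustrative, target-dependent): both alignment arms
        // fold to a constant, e.g. on a typical 64-bit target one would expect
        //     min_align_of::<u64>()  ==>  ret 8
        // while pref_align_of may report a larger, target-preferred value.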
        "get_tydesc" => {
            let tp_ty = substs.tys[0];
            let static_ti = get_tydesc(ccx, tp_ty);
            glue::lazily_emit_all_tydesc_glue(ccx, static_ti);

            // FIXME (#3730): ideally this shouldn't need a cast,
            // but there's a circularity between translating rust types to llvm
            // types and having a tydesc type available. So I can't directly access
            // the llvm type of intrinsic::TyDesc struct.
            let userland_tydesc_ty = type_of::type_of(ccx, output_type);
            let td = PointerCast(bcx, static_ti.tydesc, userland_tydesc_ty);
            Ret(bcx, td);
        }
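        // Illustrative sketch (caller-side shape assumed, not taken from this
        // crate): the arm hands back the static tydesc for the type parameter,
        // already cast to the user-visible pointer type, roughly
        //     let td = unsafe { get_tydesc::<T>() }; // *TyDesc
        // so the generated body is just a constant pointer plus a PointerCast.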
        "init" => {
            let tp_ty = substs.tys[0];
            let lltp_ty = type_of::type_of(ccx, tp_ty);
            match bcx.fcx.llretptr {
                Some(ptr) => { Store(bcx, C_null(lltp_ty), ptr); RetVoid(bcx); }
                None if ty::type_is_nil(tp_ty) => RetVoid(bcx),
                None => Ret(bcx, C_null(lltp_ty)),
            }
        }
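        // Hedged example of the lowering above: with an out-pointer the arm
        // emits a store of the zero value followed by "ret void", while an
        // immediate result (no llretptr) becomes a direct return of
        // C_null(lltp_ty).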
        "uninit" => {
            // Do nothing, this is effectively a no-op
            let retty = substs.tys[0];
            if ty::type_is_immediate(ccx.tcx, retty) && !ty::type_is_nil(retty) {
                unsafe {
                    Ret(bcx, lib::llvm::llvm::LLVMGetUndef(type_of(ccx, retty).to_ref()));
                }
            } else {
                RetVoid(bcx)
            }
        }
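        // Hedged note: for an immediate, non-nil type this returns an LLVM
        // `undef` of the value type rather than touching any memory; callers
        // are expected to overwrite the value before reading it.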
        "forget" => {
            RetVoid(bcx);
        }
        "transmute" => {
            let (in_type, out_type) = (substs.tys[0], substs.tys[1]);
            let llintype = type_of::type_of(ccx, in_type);
            let llouttype = type_of::type_of(ccx, out_type);

            let in_type_size = machine::llbitsize_of_real(ccx, llintype);
            let out_type_size = machine::llbitsize_of_real(ccx, llouttype);
            if in_type_size != out_type_size {
                let sp = match ccx.tcx.items.get_copy(&ref_id.get()) {
                    ast_map::node_expr(e) => e.span,
                    _ => fail!("transmute has non-expr arg"),
                };
                let pluralize = |n| if 1u == n { "" } else { "s" };
                ccx.sess.span_fatal(sp,
                                    fmt!("transmute called on types with \
                                          different sizes: %s (%u bit%s) to \
                                          %s (%u bit%s)",
                                         ty_to_str(ccx.tcx, in_type),
                                         in_type_size,
                                         pluralize(in_type_size),
                                         ty_to_str(ccx.tcx, out_type),
                                         out_type_size,
                                         pluralize(out_type_size)));
            }

            if !ty::type_is_nil(out_type) {
                let llsrcval = get_param(decl, first_real_arg);
                if ty::type_is_immediate(ccx.tcx, in_type) {
                    match fcx.llretptr {
                        Some(llretptr) => {
                            Store(bcx, llsrcval, PointerCast(bcx, llretptr, llintype.ptr_to()));
                            RetVoid(bcx);
                        }
                        None => match (llintype.kind(), llouttype.kind()) {
                            (Pointer, other) | (other, Pointer) if other != Pointer => {
                                let tmp = Alloca(bcx, llouttype, "");
                                Store(bcx, llsrcval, PointerCast(bcx, tmp, llintype.ptr_to()));
                                Ret(bcx, Load(bcx, tmp));
                            }
                            _ => Ret(bcx, BitCast(bcx, llsrcval, llouttype))
                        }
                    }
                } else if ty::type_is_immediate(ccx.tcx, out_type) {
                    let llsrcptr = PointerCast(bcx, llsrcval, llouttype.ptr_to());
                    Ret(bcx, Load(bcx, llsrcptr));
                } else {
                    // NB: Do not use a Load and Store here. This causes massive
                    // code bloat when `transmute` is used on large structural
                    // types.
                    let lldestptr = fcx.llretptr.get();
                    let lldestptr = PointerCast(bcx, lldestptr, Type::i8p());
                    let llsrcptr = PointerCast(bcx, llsrcval, Type::i8p());

                    let llsize = llsize_of(ccx, llintype);
                    call_memcpy(bcx, lldestptr, llsrcptr, llsize, 1);
                    RetVoid(bcx);
                };
            } else {
                RetVoid(bcx);
            }
        }
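        // Hedged usage sketch (library-side wrapper name assumed, e.g.
        // cast::transmute): a same-size reinterpretation such as
        //     let bits: u32 = unsafe { transmute(1.0f32) };
        // takes the immediate path above (a bitcast, or a store/load through a
        // casted pointer when a pointer kind is involved), while mismatched
        // sizes are rejected at translation time by the span_fatal branch.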
        "needs_drop" => {
            let tp_ty = substs.tys[0];
            Ret(bcx, C_bool(ty::type_needs_drop(ccx.tcx, tp_ty)));
        }
        "contains_managed" => {
            let tp_ty = substs.tys[0];
            Ret(bcx, C_bool(ty::type_contents(ccx.tcx, tp_ty).contains_managed()));
        }
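        // Hedged illustration: both arms above fold to a constant bool, e.g.
        //     needs_drop::<int>()         ==>  ret false
        //     contains_managed::<@int>()  ==>  ret true
        // which is what makes these intrinsics cheap enough to inline.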
        "visit_tydesc" => {
            let td = get_param(decl, first_real_arg);
            let visitor = get_param(decl, first_real_arg + 1u);
            //let llvisitorptr = alloca(bcx, val_ty(visitor));
            //Store(bcx, visitor, llvisitorptr);
            let td = PointerCast(bcx, td, ccx.tydesc_type.ptr_to());
            glue::call_tydesc_glue_full(bcx, visitor, td,
                                        abi::tydesc_field_visit_glue, None);
            RetVoid(bcx);
        }
        "frame_address" => {
            let frameaddress = ccx.intrinsics.get_copy(& &"llvm.frameaddress");
            let frameaddress_val = Call(bcx, frameaddress, [C_i32(0i32)]);
            let star_u8 = ty::mk_imm_ptr(
                bcx.tcx(),
                ty::mk_mach_uint(ast::ty_u8));
            let fty = ty::mk_closure(bcx.tcx(), ty::ClosureTy {
                purity: ast::impure_fn,
                sigil: ast::BorrowedSigil,
                onceness: ast::Many,
                region: ty::re_bound(ty::br_anon(0)),
                bounds: ty::EmptyBuiltinBounds(),
                sig: FnSig {
                    bound_lifetime_names: opt_vec::Empty,
                    inputs: ~[ star_u8 ],
                    output: ty::mk_nil()
                }
            });
            let datum = Datum {val: get_param(decl, first_real_arg),
                               mode: ByRef(ZeroMem), ty: fty};
            let arg_vals = ~[frameaddress_val];
            bcx = trans_call_inner(
                bcx, None, fty, ty::mk_nil(),
                |bcx| Callee {bcx: bcx, data: Closure(datum)},
                ArgVals(arg_vals), Some(Ignore), DontAutorefArg).bcx;
            RetVoid(bcx);
        }
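        // Hedged usage sketch: the single argument is a &fn(*u8) closure, so a
        // caller would look roughly like
        //     frame_address(|fp| debug!("frame pointer: %?", fp));
        // and the closure is invoked with the result of llvm.frameaddress(0).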
        "morestack_addr" => {
            // XXX This is a hack to grab the address of this particular
            // native function. There should be a general in-language
            // way to do this
            let llfty = type_of_fn(bcx.ccx(), [], ty::mk_nil());
            let morestack_addr = decl_cdecl_fn(
                bcx.ccx().llmod, "__morestack", llfty);
            let morestack_addr = PointerCast(bcx, morestack_addr, Type::nil().ptr_to());
            Ret(bcx, morestack_addr);
        }
        "memcpy32" => memcpy_intrinsic(bcx, "llvm.memcpy.p0i8.p0i8.i32", substs.tys[0], 32),
        "memcpy64" => memcpy_intrinsic(bcx, "llvm.memcpy.p0i8.p0i8.i64", substs.tys[0], 64),
        "memmove32" => memcpy_intrinsic(bcx, "llvm.memmove.p0i8.p0i8.i32", substs.tys[0], 32),
        "memmove64" => memcpy_intrinsic(bcx, "llvm.memmove.p0i8.p0i8.i64", substs.tys[0], 64),
        "memset32" => memset_intrinsic(bcx, "llvm.memset.p0i8.i32", substs.tys[0], 32),
        "memset64" => memset_intrinsic(bcx, "llvm.memset.p0i8.i64", substs.tys[0], 64),
        "sqrtf32" => simple_llvm_intrinsic(bcx, "llvm.sqrt.f32", 1),
        "sqrtf64" => simple_llvm_intrinsic(bcx, "llvm.sqrt.f64", 1),
        "powif32" => simple_llvm_intrinsic(bcx, "llvm.powi.f32", 2),
        "powif64" => simple_llvm_intrinsic(bcx, "llvm.powi.f64", 2),
        "sinf32" => simple_llvm_intrinsic(bcx, "llvm.sin.f32", 1),
        "sinf64" => simple_llvm_intrinsic(bcx, "llvm.sin.f64", 1),
        "cosf32" => simple_llvm_intrinsic(bcx, "llvm.cos.f32", 1),
        "cosf64" => simple_llvm_intrinsic(bcx, "llvm.cos.f64", 1),
        "powf32" => simple_llvm_intrinsic(bcx, "llvm.pow.f32", 2),
        "powf64" => simple_llvm_intrinsic(bcx, "llvm.pow.f64", 2),
        "expf32" => simple_llvm_intrinsic(bcx, "llvm.exp.f32", 1),
        "expf64" => simple_llvm_intrinsic(bcx, "llvm.exp.f64", 1),
        "exp2f32" => simple_llvm_intrinsic(bcx, "llvm.exp2.f32", 1),
        "exp2f64" => simple_llvm_intrinsic(bcx, "llvm.exp2.f64", 1),
        "logf32" => simple_llvm_intrinsic(bcx, "llvm.log.f32", 1),
        "logf64" => simple_llvm_intrinsic(bcx, "llvm.log.f64", 1),
        "log10f32" => simple_llvm_intrinsic(bcx, "llvm.log10.f32", 1),
        "log10f64" => simple_llvm_intrinsic(bcx, "llvm.log10.f64", 1),
        "log2f32" => simple_llvm_intrinsic(bcx, "llvm.log2.f32", 1),
        "log2f64" => simple_llvm_intrinsic(bcx, "llvm.log2.f64", 1),
        "fmaf32" => simple_llvm_intrinsic(bcx, "llvm.fma.f32", 3),
        "fmaf64" => simple_llvm_intrinsic(bcx, "llvm.fma.f64", 3),
        "fabsf32" => simple_llvm_intrinsic(bcx, "llvm.fabs.f32", 1),
        "fabsf64" => simple_llvm_intrinsic(bcx, "llvm.fabs.f64", 1),
        "floorf32" => simple_llvm_intrinsic(bcx, "llvm.floor.f32", 1),
        "floorf64" => simple_llvm_intrinsic(bcx, "llvm.floor.f64", 1),
        "ceilf32" => simple_llvm_intrinsic(bcx, "llvm.ceil.f32", 1),
        "ceilf64" => simple_llvm_intrinsic(bcx, "llvm.ceil.f64", 1),
        "truncf32" => simple_llvm_intrinsic(bcx, "llvm.trunc.f32", 1),
        "truncf64" => simple_llvm_intrinsic(bcx, "llvm.trunc.f64", 1),
        "ctpop8" => simple_llvm_intrinsic(bcx, "llvm.ctpop.i8", 1),
        "ctpop16" => simple_llvm_intrinsic(bcx, "llvm.ctpop.i16", 1),
        "ctpop32" => simple_llvm_intrinsic(bcx, "llvm.ctpop.i32", 1),
        "ctpop64" => simple_llvm_intrinsic(bcx, "llvm.ctpop.i64", 1),
        "ctlz8" => count_zeros_intrinsic(bcx, "llvm.ctlz.i8"),
        "ctlz16" => count_zeros_intrinsic(bcx, "llvm.ctlz.i16"),
        "ctlz32" => count_zeros_intrinsic(bcx, "llvm.ctlz.i32"),
        "ctlz64" => count_zeros_intrinsic(bcx, "llvm.ctlz.i64"),
        "cttz8" => count_zeros_intrinsic(bcx, "llvm.cttz.i8"),
        "cttz16" => count_zeros_intrinsic(bcx, "llvm.cttz.i16"),
        "cttz32" => count_zeros_intrinsic(bcx, "llvm.cttz.i32"),
        "cttz64" => count_zeros_intrinsic(bcx, "llvm.cttz.i64"),
        "bswap16" => simple_llvm_intrinsic(bcx, "llvm.bswap.i16", 1),
        "bswap32" => simple_llvm_intrinsic(bcx, "llvm.bswap.i32", 1),
        "bswap64" => simple_llvm_intrinsic(bcx, "llvm.bswap.i64", 1),
        _ => {
            // Could we make this an enum rather than a string? does it get
            // checked earlier?
            ccx.sess.span_bug(item.span, "unknown intrinsic");
        }
    }
    fcx.cleanup();
}

/**
 * Translates a "crust" fn, meaning a Rust fn that can be called
 * from C code. In this case, we have to perform some adaptation
 * to (1) switch back to the Rust stack and (2) adapt the C calling
 * convention to our own.
 *
 * Example: Given a crust fn F(x: X, y: Y) -> Z, we generate a
 * Rust function R as normal:
 *
 *    void R(Z* dest, void *env, X x, Y y) {...}
 *
 * and then we generate a wrapper function W that looks like:
 *
 *    Z W(X x, Y y) {
 *        Z z;
 *        struct { X x; Y y; Z *z; } args = { x, y, &z };
 *        call_shim_on_rust_stack(S, &args);
 *        return z;
 *    }
 *
 * Note that the wrapper follows the foreign (typically "C") ABI.
 * The wrapper is the actual "value" of the foreign fn. Finally,
 * we generate a shim function S that looks like:
 *
 *     void S(struct { X x; Y y; Z *z; } *args) {
 *         R(args->z, NULL, args->x, args->y);
 *     }
 */
pub fn trans_foreign_fn(ccx: @mut CrateContext,
                        path: ast_map::path,
                        decl: &ast::fn_decl,
                        body: &ast::Block,
                        llwrapfn: ValueRef,
                        id: ast::node_id) {
    let _icx = push_ctxt("foreign::build_foreign_fn");
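    // Hedged walk-through of the scheme described above, instantiated for a
    // hypothetical crust fn `fn on_event(code: i32) -> i32` (names and types
    // are illustrative only):
    //   R (Rust ABI):    void R(i32* dest, void* env, i32 code)
    //   S (shim):        void S(struct { i32 code; i32* ret; }* args)
    //   W (foreign ABI): i32 W(i32 code) -- builds the bundle, runs the shim
    //                    on the Rust stack via an upcall, then returns *ret.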

    fn build_rust_fn(ccx: @mut CrateContext,
                     path: &ast_map::path,
                     decl: &ast::fn_decl,
                     body: &ast::Block,
                     id: ast::node_id)
                     -> ValueRef {
        let _icx = push_ctxt("foreign::foreign::build_rust_fn");
        let t = ty::node_id_to_type(ccx.tcx, id);
        // XXX: Bad copy.
        let ps = link::mangle_internal_name_by_path(
            ccx,
            vec::append_one((*path).clone(),
                            ast_map::path_name(
                                special_idents::clownshoe_abi)));
        let llty = type_of_fn_from_ty(ccx, t);
        let llfndecl = decl_internal_cdecl_fn(ccx.llmod, ps, llty);
        trans_fn(ccx,
                 (*path).clone(),
                 decl,
                 body,
                 llfndecl,
                 no_self,
                 None,
                 id,
                 []);
        return llfndecl;
    }

    fn build_shim_fn(ccx: @mut CrateContext,
                     path: ast_map::path,
                     llrustfn: ValueRef,
                     tys: &ShimTypes)
                     -> ValueRef {
        /*!
         *
         * Generate the shim S:
         *
         *     void S(struct { X x; Y y; Z *z; } *args) {
         *         R(args->z, NULL, &args->x, args->y);
         *     }
         *
         * One complication is that we must adapt to the Rust
         * calling convention, which introduces indirection
         * in some cases. To demonstrate this, I wrote one of the
         * entries above as `&args->x`, because presumably `X` is
         * one of those types that is passed by pointer in Rust.
         */

        let _icx = push_ctxt("foreign::foreign::build_shim_fn");

        fn build_args(bcx: @mut Block, tys: &ShimTypes, llargbundle: ValueRef)
                      -> ~[ValueRef] {
            let _icx = push_ctxt("foreign::extern::shim::build_args");
            let ccx = bcx.ccx();
            let mut llargvals = ~[];
            let mut i = 0u;
            let n = tys.fn_sig.inputs.len();

            if !ty::type_is_immediate(bcx.tcx(), tys.fn_sig.output) {
                let llretptr = load_inbounds(bcx, llargbundle, [0u, n]);
                llargvals.push(llretptr);
            }

            let llenvptr = C_null(Type::opaque_box(bcx.ccx()).ptr_to());
            llargvals.push(llenvptr);
            while i < n {
                // Get a pointer to the argument:
                let mut llargval = GEPi(bcx, llargbundle, [0u, i]);

                if !type_of::arg_is_indirect(ccx, &tys.fn_sig.inputs[i]) {
                    // If Rust would pass this by value, load the value.
                    llargval = Load(bcx, llargval);
                }

                llargvals.push(llargval);
                i += 1u;
            }
            return llargvals;
        }
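        // Hedged illustration of the bundle walked above, for a signature like
        // `fn f(x: i32, s: S) -> i32` (names assumed): the bundle is roughly
        //     { i32 x; S s; i32* ret; }
        // Arguments Rust takes by value (`x`) are loaded out of their slots,
        // indirect ones (`s`) are passed as pointers into the bundle, and the
        // slot at index n is only forwarded as an explicit out-pointer when
        // the output is not an immediate.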

        fn build_ret(bcx: @mut Block,
                     shim_types: &ShimTypes,
                     llargbundle: ValueRef,
                     llretval: ValueRef) {
            if bcx.fcx.llretptr.is_some() &&
                    ty::type_is_immediate(bcx.tcx(), shim_types.fn_sig.output) {
                // Write the value into the argument bundle.
                let arg_count = shim_types.fn_sig.inputs.len();
                let llretptr = load_inbounds(bcx,
                                             llargbundle,
                                             [0, arg_count]);
                Store(bcx, llretval, llretptr);
            } else {
                // NB: The return pointer in the Rust ABI function is wired
                // directly into the return slot in the shim struct.
            }
        }

        let shim_name = link::mangle_internal_name_by_path(
            ccx,
            vec::append_one(path, ast_map::path_name(
                special_idents::clownshoe_stack_shim
            )));
        build_shim_fn_(ccx,
                       shim_name,
                       llrustfn,
                       tys,
                       lib::llvm::CCallConv,
                       build_args,
                       build_ret)
    }

    fn build_wrap_fn(ccx: @mut CrateContext,
                     llshimfn: ValueRef,
                     llwrapfn: ValueRef,
                     tys: &ShimTypes) {
        /*!
         *
         * Generate the wrapper W:
         *
         *    Z W(X x, Y y) {
         *        Z z;
         *        struct { X x; Y y; Z *z; } args = { x, y, &z };
         *        call_shim_on_rust_stack(S, &args);
         *        return z;
         *    }
         */

        let _icx = push_ctxt("foreign::foreign::build_wrap_fn");

        build_wrap_fn_(ccx,
                       tys,
                       llshimfn,
                       llwrapfn,
                       ccx.upcalls.call_shim_on_rust_stack,
                       true,
                       build_args,
                       build_ret);

        fn build_args(bcx: @mut Block,
                      tys: &ShimTypes,
                      llwrapfn: ValueRef,
                      llargbundle: ValueRef) {
            let _icx = push_ctxt("foreign::foreign::wrap::build_args");
            tys.fn_ty.build_wrap_args(bcx,
                                      tys.llsig.llret_ty,
                                      llwrapfn,
                                      llargbundle);
        }

        fn build_ret(bcx: @mut Block, tys: &ShimTypes, llargbundle: ValueRef) {
            let _icx = push_ctxt("foreign::foreign::wrap::build_ret");
            tys.fn_ty.build_wrap_ret(bcx, tys.llsig.llarg_tys, llargbundle);
        }
    }

    let tys = shim_types(ccx, id);
    // The internal Rust ABI function - runs on the Rust stack
    // XXX: Bad copy.
    let llrustfn = build_rust_fn(ccx, &path, decl, body, id);
    // The internal shim function - runs on the Rust stack
    let llshimfn = build_shim_fn(ccx, path, llrustfn, &tys);
    // The foreign C function - runs on the C stack
    build_wrap_fn(ccx, llshimfn, llwrapfn, &tys)
}

pub fn register_foreign_fn(ccx: @mut CrateContext,
                           sp: span,
                           sym: ~str,
                           node_id: ast::node_id)
                           -> ValueRef {
    let _icx = push_ctxt("foreign::register_foreign_fn");

    let t = ty::node_id_to_type(ccx.tcx, node_id);
    let sym = Cell::new(sym);

    let tys = shim_types(ccx, node_id);
    do tys.fn_ty.decl_fn |fnty| {
        register_fn_fuller(ccx,
                           sp,
                           sym.take(),
                           node_id,
                           t,
                           lib::llvm::CCallConv,
                           fnty)
    }
}
|