From a302610016bd8299aff79c38f5a6b95ec1c112ba Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Tue, 24 Oct 2023 12:22:23 +0000 Subject: [PATCH] Merge commit '93a5433f17ab5ed48cc88f1e69b0713b16183373' into sync_cg_clif-2023-10-24 --- .vscode/settings.json | 2 +- Cargo.lock | 52 +++---- Cargo.toml | 12 +- Readme.md | 6 +- example/mini_core_hello_world.rs | 11 ++ ...h-gets-miscompiled-with-llvm-sysroot.patch | 25 ---- src/abi/mod.rs | 35 ++--- src/abi/pass_mode.rs | 11 +- src/cast.rs | 6 +- src/common.rs | 19 +++ src/driver/aot.rs | 23 ++- src/inline_asm.rs | 22 +-- src/intrinsics/llvm_x86.rs | 137 ++++++++++++++++++ src/value_and_place.rs | 37 ++--- 14 files changed, 260 insertions(+), 138 deletions(-) delete mode 100644 patches/0001-regex-Ignore-test-which-gets-miscompiled-with-llvm-sysroot.patch diff --git a/.vscode/settings.json b/.vscode/settings.json index 60cb51d5663..834a1362caf 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -33,7 +33,7 @@ ] }, { - "sysroot_src": "./download/sysroot/sysroot_src/library", + "sysroot_src": "./build/stdlib/library", "crates": [ { "root_module": "./example/std_example.rs", diff --git a/Cargo.lock b/Cargo.lock index 8079913cb0f..c716d501173 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -45,18 +45,18 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "cranelift-bforest" -version = "0.101.0" +version = "0.101.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5e1df0da8488dd03b34afc134ba84b754d61862cc465932a9e5d07952f661e" +checksum = "c1512c3bb6b13018e7109fc3ac964bc87b329eaf3a77825d337558d0c7f6f1be" dependencies = [ "cranelift-entity", ] [[package]] name = "cranelift-codegen" -version = "0.101.0" +version = "0.101.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77a17ca4e699a0aaf49a0c88f6311a864f321048aa63f6b787cab20eb5f93f10" +checksum = "16cb8fb9220a6ea7a226705a273ab905309ee546267bdf34948d57932d7f0396" dependencies = [ "bumpalo", "cranelift-bforest", @@ -75,39 +75,39 @@ dependencies = [ [[package]] name = "cranelift-codegen-meta" -version = "0.101.0" +version = "0.101.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "022f2793cdade1d37a1f755ac42938a3f832f533eac6cafc8b26b209544c3c06" +checksum = "ab3a8d3b0d4745b183da5ea0792b13d79f5c23d6e69ac04761728e2532b56649" dependencies = [ "cranelift-codegen-shared", ] [[package]] name = "cranelift-codegen-shared" -version = "0.101.0" +version = "0.101.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4d72dbb83c2ad788dec4ad0843070973cb48c35a3ca19b1e7437ac40834fd9c" +checksum = "524141c8e68f2abc2043de4c2b31f6d9dd42432738c246431d0572a1422a4a84" [[package]] name = "cranelift-control" -version = "0.101.0" +version = "0.101.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae07cf26dcc90d546826d747ac63b6c40c916f34b03e92a6ae0422c28d771b8a" +checksum = "97513b57c961c713789a03886a57b43e14ebcd204cbaa8ae50ca6c70a8e716b3" dependencies = [ "arbitrary", ] [[package]] name = "cranelift-entity" -version = "0.101.0" +version = "0.101.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2fe6b7e49820893691aea497f36257e9d6f52061d8c4758d61d802d5f101a3d" +checksum = "e3f23d3cf3afa7e45f239702612c76d87964f652a55e28d13ed6d7e20f3479dd" [[package]] name = "cranelift-frontend" -version = "0.101.0" +version = "0.101.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44f497576ca3674581581601b6a55ccc1b43447217648c880e5bce70db3cf659" +checksum = "554cd4947ec9209b58bf9ae5bf83581b5ddf9128bd967208e334b504a57db54e" dependencies = [ "cranelift-codegen", "log", @@ -117,15 +117,15 @@ dependencies = [ [[package]] name = "cranelift-isle" -version = "0.101.0" +version = "0.101.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b96aa02eac00fffee13b0cd37d17874ccdb3d5458983041accd825ef78ce6454" +checksum = "6c1892a439696b6413cb54083806f5fd9fc431768b8de74864b3d9e8b93b124f" [[package]] name = "cranelift-jit" -version = "0.101.0" +version = "0.101.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1d6e0e308c873eefc185745a6b21daec2a10f7554c9fb67e334c2d7d756d979" +checksum = "32209252fb38acaf1662ccd0397907bbe0e92bdb13b6ddbfd2f74e437f83e685" dependencies = [ "anyhow", "cranelift-codegen", @@ -143,9 +143,9 @@ dependencies = [ [[package]] name = "cranelift-module" -version = "0.101.0" +version = "0.101.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1aa8ebb06eced4e478c3f94f1d65d4e7c93493f4640057912b27a3e34b84841" +checksum = "bf42656f5f6df7bfafc4dd7b63a1888b0627c07b43b2cb9aa54e13843fed39eb" dependencies = [ "anyhow", "cranelift-codegen", @@ -154,9 +154,9 @@ dependencies = [ [[package]] name = "cranelift-native" -version = "0.101.0" +version = "0.101.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2870170ca44054b202c737626607b87be6e35655084bd94a6ff807a5812ba7df" +checksum = "e0c2d3badd4b9690865f5bb68a71fa94de592fa2df3f3d11a5a062c60c0a107a" dependencies = [ "cranelift-codegen", "libc", @@ -165,9 +165,9 @@ dependencies = [ [[package]] name = "cranelift-object" -version = "0.101.0" +version = "0.101.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20647761742d17dabac8205da958910ede78599550e06418a16711a3ee2fc897" +checksum = "88eca54bbecea3170035168357306e9c779d4a63d8bf036c9e16bd21fdaa69b5" dependencies = [ "anyhow", "cranelift-codegen", @@ -374,9 +374,9 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "wasmtime-jit-icache-coherence" -version = "14.0.0" +version = "14.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3a5dda53ad6993f9b0a2d65fb49e0348a7232a27a8794064122870d6ee19eb2" +checksum = "9aaf2fa8fd2d6b65abae9b92edfe69254cc5d6b166e342364036c3e347de8da9" dependencies = [ "cfg-if", "libc", diff --git a/Cargo.toml b/Cargo.toml index 5ad5e0e8140..f2ce714e8ff 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,12 +8,12 @@ crate-type = ["dylib"] [dependencies] # These have to be in sync with each other -cranelift-codegen = { version = "0.101", features = ["unwind", "all-arch"] } -cranelift-frontend = { version = "0.101" } -cranelift-module = { version = "0.101" } -cranelift-native = { version = "0.101" } -cranelift-jit = { version = "0.101", optional = true } -cranelift-object = { version = "0.101" } +cranelift-codegen = { version = "0.101.1", features = ["unwind", "all-arch"] } +cranelift-frontend = { version = "0.101.1" } +cranelift-module = { version = "0.101.1" } +cranelift-native = { version = "0.101.1" } +cranelift-jit = { version = "0.101.1", optional = true } +cranelift-object = { version = "0.101.1" } target-lexicon = "0.12.0" gimli = { version = "0.28", default-features = false, features = ["write"]} object = { version = "0.32", default-features = false, features = ["std", "read_core", "write", "archive", "coff", "elf", "macho", "pe"] } diff --git a/Readme.md b/Readme.md index 6f2027be96d..5664cbe7d4f 100644 --- a/Readme.md +++ b/Readme.md @@ -8,7 +8,7 @@ If not please open an issue. ## Building and testing ```bash -$ git clone https://github.com/bjorn3/rustc_codegen_cranelift +$ git clone https://github.com/rust-lang/rustc_codegen_cranelift $ cd rustc_codegen_cranelift $ ./y.sh prepare $ ./y.sh build @@ -29,7 +29,7 @@ Extract the `dist` directory in the archive anywhere you want. If you want to use `cargo clif build` instead of having to specify the full path to the `cargo-clif` executable, you can add the `bin` subdirectory of the extracted `dist` directory to your `PATH`. (tutorial [for Windows](https://stackoverflow.com/a/44272417), and [for Linux/MacOS](https://unix.stackexchange.com/questions/26047/how-to-correctly-add-a-path-to-path/26059#26059)). -[releases]: https://github.com/bjorn3/rustc_codegen_cranelift/releases/tag/dev +[releases]: https://github.com/rust-lang/rustc_codegen_cranelift/releases/tag/dev ## Usage @@ -78,7 +78,7 @@ configuration options. * Inline assembly ([no cranelift support](https://github.com/bytecodealliance/wasmtime/issues/1041)) * On UNIX there is support for invoking an external assembler for `global_asm!` and `asm!`. -* SIMD ([tracked here](https://github.com/bjorn3/rustc_codegen_cranelift/issues/171), `std::simd` fully works, `std::arch` is partially supported) +* SIMD ([tracked here](https://github.com/rust-lang/rustc_codegen_cranelift/issues/171), `std::simd` fully works, `std::arch` is partially supported) * Unwinding on panics ([no cranelift support](https://github.com/bytecodealliance/wasmtime/issues/1677), `-Cpanic=abort` is enabled by default) ## License diff --git a/example/mini_core_hello_world.rs b/example/mini_core_hello_world.rs index 91de04d9770..afc51a47f14 100644 --- a/example/mini_core_hello_world.rs +++ b/example/mini_core_hello_world.rs @@ -353,6 +353,17 @@ fn main() { let f = V([0.0, 1.0]); let _a = f.0[0]; + + stack_val_align(); +} + +#[inline(never)] +fn stack_val_align() { + #[repr(align(8192))] + struct Foo(u8); + + let a = Foo(0); + assert_eq!(&a as *const Foo as usize % 8192, 0); } #[cfg(all( diff --git a/patches/0001-regex-Ignore-test-which-gets-miscompiled-with-llvm-sysroot.patch b/patches/0001-regex-Ignore-test-which-gets-miscompiled-with-llvm-sysroot.patch deleted file mode 100644 index e6ebdcec783..00000000000 --- a/patches/0001-regex-Ignore-test-which-gets-miscompiled-with-llvm-sysroot.patch +++ /dev/null @@ -1,25 +0,0 @@ -From 5d4afb8d807d181038b6a004d17ed055a8d191b2 Mon Sep 17 00:00:00 2001 -From: bjorn3 <17426603+bjorn3@users.noreply.github.com> -Date: Mon, 2 Oct 2023 13:59:00 +0000 -Subject: [PATCH] Ignore test which gets miscompiled with llvm sysroot - ---- - regex-automata/src/util/pool.rs | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/regex-automata/src/util/pool.rs b/regex-automata/src/util/pool.rs -index c03d7b0..28b233b 100644 ---- a/regex-automata/src/util/pool.rs -+++ b/regex-automata/src/util/pool.rs -@@ -1081,6 +1081,8 @@ mod tests { - // into the pool. This in turn resulted in this test producing a data race. - #[cfg(feature = "std")] - #[test] -+ // FIXME(rustc_codegen_cranelift#1395) miscompilation of thread::scope with LLVM sysroot -+ #[ignore] - fn thread_owner_sync() { - let pool = Pool::new(|| vec!['a']); - { --- -2.34.1 - diff --git a/src/abi/mod.rs b/src/abi/mod.rs index c75ad852f82..c4572e03525 100644 --- a/src/abi/mod.rs +++ b/src/abi/mod.rs @@ -120,32 +120,25 @@ pub(crate) fn lib_call( args: &[Value], ) -> Cow<'_, [Value]> { if self.tcx.sess.target.is_like_windows { - let (mut params, mut args): (Vec<_>, Vec<_>) = - params - .into_iter() - .zip(args) - .map(|(param, &arg)| { - if param.value_type == types::I128 { - let arg_ptr = Pointer::stack_slot(self.bcx.create_sized_stack_slot( - StackSlotData { kind: StackSlotKind::ExplicitSlot, size: 16 }, - )); - arg_ptr.store(self, arg, MemFlags::trusted()); - (AbiParam::new(self.pointer_type), arg_ptr.get_addr(self)) - } else { - (param, arg) - } - }) - .unzip(); + let (mut params, mut args): (Vec<_>, Vec<_>) = params + .into_iter() + .zip(args) + .map(|(param, &arg)| { + if param.value_type == types::I128 { + let arg_ptr = self.create_stack_slot(16, 16); + arg_ptr.store(self, arg, MemFlags::trusted()); + (AbiParam::new(self.pointer_type), arg_ptr.get_addr(self)) + } else { + (param, arg) + } + }) + .unzip(); let indirect_ret_val = returns.len() == 1 && returns[0].value_type == types::I128; if indirect_ret_val { params.insert(0, AbiParam::new(self.pointer_type)); - let ret_ptr = - Pointer::stack_slot(self.bcx.create_sized_stack_slot(StackSlotData { - kind: StackSlotKind::ExplicitSlot, - size: 16, - })); + let ret_ptr = self.create_stack_slot(16, 16); args.insert(0, ret_ptr.get_addr(self)); self.lib_call_unadjusted(name, params, vec![], &args); return Cow::Owned(vec![ret_ptr.load(self, types::I128, MemFlags::trusted())]); diff --git a/src/abi/pass_mode.rs b/src/abi/pass_mode.rs index 7c9f8c1051c..06522670029 100644 --- a/src/abi/pass_mode.rs +++ b/src/abi/pass_mode.rs @@ -189,16 +189,13 @@ pub(super) fn from_casted_value<'tcx>( let abi_params = cast_target_to_abi_params(cast); let abi_param_size: u32 = abi_params.iter().map(|param| param.value_type.bytes()).sum(); let layout_size = u32::try_from(layout.size.bytes()).unwrap(); - let stack_slot = fx.bcx.create_sized_stack_slot(StackSlotData { - kind: StackSlotKind::ExplicitSlot, - // FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to - // specify stack slot alignment. + let ptr = fx.create_stack_slot( // Stack slot size may be bigger for example `[u8; 3]` which is packed into an `i32`. // It may also be smaller for example when the type is a wrapper around an integer with a // larger alignment than the integer. - size: (std::cmp::max(abi_param_size, layout_size) + 15) / 16 * 16, - }); - let ptr = Pointer::stack_slot(stack_slot); + std::cmp::max(abi_param_size, layout_size), + u32::try_from(layout.align.pref.bytes()).unwrap(), + ); let mut offset = 0; let mut block_params_iter = block_params.iter().copied(); for param in abi_params { diff --git a/src/cast.rs b/src/cast.rs index 7e027c5f1b3..0b5cb1547fc 100644 --- a/src/cast.rs +++ b/src/cast.rs @@ -104,11 +104,7 @@ pub(crate) fn clif_int_or_float_cast( &[from], )[0]; // FIXME(bytecodealliance/wasmtime#6104) use bitcast instead of store to get from i64x2 to i128 - let stack_slot = fx.bcx.create_sized_stack_slot(StackSlotData { - kind: StackSlotKind::ExplicitSlot, - size: 16, - }); - let ret_ptr = Pointer::stack_slot(stack_slot); + let ret_ptr = fx.create_stack_slot(16, 16); ret_ptr.store(fx, ret, MemFlags::trusted()); ret_ptr.load(fx, types::I128, MemFlags::trusted()) } else { diff --git a/src/common.rs b/src/common.rs index 7a3ae6ebf52..9771f44f62c 100644 --- a/src/common.rs +++ b/src/common.rs @@ -383,6 +383,25 @@ pub(crate) fn get_local_place(&mut self, local: Local) -> CPlace<'tcx> { }) } + pub(crate) fn create_stack_slot(&mut self, size: u32, align: u32) -> Pointer { + if align <= 16 { + let stack_slot = self.bcx.create_sized_stack_slot(StackSlotData { + kind: StackSlotKind::ExplicitSlot, + // FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to + // specify stack slot alignment. + size: (size + 15) / 16 * 16, + }); + Pointer::stack_slot(stack_slot) + } else { + // Alignment is too big to handle using the above hack. Dynamically realign a stack slot + // instead. This wastes some space for the realignment. + let base_ptr = self.create_stack_slot(size + align, 16).get_addr(self); + let misalign_offset = self.bcx.ins().urem_imm(base_ptr, i64::from(align)); + let realign_offset = self.bcx.ins().irsub_imm(misalign_offset, i64::from(align)); + Pointer::new(self.bcx.ins().iadd(base_ptr, realign_offset)) + } + } + pub(crate) fn set_debug_loc(&mut self, source_info: mir::SourceInfo) { if let Some(debug_context) = &mut self.cx.debug_context { let (file, line, column) = diff --git a/src/driver/aot.rs b/src/driver/aot.rs index d3a7decc543..11229dd421e 100644 --- a/src/driver/aot.rs +++ b/src/driver/aot.rs @@ -361,12 +361,26 @@ pub(crate) fn run_aot( metadata: EncodedMetadata, need_metadata_module: bool, ) -> Box { + // FIXME handle `-Ctarget-cpu=native` + let target_cpu = match tcx.sess.opts.cg.target_cpu { + Some(ref name) => name, + None => tcx.sess.target.cpu.as_ref(), + } + .to_owned(); + let cgus = if tcx.sess.opts.output_types.should_codegen() { tcx.collect_and_partition_mono_items(()).1 } else { // If only `--emit metadata` is used, we shouldn't perform any codegen. // Also `tcx.collect_and_partition_mono_items` may panic in that case. - &[] + return Box::new(OngoingCodegen { + modules: vec![], + allocator_module: None, + metadata_module: None, + metadata, + crate_info: CrateInfo::new(tcx, target_cpu), + concurrency_limiter: ConcurrencyLimiter::new(tcx.sess, 0), + }); }; if tcx.dep_graph.is_fully_enabled() { @@ -481,13 +495,6 @@ pub(crate) fn run_aot( None }; - // FIXME handle `-Ctarget-cpu=native` - let target_cpu = match tcx.sess.opts.cg.target_cpu { - Some(ref name) => name, - None => tcx.sess.target.cpu.as_ref(), - } - .to_owned(); - Box::new(OngoingCodegen { modules, allocator_module, diff --git a/src/inline_asm.rs b/src/inline_asm.rs index ed077234254..0517c609337 100644 --- a/src/inline_asm.rs +++ b/src/inline_asm.rs @@ -878,13 +878,7 @@ fn call_inline_asm<'tcx>( inputs: Vec<(Size, Value)>, outputs: Vec<(Size, CPlace<'tcx>)>, ) { - let stack_slot = fx.bcx.func.create_sized_stack_slot(StackSlotData { - kind: StackSlotKind::ExplicitSlot, - size: u32::try_from(slot_size.bytes()).unwrap(), - }); - if fx.clif_comments.enabled() { - fx.add_comment(stack_slot, "inline asm scratch slot"); - } + let stack_slot = fx.create_stack_slot(u32::try_from(slot_size.bytes()).unwrap(), 16); let inline_asm_func = fx .module @@ -904,15 +898,23 @@ fn call_inline_asm<'tcx>( } for (offset, value) in inputs { - fx.bcx.ins().stack_store(value, stack_slot, i32::try_from(offset.bytes()).unwrap()); + stack_slot.offset(fx, i32::try_from(offset.bytes()).unwrap().into()).store( + fx, + value, + MemFlags::trusted(), + ); } - let stack_slot_addr = fx.bcx.ins().stack_addr(fx.pointer_type, stack_slot, 0); + let stack_slot_addr = stack_slot.get_addr(fx); fx.bcx.ins().call(inline_asm_func, &[stack_slot_addr]); for (offset, place) in outputs { let ty = fx.clif_type(place.layout().ty).unwrap(); - let value = fx.bcx.ins().stack_load(ty, stack_slot, i32::try_from(offset.bytes()).unwrap()); + let value = stack_slot.offset(fx, i32::try_from(offset.bytes()).unwrap().into()).load( + fx, + ty, + MemFlags::trusted(), + ); place.write_cvalue(fx, CValue::by_val(value, place.layout())); } } diff --git a/src/intrinsics/llvm_x86.rs b/src/intrinsics/llvm_x86.rs index 0c9a94e1c23..35f144d7dad 100644 --- a/src/intrinsics/llvm_x86.rs +++ b/src/intrinsics/llvm_x86.rs @@ -310,6 +310,143 @@ fn select4( let val = CValue::by_val_pair(cb_out, c, layout); ret.write_cvalue(fx, val); } + "llvm.x86.sse2.pavg.b" | "llvm.x86.sse2.pavg.w" => { + intrinsic_args!(fx, args => (a, b); intrinsic); + + // FIXME use vector instructions when possible + simd_pair_for_each_lane( + fx, + a, + b, + ret, + &|fx, _lane_ty, _res_lane_ty, a_lane, b_lane| { + // (a + b + 1) >> 1 + let lane_ty = fx.bcx.func.dfg.value_type(a_lane); + let a_lane = fx.bcx.ins().uextend(lane_ty.double_width().unwrap(), a_lane); + let b_lane = fx.bcx.ins().uextend(lane_ty.double_width().unwrap(), b_lane); + let sum = fx.bcx.ins().iadd(a_lane, b_lane); + let num_plus_one = fx.bcx.ins().iadd_imm(sum, 1); + let res = fx.bcx.ins().ushr_imm(num_plus_one, 1); + fx.bcx.ins().ireduce(lane_ty, res) + }, + ); + } + "llvm.x86.sse2.psra.w" => { + intrinsic_args!(fx, args => (a, count); intrinsic); + + let count_lane = count.force_stack(fx).0.load(fx, types::I64, MemFlags::trusted()); + let lane_ty = fx.clif_type(a.layout().ty.simd_size_and_type(fx.tcx).1).unwrap(); + let max_count = fx.bcx.ins().iconst(types::I64, i64::from(lane_ty.bits() - 1)); + let saturated_count = fx.bcx.ins().umin(count_lane, max_count); + + // FIXME use vector instructions when possible + simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, a_lane| { + fx.bcx.ins().sshr(a_lane, saturated_count) + }); + } + "llvm.x86.sse2.psad.bw" => { + intrinsic_args!(fx, args => (a, b); intrinsic); + + assert_eq!(a.layout(), b.layout()); + let layout = a.layout(); + + let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert_eq!(lane_ty, fx.tcx.types.u8); + assert_eq!(ret_lane_ty, fx.tcx.types.u64); + assert_eq!(lane_count, ret_lane_count * 8); + + let ret_lane_layout = fx.layout_of(fx.tcx.types.u64); + for out_lane_idx in 0..lane_count / 8 { + let mut lane_diff_acc = fx.bcx.ins().iconst(types::I64, 0); + + for lane_idx in out_lane_idx * 8..out_lane_idx * 8 + 1 { + let a_lane = a.value_lane(fx, lane_idx).load_scalar(fx); + let b_lane = b.value_lane(fx, lane_idx).load_scalar(fx); + + let lane_diff = fx.bcx.ins().isub(a_lane, b_lane); + let abs_lane_diff = fx.bcx.ins().iabs(lane_diff); + let abs_lane_diff = fx.bcx.ins().uextend(types::I64, abs_lane_diff); + lane_diff_acc = fx.bcx.ins().iadd(lane_diff_acc, abs_lane_diff); + } + + let res_lane = CValue::by_val(lane_diff_acc, ret_lane_layout); + + ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane); + } + } + "llvm.x86.ssse3.pmadd.ub.sw.128" => { + intrinsic_args!(fx, args => (a, b); intrinsic); + + let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert_eq!(lane_ty, fx.tcx.types.u8); + assert_eq!(ret_lane_ty, fx.tcx.types.i16); + assert_eq!(lane_count, ret_lane_count * 2); + + let ret_lane_layout = fx.layout_of(fx.tcx.types.i16); + for out_lane_idx in 0..lane_count / 2 { + let a_lane0 = a.value_lane(fx, out_lane_idx * 2).load_scalar(fx); + let a_lane0 = fx.bcx.ins().uextend(types::I16, a_lane0); + let b_lane0 = b.value_lane(fx, out_lane_idx * 2).load_scalar(fx); + let b_lane0 = fx.bcx.ins().sextend(types::I16, b_lane0); + + let a_lane1 = a.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx); + let a_lane1 = fx.bcx.ins().uextend(types::I16, a_lane1); + let b_lane1 = b.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx); + let b_lane1 = fx.bcx.ins().sextend(types::I16, b_lane1); + + let mul0: Value = fx.bcx.ins().imul(a_lane0, b_lane0); + let mul1 = fx.bcx.ins().imul(a_lane1, b_lane1); + + let (val, has_overflow) = fx.bcx.ins().sadd_overflow(mul0, mul1); + + let rhs_ge_zero = fx.bcx.ins().icmp_imm(IntCC::SignedGreaterThanOrEqual, mul1, 0); + + let min = fx.bcx.ins().iconst(types::I16, i64::from(i16::MIN as u16)); + let max = fx.bcx.ins().iconst(types::I16, i64::from(i16::MAX as u16)); + + let sat_val = fx.bcx.ins().select(rhs_ge_zero, max, min); + let res_lane = fx.bcx.ins().select(has_overflow, sat_val, val); + + let res_lane = CValue::by_val(res_lane, ret_lane_layout); + + ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane); + } + } + "llvm.x86.sse2.pmadd.wd" => { + intrinsic_args!(fx, args => (a, b); intrinsic); + + assert_eq!(a.layout(), b.layout()); + let layout = a.layout(); + + let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert_eq!(lane_ty, fx.tcx.types.i16); + assert_eq!(ret_lane_ty, fx.tcx.types.i32); + assert_eq!(lane_count, ret_lane_count * 2); + + let ret_lane_layout = fx.layout_of(fx.tcx.types.i32); + for out_lane_idx in 0..lane_count / 2 { + let a_lane0 = a.value_lane(fx, out_lane_idx * 2).load_scalar(fx); + let a_lane0 = fx.bcx.ins().uextend(types::I32, a_lane0); + let b_lane0 = b.value_lane(fx, out_lane_idx * 2).load_scalar(fx); + let b_lane0 = fx.bcx.ins().sextend(types::I32, b_lane0); + + let a_lane1 = a.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx); + let a_lane1 = fx.bcx.ins().uextend(types::I32, a_lane1); + let b_lane1 = b.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx); + let b_lane1 = fx.bcx.ins().sextend(types::I32, b_lane1); + + let mul0: Value = fx.bcx.ins().imul(a_lane0, b_lane0); + let mul1 = fx.bcx.ins().imul(a_lane1, b_lane1); + + let res_lane = fx.bcx.ins().iadd(mul0, mul1); + let res_lane = CValue::by_val(res_lane, ret_lane_layout); + + ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane); + } + } _ => { fx.tcx .sess diff --git a/src/value_and_place.rs b/src/value_and_place.rs index 3a6a6c9e3f5..5f0aa6c5581 100644 --- a/src/value_and_place.rs +++ b/src/value_and_place.rs @@ -132,18 +132,11 @@ pub(crate) fn dyn_star_force_data_on_stack( (ptr.get_addr(fx), vtable) } CValueInner::ByValPair(data, vtable) => { - let stack_slot = fx.bcx.create_sized_stack_slot(StackSlotData { - kind: StackSlotKind::ExplicitSlot, - // FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to - // specify stack slot alignment. - size: (u32::try_from(fx.target_config.pointer_type().bytes()).unwrap() + 15) - / 16 - * 16, - }); - let data_ptr = Pointer::stack_slot(stack_slot); - let mut flags = MemFlags::new(); - flags.set_notrap(); - data_ptr.store(fx, data, flags); + let data_ptr = fx.create_stack_slot( + u32::try_from(fx.target_config.pointer_type().bytes()).unwrap(), + u32::try_from(fx.target_config.pointer_type().bytes()).unwrap(), + ); + data_ptr.store(fx, data, MemFlags::trusted()); (data_ptr.get_addr(fx), vtable) } @@ -372,13 +365,11 @@ pub(crate) fn new_stack_slot( .fatal(format!("values of type {} are too big to store on the stack", layout.ty)); } - let stack_slot = fx.bcx.create_sized_stack_slot(StackSlotData { - kind: StackSlotKind::ExplicitSlot, - // FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to - // specify stack slot alignment. - size: (u32::try_from(layout.size.bytes()).unwrap() + 15) / 16 * 16, - }); - CPlace { inner: CPlaceInner::Addr(Pointer::stack_slot(stack_slot), None), layout } + let stack_slot = fx.create_stack_slot( + u32::try_from(layout.size.bytes()).unwrap(), + u32::try_from(layout.align.pref.bytes()).unwrap(), + ); + CPlace { inner: CPlaceInner::Addr(stack_slot, None), layout } } pub(crate) fn new_var( @@ -543,13 +534,7 @@ fn transmute_scalar<'tcx>( _ if src_ty.is_vector() && dst_ty.is_vector() => codegen_bitcast(fx, dst_ty, data), _ if src_ty.is_vector() || dst_ty.is_vector() => { // FIXME(bytecodealliance/wasmtime#6104) do something more efficient for transmutes between vectors and integers. - let stack_slot = fx.bcx.create_sized_stack_slot(StackSlotData { - kind: StackSlotKind::ExplicitSlot, - // FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to - // specify stack slot alignment. - size: (src_ty.bytes() + 15) / 16 * 16, - }); - let ptr = Pointer::stack_slot(stack_slot); + let ptr = fx.create_stack_slot(src_ty.bytes(), src_ty.bytes()); ptr.store(fx, data, MemFlags::trusted()); ptr.load(fx, dst_ty, MemFlags::trusted()) }