From 02778b3e0ea2ab8818e77811b05f9dc2e01c9028 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 1 Mar 2024 03:53:26 -0500
Subject: [PATCH] Add `f16` and `f128` LLVM intrinsics

---
 compiler/rustc_codegen_llvm/src/context.rs    | 48 +++++++++++++
 .../src/debuginfo/metadata.rs                 |  2 +-
 compiler/rustc_codegen_llvm/src/intrinsic.rs  | 67 +++++++++++++++++++
 compiler/rustc_span/src/symbol.rs             | 46 +++++++++++++
 .../src/typeid/typeid_itanium_cxx_abi.rs      | 17 ++---
 compiler/rustc_symbol_mangling/src/v0.rs      |  3 +
 6 files changed, 174 insertions(+), 9 deletions(-)

diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs
index 7dfcf1ab50e..16122e5557e 100644
--- a/compiler/rustc_codegen_llvm/src/context.rs
+++ b/compiler/rustc_codegen_llvm/src/context.rs
@@ -685,8 +685,10 @@ macro_rules! mk_struct {
         let t_i64 = self.type_i64();
         let t_i128 = self.type_i128();
         let t_isize = self.type_isize();
+        let t_f16 = self.type_f16();
         let t_f32 = self.type_f32();
         let t_f64 = self.type_f64();
+        let t_f128 = self.type_f128();
         let t_metadata = self.type_metadata();
         let t_token = self.type_token();
 
@@ -728,69 +730,115 @@ macro_rules! mk_struct {
         ifn!("llvm.debugtrap", fn() -> void);
         ifn!("llvm.frameaddress", fn(t_i32) -> ptr);
 
+        ifn!("llvm.powi.f16", fn(t_f16, t_i32) -> t_f16);
         ifn!("llvm.powi.f32", fn(t_f32, t_i32) -> t_f32);
         ifn!("llvm.powi.f64", fn(t_f64, t_i32) -> t_f64);
+        ifn!("llvm.powi.f128", fn(t_f128, t_i32) -> t_f128);
 
+        ifn!("llvm.pow.f16", fn(t_f16, t_f16) -> t_f16);
         ifn!("llvm.pow.f32", fn(t_f32, t_f32) -> t_f32);
         ifn!("llvm.pow.f64", fn(t_f64, t_f64) -> t_f64);
+        ifn!("llvm.pow.f128", fn(t_f128, t_f128) -> t_f128);
 
+        ifn!("llvm.sqrt.f16", fn(t_f16) -> t_f16);
         ifn!("llvm.sqrt.f32", fn(t_f32) -> t_f32);
         ifn!("llvm.sqrt.f64", fn(t_f64) -> t_f64);
+        ifn!("llvm.sqrt.f128", fn(t_f128) -> t_f128);
 
+        ifn!("llvm.sin.f16", fn(t_f16) -> t_f16);
         ifn!("llvm.sin.f32", fn(t_f32) -> t_f32);
         ifn!("llvm.sin.f64", fn(t_f64) -> t_f64);
+        ifn!("llvm.sin.f128", fn(t_f128) -> t_f128);
 
+        ifn!("llvm.cos.f16", fn(t_f16) -> t_f16);
         ifn!("llvm.cos.f32", fn(t_f32) -> t_f32);
         ifn!("llvm.cos.f64", fn(t_f64) -> t_f64);
+        ifn!("llvm.cos.f128", fn(t_f128) -> t_f128);
 
+        ifn!("llvm.exp.f16", fn(t_f16) -> t_f16);
         ifn!("llvm.exp.f32", fn(t_f32) -> t_f32);
         ifn!("llvm.exp.f64", fn(t_f64) -> t_f64);
+        ifn!("llvm.exp.f128", fn(t_f128) -> t_f128);
 
+        ifn!("llvm.exp2.f16", fn(t_f16) -> t_f16);
         ifn!("llvm.exp2.f32", fn(t_f32) -> t_f32);
         ifn!("llvm.exp2.f64", fn(t_f64) -> t_f64);
+        ifn!("llvm.exp2.f128", fn(t_f128) -> t_f128);
 
+        ifn!("llvm.log.f16", fn(t_f16) -> t_f16);
         ifn!("llvm.log.f32", fn(t_f32) -> t_f32);
         ifn!("llvm.log.f64", fn(t_f64) -> t_f64);
+        ifn!("llvm.log.f128", fn(t_f128) -> t_f128);
 
+        ifn!("llvm.log10.f16", fn(t_f16) -> t_f16);
         ifn!("llvm.log10.f32", fn(t_f32) -> t_f32);
         ifn!("llvm.log10.f64", fn(t_f64) -> t_f64);
+        ifn!("llvm.log10.f128", fn(t_f128) -> t_f128);
 
+        ifn!("llvm.log2.f16", fn(t_f16) -> t_f16);
         ifn!("llvm.log2.f32", fn(t_f32) -> t_f32);
         ifn!("llvm.log2.f64", fn(t_f64) -> t_f64);
+        ifn!("llvm.log2.f128", fn(t_f128) -> t_f128);
 
+        ifn!("llvm.fma.f16", fn(t_f16, t_f16, t_f16) -> t_f16);
         ifn!("llvm.fma.f32", fn(t_f32, t_f32, t_f32) -> t_f32);
         ifn!("llvm.fma.f64", fn(t_f64, t_f64, t_f64) -> t_f64);
+        ifn!("llvm.fma.f128", fn(t_f128, t_f128, t_f128) -> t_f128);
 
+        ifn!("llvm.fabs.f16", fn(t_f16) -> t_f16);
         ifn!("llvm.fabs.f32", fn(t_f32) -> t_f32);
         ifn!("llvm.fabs.f64", fn(t_f64) -> t_f64);
+        ifn!("llvm.fabs.f128", fn(t_f128) -> t_f128);
 
+        ifn!("llvm.minnum.f16", fn(t_f16, t_f16) -> t_f16);
         ifn!("llvm.minnum.f32", fn(t_f32, t_f32) -> t_f32);
         ifn!("llvm.minnum.f64", fn(t_f64, t_f64) -> t_f64);
+        ifn!("llvm.minnum.f128", fn(t_f128, t_f128) -> t_f128);
+
+        ifn!("llvm.maxnum.f16", fn(t_f16, t_f16) -> t_f16);
         ifn!("llvm.maxnum.f32", fn(t_f32, t_f32) -> t_f32);
         ifn!("llvm.maxnum.f64", fn(t_f64, t_f64) -> t_f64);
+        ifn!("llvm.maxnum.f128", fn(t_f128, t_f128) -> t_f128);
 
+        ifn!("llvm.floor.f16", fn(t_f16) -> t_f16);
         ifn!("llvm.floor.f32", fn(t_f32) -> t_f32);
         ifn!("llvm.floor.f64", fn(t_f64) -> t_f64);
+        ifn!("llvm.floor.f128", fn(t_f128) -> t_f128);
 
+        ifn!("llvm.ceil.f16", fn(t_f16) -> t_f16);
         ifn!("llvm.ceil.f32", fn(t_f32) -> t_f32);
         ifn!("llvm.ceil.f64", fn(t_f64) -> t_f64);
+        ifn!("llvm.ceil.f128", fn(t_f128) -> t_f128);
 
+        ifn!("llvm.trunc.f16", fn(t_f16) -> t_f16);
         ifn!("llvm.trunc.f32", fn(t_f32) -> t_f32);
         ifn!("llvm.trunc.f64", fn(t_f64) -> t_f64);
+        ifn!("llvm.trunc.f128", fn(t_f128) -> t_f128);
 
+        ifn!("llvm.copysign.f16", fn(t_f16, t_f16) -> t_f16);
         ifn!("llvm.copysign.f32", fn(t_f32, t_f32) -> t_f32);
         ifn!("llvm.copysign.f64", fn(t_f64, t_f64) -> t_f64);
+        ifn!("llvm.copysign.f128", fn(t_f128, t_f128) -> t_f128);
 
+        ifn!("llvm.round.f16", fn(t_f16) -> t_f16);
         ifn!("llvm.round.f32", fn(t_f32) -> t_f32);
         ifn!("llvm.round.f64", fn(t_f64) -> t_f64);
+        ifn!("llvm.round.f128", fn(t_f128) -> t_f128);
 
+        ifn!("llvm.roundeven.f16", fn(t_f16) -> t_f16);
         ifn!("llvm.roundeven.f32", fn(t_f32) -> t_f32);
         ifn!("llvm.roundeven.f64", fn(t_f64) -> t_f64);
+        ifn!("llvm.roundeven.f128", fn(t_f128) -> t_f128);
 
+        ifn!("llvm.rint.f16", fn(t_f16) -> t_f16);
         ifn!("llvm.rint.f32", fn(t_f32) -> t_f32);
         ifn!("llvm.rint.f64", fn(t_f64) -> t_f64);
+        ifn!("llvm.rint.f128", fn(t_f128) -> t_f128);
+
+        ifn!("llvm.nearbyint.f16", fn(t_f16) -> t_f16);
         ifn!("llvm.nearbyint.f32", fn(t_f32) -> t_f32);
         ifn!("llvm.nearbyint.f64", fn(t_f64) -> t_f64);
+        ifn!("llvm.nearbyint.f128", fn(t_f128) -> t_f128);
 
         ifn!("llvm.ctpop.i8", fn(t_i8) -> t_i8);
         ifn!("llvm.ctpop.i16", fn(t_i16) -> t_i16);
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
index 045b6d2b651..1a5f9b42947 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
@@ -695,7 +695,7 @@ fn msvc_basic_name(self) -> &'static str {
 
 impl MsvcBasicName for ty::FloatTy {
     fn msvc_basic_name(self) -> &'static str {
-        // FIXME: f16 and f128 have no MSVE representation. We could improve the debuginfo.
+        // FIXME: f16 and f128 have no MSVC representation. We could improve the debuginfo.
         // See: <https://github.com/rust-lang/rust/pull/114607/files#r1454683264>
         match self {
             ty::FloatTy::F16 => "half",
diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index 1d4ab866cb3..f33a672aff0 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -28,51 +28,118 @@ fn get_simple_intrinsic<'ll>(
     name: Symbol,
 ) -> Option<(&'ll Type, &'ll Value)> {
     let llvm_name = match name {
+        sym::sqrtf16 => "llvm.sqrt.f16",
         sym::sqrtf32 => "llvm.sqrt.f32",
         sym::sqrtf64 => "llvm.sqrt.f64",
+        sym::sqrtf128 => "llvm.sqrt.f128",
+
+        sym::powif16 => "llvm.powi.f16",
         sym::powif32 => "llvm.powi.f32",
         sym::powif64 => "llvm.powi.f64",
+        sym::powif128 => "llvm.powi.f128",
+
+        sym::sinf16 => "llvm.sin.f16",
         sym::sinf32 => "llvm.sin.f32",
         sym::sinf64 => "llvm.sin.f64",
+        sym::sinf128 => "llvm.sin.f128",
+
+        sym::cosf16 => "llvm.cos.f16",
         sym::cosf32 => "llvm.cos.f32",
         sym::cosf64 => "llvm.cos.f64",
+        sym::cosf128 => "llvm.cos.f128",
+
+        sym::powf16 => "llvm.pow.f16",
         sym::powf32 => "llvm.pow.f32",
         sym::powf64 => "llvm.pow.f64",
+        sym::powf128 => "llvm.pow.f128",
+
+        sym::expf16 => "llvm.exp.f16",
         sym::expf32 => "llvm.exp.f32",
         sym::expf64 => "llvm.exp.f64",
+        sym::expf128 => "llvm.exp.f128",
+
+        sym::exp2f16 => "llvm.exp2.f16",
         sym::exp2f32 => "llvm.exp2.f32",
         sym::exp2f64 => "llvm.exp2.f64",
+        sym::exp2f128 => "llvm.exp2.f128",
+
+        sym::logf16 => "llvm.log.f16",
         sym::logf32 => "llvm.log.f32",
         sym::logf64 => "llvm.log.f64",
+        sym::logf128 => "llvm.log.f128",
+
+        sym::log10f16 => "llvm.log10.f16",
         sym::log10f32 => "llvm.log10.f32",
         sym::log10f64 => "llvm.log10.f64",
+        sym::log10f128 => "llvm.log10.f128",
+
+        sym::log2f16 => "llvm.log2.f16",
         sym::log2f32 => "llvm.log2.f32",
         sym::log2f64 => "llvm.log2.f64",
+        sym::log2f128 => "llvm.log2.f128",
+
+        sym::fmaf16 => "llvm.fma.f16",
         sym::fmaf32 => "llvm.fma.f32",
         sym::fmaf64 => "llvm.fma.f64",
+        sym::fmaf128 => "llvm.fma.f128",
+
+        sym::fabsf16 => "llvm.fabs.f16",
         sym::fabsf32 => "llvm.fabs.f32",
         sym::fabsf64 => "llvm.fabs.f64",
+        sym::fabsf128 => "llvm.fabs.f128",
+
+        sym::minnumf16 => "llvm.minnum.f16",
         sym::minnumf32 => "llvm.minnum.f32",
         sym::minnumf64 => "llvm.minnum.f64",
+        sym::minnumf128 => "llvm.minnum.f128",
+
+        sym::maxnumf16 => "llvm.maxnum.f16",
         sym::maxnumf32 => "llvm.maxnum.f32",
         sym::maxnumf64 => "llvm.maxnum.f64",
+        sym::maxnumf128 => "llvm.maxnum.f128",
+
+        sym::copysignf16 => "llvm.copysign.f16",
         sym::copysignf32 => "llvm.copysign.f32",
         sym::copysignf64 => "llvm.copysign.f64",
+        sym::copysignf128 => "llvm.copysign.f128",
+
+        sym::floorf16 => "llvm.floor.f16",
         sym::floorf32 => "llvm.floor.f32",
         sym::floorf64 => "llvm.floor.f64",
+        sym::floorf128 => "llvm.floor.f128",
+
+        sym::ceilf16 => "llvm.ceil.f16",
         sym::ceilf32 => "llvm.ceil.f32",
         sym::ceilf64 => "llvm.ceil.f64",
+        sym::ceilf128 => "llvm.ceil.f128",
+
+        sym::truncf16 => "llvm.trunc.f16",
         sym::truncf32 => "llvm.trunc.f32",
         sym::truncf64 => "llvm.trunc.f64",
+        sym::truncf128 => "llvm.trunc.f128",
+
+        sym::rintf16 => "llvm.rint.f16",
         sym::rintf32 => "llvm.rint.f32",
         sym::rintf64 => "llvm.rint.f64",
+        sym::rintf128 => "llvm.rint.f128",
+
+        sym::nearbyintf16 => "llvm.nearbyint.f16",
         sym::nearbyintf32 => "llvm.nearbyint.f32",
         sym::nearbyintf64 => "llvm.nearbyint.f64",
+        sym::nearbyintf128 => "llvm.nearbyint.f128",
+
+        sym::roundf16 => "llvm.round.f16",
         sym::roundf32 => "llvm.round.f32",
         sym::roundf64 => "llvm.round.f64",
+        sym::roundf128 => "llvm.round.f128",
+
         sym::ptr_mask => "llvm.ptrmask",
+
+        sym::roundevenf16 => "llvm.roundeven.f16",
         sym::roundevenf32 => "llvm.roundeven.f32",
         sym::roundevenf64 => "llvm.roundeven.f64",
+        sym::roundevenf128 => "llvm.roundeven.f128",
+
         _ => return None,
     };
     Some(cx.get_intrinsic(llvm_name))
diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
index 2e86a8bd581..ee8d9ae9c53 100644
--- a/compiler/rustc_span/src/symbol.rs
+++ b/compiler/rustc_span/src/symbol.rs
@@ -490,6 +490,8 @@
         catch_unwind,
         cause,
         cdylib,
+        ceilf128,
+        ceilf16,
         ceilf32,
         ceilf64,
         cfg,
@@ -595,6 +597,8 @@
         copy,
         copy_closures,
         copy_nonoverlapping,
+        copysignf128,
+        copysignf16,
         copysignf32,
         copysignf64,
         core,
@@ -607,6 +611,8 @@
         coroutine_resume,
         coroutine_state,
         coroutines,
+        cosf128,
+        cosf16,
         cosf32,
         cosf64,
         count,
@@ -737,10 +743,14 @@
         exhaustive_integer_patterns,
         exhaustive_patterns,
         existential_type,
+        exp2f128,
+        exp2f16,
         exp2f32,
         exp2f64,
         expect,
         expected,
+        expf128,
+        expf16,
         expf32,
         expf64,
         explicit_generic_args_with_impl_trait,
@@ -759,7 +769,9 @@
         external_doc,
         f,
         f128,
+        f128_nan,
         f16,
+        f16_nan,
         f16c_target_feature,
         f32,
         f32_legacy_const_digits,
@@ -793,6 +805,8 @@
         f64_legacy_const_neg_infinity,
         f64_legacy_const_radix,
         f64_nan,
+        fabsf128,
+        fabsf16,
         fabsf32,
         fabsf64,
         fadd_algebraic,
@@ -813,8 +827,12 @@
         file,
         float,
         float_to_int_unchecked,
+        floorf128,
+        floorf16,
         floorf32,
         floorf64,
+        fmaf128,
+        fmaf16,
         fmaf32,
         fmaf64,
         fmt,
@@ -1030,11 +1048,17 @@
         loaded_from_disk,
         local,
         local_inner_macros,
+        log10f128,
+        log10f16,
         log10f32,
         log10f64,
+        log2f128,
+        log2f16,
         log2f32,
         log2f64,
         log_syntax,
+        logf128,
+        logf16,
         logf32,
         logf64,
         loongarch_target_feature,
@@ -1062,6 +1086,8 @@
         match_beginning_vert,
         match_default_bindings,
         matches_macro,
+        maxnumf128,
+        maxnumf16,
         maxnumf32,
         maxnumf64,
         may_dangle,
@@ -1093,6 +1119,8 @@
         min_exhaustive_patterns,
         min_specialization,
         min_type_alias_impl_trait,
+        minnumf128,
+        minnumf16,
         minnumf32,
         minnumf64,
         mips_target_feature,
@@ -1155,6 +1183,8 @@
         native_link_modifiers_whole_archive,
         natvis_file,
         ne,
+        nearbyintf128,
+        nearbyintf16,
         nearbyintf32,
         nearbyintf64,
         needs_allocator,
@@ -1289,8 +1319,12 @@
         poll_next,
         post_dash_lto: "post-lto",
         powerpc_target_feature,
+        powf128,
+        powf16,
         powf32,
         powf64,
+        powif128,
+        powif16,
         powif32,
         powif64,
         pre_dash_lto: "pre-lto",
@@ -1416,6 +1450,8 @@
         return_position_impl_trait_in_trait,
         return_type_notation,
         rhs,
+        rintf128,
+        rintf16,
         rintf32,
         rintf64,
         riscv_target_feature,
@@ -1424,8 +1460,12 @@
         ropi_rwpi: "ropi-rwpi",
         rotate_left,
         rotate_right,
+        roundevenf128,
+        roundevenf16,
         roundevenf32,
         roundevenf64,
+        roundf128,
+        roundf16,
         roundf32,
         roundf64,
         rt,
@@ -1630,6 +1670,8 @@
         simd_trunc,
         simd_xor,
         since,
+        sinf128,
+        sinf16,
         sinf32,
         sinf64,
         size,
@@ -1647,6 +1689,8 @@
         specialization,
         speed,
         spotlight,
+        sqrtf128,
+        sqrtf16,
         sqrtf32,
         sqrtf64,
         sreg,
@@ -1746,6 +1790,8 @@
         transparent_enums,
         transparent_unions,
         trivial_bounds,
+        truncf128,
+        truncf16,
         truncf32,
         truncf64,
         try_blocks,
diff --git a/compiler/rustc_symbol_mangling/src/typeid/typeid_itanium_cxx_abi.rs b/compiler/rustc_symbol_mangling/src/typeid/typeid_itanium_cxx_abi.rs
index b5b3aa27060..793cb574c85 100644
--- a/compiler/rustc_symbol_mangling/src/typeid/typeid_itanium_cxx_abi.rs
+++ b/compiler/rustc_symbol_mangling/src/typeid/typeid_itanium_cxx_abi.rs
@@ -464,16 +464,17 @@ fn encode_ty<'tcx>(
             typeid.push_str(&s);
         }
 
-        // Rust's f32 and f64 single (32-bit) and double (64-bit) precision floating-point types
-        // have IEEE-754 binary32 and binary64 floating-point layouts, respectively.
+        // Rust's f16, f32, f64, and f128 half (16-bit), single (32-bit), double (64-bit), and
+        // quad (128-bit) precision floating-point types have IEEE-754 binary16, binary32,
+        // binary64, and binary128 floating-point layouts, respectively.
         //
         // (See https://rust-lang.github.io/unsafe-code-guidelines/layout/scalars.html#fixed-width-floating-point-types.)
         ty::Float(float_ty) => {
-            typeid.push(match float_ty {
-                FloatTy::F16 => unimplemented!("f16_f128"),
-                FloatTy::F32 => 'f',
-                FloatTy::F64 => 'd',
-                FloatTy::F128 => unimplemented!("f16_f128"),
+            typeid.push_str(match float_ty {
+                FloatTy::F16 => "Dh",
+                FloatTy::F32 => "f",
+                FloatTy::F64 => "d",
+                FloatTy::F128 => "g",
             });
         }
 
@@ -557,7 +558,7 @@ fn encode_ty<'tcx>(
                         // https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling-compression).
                         let builtin_types = [
                             "v", "w", "b", "c", "a", "h", "s", "t", "i", "j", "l", "m", "x", "y",
-                            "n", "o", "f", "d", "e", "g", "z",
+                            "n", "o", "f", "d", "e", "g", "z", "Dh",
                         ];
                         if !builtin_types.contains(&str) {
                             compress(dict, DictKey::Ty(ty, TyQ::None), &mut s);
diff --git a/compiler/rustc_symbol_mangling/src/v0.rs b/compiler/rustc_symbol_mangling/src/v0.rs
index 747c945960a..f1b1b4ed2bb 100644
--- a/compiler/rustc_symbol_mangling/src/v0.rs
+++ b/compiler/rustc_symbol_mangling/src/v0.rs
@@ -320,8 +320,11 @@ fn print_type(&mut self, ty: Ty<'tcx>) -> Result<(), PrintError> {
             ty::Uint(UintTy::U64) => "y",
             ty::Uint(UintTy::U128) => "o",
             ty::Uint(UintTy::Usize) => "j",
+            // FIXME(f16_f128): update these once `rustc-demangle` supports the new types
+            ty::Float(FloatTy::F16) => unimplemented!("f16_f128"),
             ty::Float(FloatTy::F32) => "f",
             ty::Float(FloatTy::F64) => "d",
+            ty::Float(FloatTy::F128) => unimplemented!("f16_f128"),
             ty::Never => "z",
 
             // Placeholders (should be demangled as `_`).