Use associated constants in core::num::dec2flt
This commit is contained in:
parent
4f32e0dfb2
commit
e9c74bc42d
@ -70,6 +70,7 @@
|
||||
#![feature(allow_internal_unstable)]
|
||||
#![feature(asm)]
|
||||
#![feature(associated_type_defaults)]
|
||||
#![feature(associated_consts)]
|
||||
#![feature(cfg_target_feature)]
|
||||
#![feature(cfg_target_has_atomic)]
|
||||
#![feature(concat_idents)]
|
||||
|
@ -106,17 +106,17 @@ mod fpu_precision {
|
||||
/// a bignum.
|
||||
pub fn fast_path<T: RawFloat>(integral: &[u8], fractional: &[u8], e: i64) -> Option<T> {
|
||||
let num_digits = integral.len() + fractional.len();
|
||||
// log_10(f64::max_sig) ~ 15.95. We compare the exact value to max_sig near the end,
|
||||
// log_10(f64::MAX_SIG) ~ 15.95. We compare the exact value to MAX_SIG near the end,
|
||||
// this is just a quick, cheap rejection (and also frees the rest of the code from
|
||||
// worrying about underflow).
|
||||
if num_digits > 16 {
|
||||
return None;
|
||||
}
|
||||
if e.abs() >= T::ceil_log5_of_max_sig() as i64 {
|
||||
if e.abs() >= T::CEIL_LOG5_OF_MAX_SIG as i64 {
|
||||
return None;
|
||||
}
|
||||
let f = num::from_str_unchecked(integral.iter().chain(fractional.iter()));
|
||||
if f > T::max_sig() {
|
||||
if f > T::MAX_SIG {
|
||||
return None;
|
||||
}
|
||||
|
||||
@ -154,14 +154,14 @@ pub fn fast_path<T: RawFloat>(integral: &[u8], fractional: &[u8], e: i64) -> Opt
|
||||
/// > the best possible approximation that uses p bits of significand.)
|
||||
pub fn bellerophon<T: RawFloat>(f: &Big, e: i16) -> T {
|
||||
let slop;
|
||||
if f <= &Big::from_u64(T::max_sig()) {
|
||||
if f <= &Big::from_u64(T::MAX_SIG) {
|
||||
// The cases abs(e) < log5(2^N) are in fast_path()
|
||||
slop = if e >= 0 { 0 } else { 3 };
|
||||
} else {
|
||||
slop = if e >= 0 { 1 } else { 4 };
|
||||
}
|
||||
let z = rawfp::big_to_fp(f).mul(&power_of_ten(e)).normalize();
|
||||
let exp_p_n = 1 << (P - T::sig_bits() as u32);
|
||||
let exp_p_n = 1 << (P - T::SIG_BITS as u32);
|
||||
let lowbits: i64 = (z.f % exp_p_n) as i64;
|
||||
// Is the slop large enough to make a difference when
|
||||
// rounding to n bits?
|
||||
@ -210,14 +210,14 @@ fn algorithm_r<T: RawFloat>(f: &Big, e: i16, z0: T) -> T {
|
||||
if d2 < y {
|
||||
let mut d2_double = d2;
|
||||
d2_double.mul_pow2(1);
|
||||
if m == T::min_sig() && d_negative && d2_double > y {
|
||||
if m == T::MIN_SIG && d_negative && d2_double > y {
|
||||
z = prev_float(z);
|
||||
} else {
|
||||
return z;
|
||||
}
|
||||
} else if d2 == y {
|
||||
if m % 2 == 0 {
|
||||
if m == T::min_sig() && d_negative {
|
||||
if m == T::MIN_SIG && d_negative {
|
||||
z = prev_float(z);
|
||||
} else {
|
||||
return z;
|
||||
@ -303,12 +303,12 @@ pub fn algorithm_m<T: RawFloat>(f: &Big, e: i16) -> T {
|
||||
quick_start::<T>(&mut u, &mut v, &mut k);
|
||||
let mut rem = Big::from_small(0);
|
||||
let mut x = Big::from_small(0);
|
||||
let min_sig = Big::from_u64(T::min_sig());
|
||||
let max_sig = Big::from_u64(T::max_sig());
|
||||
let min_sig = Big::from_u64(T::MIN_SIG);
|
||||
let max_sig = Big::from_u64(T::MAX_SIG);
|
||||
loop {
|
||||
u.div_rem(&v, &mut x, &mut rem);
|
||||
if k == T::min_exp_int() {
|
||||
// We have to stop at the minimum exponent, if we wait until `k < T::min_exp_int()`,
|
||||
if k == T::MIN_EXP_INT {
|
||||
// We have to stop at the minimum exponent, if we wait until `k < T::MIN_EXP_INT`,
|
||||
// then we'd be off by a factor of two. Unfortunately this means we have to special-
|
||||
// case normal numbers with the minimum exponent.
|
||||
// FIXME find a more elegant formulation, but run the `tiny-pow10` test to make sure
|
||||
@ -318,8 +318,8 @@ pub fn algorithm_m<T: RawFloat>(f: &Big, e: i16) -> T {
|
||||
}
|
||||
return underflow(x, v, rem);
|
||||
}
|
||||
if k > T::max_exp_int() {
|
||||
return T::infinity2();
|
||||
if k > T::MAX_EXP_INT {
|
||||
return T::INFINITY;
|
||||
}
|
||||
if x < min_sig {
|
||||
u.mul_pow2(1);
|
||||
@ -345,18 +345,18 @@ fn quick_start<T: RawFloat>(u: &mut Big, v: &mut Big, k: &mut i16) {
|
||||
// The target ratio is one where u/v is in an in-range significand. Thus our termination
|
||||
// condition is log2(u / v) being the significand bits, plus/minus one.
|
||||
// FIXME Looking at the second bit could improve the estimate and avoid some more divisions.
|
||||
let target_ratio = T::sig_bits() as i16;
|
||||
let target_ratio = T::SIG_BITS as i16;
|
||||
let log2_u = u.bit_length() as i16;
|
||||
let log2_v = v.bit_length() as i16;
|
||||
let mut u_shift: i16 = 0;
|
||||
let mut v_shift: i16 = 0;
|
||||
assert!(*k == 0);
|
||||
loop {
|
||||
if *k == T::min_exp_int() {
|
||||
if *k == T::MIN_EXP_INT {
|
||||
// Underflow or subnormal. Leave it to the main function.
|
||||
break;
|
||||
}
|
||||
if *k == T::max_exp_int() {
|
||||
if *k == T::MAX_EXP_INT {
|
||||
// Overflow. Leave it to the main function.
|
||||
break;
|
||||
}
|
||||
@ -376,7 +376,7 @@ fn quick_start<T: RawFloat>(u: &mut Big, v: &mut Big, k: &mut i16) {
|
||||
}
|
||||
|
||||
fn underflow<T: RawFloat>(x: Big, v: Big, rem: Big) -> T {
|
||||
if x < Big::from_u64(T::min_sig()) {
|
||||
if x < Big::from_u64(T::MIN_SIG) {
|
||||
let q = num::to_u64(&x);
|
||||
let z = rawfp::encode_subnormal(q);
|
||||
return round_by_remainder(v, rem, q, z);
|
||||
@ -395,9 +395,9 @@ fn underflow<T: RawFloat>(x: Big, v: Big, rem: Big) -> T {
|
||||
// needs to be rounded up. Only when the rounded off bits are 1/2 and the remainder
|
||||
// is zero, we have a half-to-even situation.
|
||||
let bits = x.bit_length();
|
||||
let lsb = bits - T::sig_bits() as usize;
|
||||
let lsb = bits - T::SIG_BITS as usize;
|
||||
let q = num::get_bits(&x, lsb, bits);
|
||||
let k = T::min_exp_int() + lsb as i16;
|
||||
let k = T::MIN_EXP_INT + lsb as i16;
|
||||
let z = rawfp::encode_normal(Unpacked::new(q, k));
|
||||
let q_even = q % 2 == 0;
|
||||
match num::compare_with_half_ulp(&x, lsb) {
|
||||
|
@ -214,11 +214,11 @@ fn dec2flt<T: RawFloat>(s: &str) -> Result<T, ParseFloatError> {
|
||||
let (sign, s) = extract_sign(s);
|
||||
let flt = match parse_decimal(s) {
|
||||
ParseResult::Valid(decimal) => convert(decimal)?,
|
||||
ParseResult::ShortcutToInf => T::infinity2(),
|
||||
ParseResult::ShortcutToZero => T::zero2(),
|
||||
ParseResult::ShortcutToInf => T::INFINITY,
|
||||
ParseResult::ShortcutToZero => T::ZERO,
|
||||
ParseResult::Invalid => match s {
|
||||
"inf" => T::infinity2(),
|
||||
"NaN" => T::nan2(),
|
||||
"inf" => T::INFINITY,
|
||||
"NaN" => T::NAN,
|
||||
_ => { return Err(pfe_invalid()); }
|
||||
}
|
||||
};
|
||||
@ -254,7 +254,7 @@ fn convert<T: RawFloat>(mut decimal: Decimal) -> Result<T, ParseFloatError> {
|
||||
// FIXME These bounds are rather conservative. A more careful analysis of the failure modes
|
||||
// of Bellerophon could allow using it in more cases for a massive speed up.
|
||||
let exponent_in_range = table::MIN_E <= e && e <= table::MAX_E;
|
||||
let value_in_range = upper_bound <= T::max_normal_digits() as u64;
|
||||
let value_in_range = upper_bound <= T::MAX_NORMAL_DIGITS as u64;
|
||||
if exponent_in_range && value_in_range {
|
||||
Ok(algorithm::bellerophon(&f, e))
|
||||
} else {
|
||||
@ -315,17 +315,17 @@ fn bound_intermediate_digits(decimal: &Decimal, e: i64) -> u64 {
|
||||
fn trivial_cases<T: RawFloat>(decimal: &Decimal) -> Option<T> {
|
||||
// There were zeros but they were stripped by simplify()
|
||||
if decimal.integral.is_empty() && decimal.fractional.is_empty() {
|
||||
return Some(T::zero2());
|
||||
return Some(T::ZERO);
|
||||
}
|
||||
// This is a crude approximation of ceil(log10(the real value)). We don't need to worry too
|
||||
// much about overflow here because the input length is tiny (at least compared to 2^64) and
|
||||
// the parser already handles exponents whose absolute value is greater than 10^18
|
||||
// (which is still 10^19 short of 2^64).
|
||||
let max_place = decimal.exp + decimal.integral.len() as i64;
|
||||
if max_place > T::inf_cutoff() {
|
||||
return Some(T::infinity2());
|
||||
} else if max_place < T::zero_cutoff() {
|
||||
return Some(T::zero2());
|
||||
if max_place > T::INF_CUTOFF {
|
||||
return Some(T::INFINITY);
|
||||
} else if max_place < T::ZERO_CUTOFF {
|
||||
return Some(T::ZERO);
|
||||
}
|
||||
None
|
||||
}
|
||||
|
@ -56,24 +56,12 @@ impl Unpacked {
|
||||
///
|
||||
/// Should **never ever** be implemented for other types or be used outside the dec2flt module.
|
||||
/// Inherits from `Float` because there is some overlap, but all the reused methods are trivial.
|
||||
/// The "methods" (pseudo-constants) with default implementation should not be overriden.
|
||||
pub trait RawFloat : Float + Copy + Debug + LowerExp
|
||||
+ Mul<Output=Self> + Div<Output=Self> + Neg<Output=Self>
|
||||
{
|
||||
// suffix of "2" because Float::infinity is deprecated
|
||||
#[allow(deprecated)]
|
||||
fn infinity2() -> Self {
|
||||
Float::infinity()
|
||||
}
|
||||
|
||||
// suffix of "2" because Float::nan is deprecated
|
||||
#[allow(deprecated)]
|
||||
fn nan2() -> Self {
|
||||
Float::nan()
|
||||
}
|
||||
|
||||
// suffix of "2" because Float::zero is deprecated
|
||||
fn zero2() -> Self;
|
||||
const INFINITY: Self;
|
||||
const NAN: Self;
|
||||
const ZERO: Self;
|
||||
|
||||
// suffix of "2" because Float::integer_decode is deprecated
|
||||
#[allow(deprecated)]
|
||||
@ -94,94 +82,83 @@ pub trait RawFloat : Float + Copy + Debug + LowerExp
|
||||
/// represented, the other code in this module makes sure to never let that happen.
|
||||
fn from_int(x: u64) -> Self;
|
||||
|
||||
/// Get the value 10<sup>e</sup> from a pre-computed table. Panics for e >=
|
||||
/// ceil_log5_of_max_sig().
|
||||
/// Get the value 10<sup>e</sup> from a pre-computed table.
|
||||
/// Panics for `e >= CEIL_LOG5_OF_MAX_SIG`.
|
||||
fn short_fast_pow10(e: usize) -> Self;
|
||||
|
||||
// FIXME Everything that follows should be associated constants, but taking the value of an
|
||||
// associated constant from a type parameter does not work (yet?)
|
||||
// A possible workaround is having a `FloatInfo` struct for all the constants, but so far
|
||||
// the methods aren't painful enough to rewrite.
|
||||
|
||||
/// What the name says. It's easier to hard code than juggling intrinsics and
|
||||
/// hoping LLVM constant folds it.
|
||||
fn ceil_log5_of_max_sig() -> i16;
|
||||
const CEIL_LOG5_OF_MAX_SIG: i16;
|
||||
|
||||
/// A conservative bound on the decimal digits of inputs that can't produce overflow or zero or
|
||||
/// subnormals. Probably the decimal exponent of the maximum normal value, hence the name.
|
||||
fn max_normal_digits() -> usize;
|
||||
const MAX_NORMAL_DIGITS: usize;
|
||||
|
||||
/// When the most significant decimal digit has a place value greater than this, the number
|
||||
/// is certainly rounded to infinity.
|
||||
fn inf_cutoff() -> i64;
|
||||
const INF_CUTOFF: i64;
|
||||
|
||||
/// When the most significant decimal digit has a place value less than this, the number
|
||||
/// is certainly rounded to zero.
|
||||
fn zero_cutoff() -> i64;
|
||||
const ZERO_CUTOFF: i64;
|
||||
|
||||
/// The number of bits in the exponent.
|
||||
fn exp_bits() -> u8;
|
||||
const EXP_BITS: u8;
|
||||
|
||||
/// The number of bits in the significand, *including* the hidden bit.
|
||||
fn sig_bits() -> u8;
|
||||
const SIG_BITS: u8;
|
||||
|
||||
/// The number of bits in the significand, *excluding* the hidden bit.
|
||||
fn explicit_sig_bits() -> u8 {
|
||||
Self::sig_bits() - 1
|
||||
}
|
||||
const EXPLICIT_SIG_BITS: u8;
|
||||
|
||||
/// The maximum legal exponent in fractional representation.
|
||||
fn max_exp() -> i16 {
|
||||
(1 << (Self::exp_bits() - 1)) - 1
|
||||
}
|
||||
const MAX_EXP: i16;
|
||||
|
||||
/// The minimum legal exponent in fractional representation, excluding subnormals.
|
||||
fn min_exp() -> i16 {
|
||||
-Self::max_exp() + 1
|
||||
}
|
||||
const MIN_EXP: i16;
|
||||
|
||||
/// `MAX_EXP` for integral representation, i.e., with the shift applied.
|
||||
fn max_exp_int() -> i16 {
|
||||
Self::max_exp() - (Self::sig_bits() as i16 - 1)
|
||||
}
|
||||
const MAX_EXP_INT: i16;
|
||||
|
||||
/// `MAX_EXP` encoded (i.e., with offset bias)
|
||||
fn max_encoded_exp() -> i16 {
|
||||
(1 << Self::exp_bits()) - 1
|
||||
}
|
||||
const MAX_ENCODED_EXP: i16;
|
||||
|
||||
/// `MIN_EXP` for integral representation, i.e., with the shift applied.
|
||||
fn min_exp_int() -> i16 {
|
||||
Self::min_exp() - (Self::sig_bits() as i16 - 1)
|
||||
}
|
||||
const MIN_EXP_INT: i16;
|
||||
|
||||
/// The maximum normalized significand in integral representation.
|
||||
fn max_sig() -> u64 {
|
||||
(1 << Self::sig_bits()) - 1
|
||||
}
|
||||
const MAX_SIG: u64;
|
||||
|
||||
/// The minimal normalized significand in integral representation.
|
||||
fn min_sig() -> u64 {
|
||||
1 << (Self::sig_bits() - 1)
|
||||
const MIN_SIG: u64;
|
||||
}
|
||||
|
||||
// Mostly a workaround for #34344.
|
||||
macro_rules! other_constants {
|
||||
($type: ident) => {
|
||||
const EXPLICIT_SIG_BITS: u8 = Self::SIG_BITS - 1;
|
||||
const MAX_EXP: i16 = (1 << (Self::EXP_BITS - 1)) - 1;
|
||||
const MIN_EXP: i16 = -Self::MAX_EXP + 1;
|
||||
const MAX_EXP_INT: i16 = Self::MAX_EXP - (Self::SIG_BITS as i16 - 1);
|
||||
const MAX_ENCODED_EXP: i16 = (1 << Self::EXP_BITS) - 1;
|
||||
const MIN_EXP_INT: i16 = Self::MIN_EXP - (Self::SIG_BITS as i16 - 1);
|
||||
const MAX_SIG: u64 = (1 << Self::SIG_BITS) - 1;
|
||||
const MIN_SIG: u64 = 1 << (Self::SIG_BITS - 1);
|
||||
|
||||
const INFINITY: Self = $crate::$type::INFINITY;
|
||||
const NAN: Self = $crate::$type::NAN;
|
||||
const ZERO: Self = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
impl RawFloat for f32 {
|
||||
fn zero2() -> Self {
|
||||
0.0
|
||||
}
|
||||
|
||||
fn sig_bits() -> u8 {
|
||||
24
|
||||
}
|
||||
|
||||
fn exp_bits() -> u8 {
|
||||
8
|
||||
}
|
||||
|
||||
fn ceil_log5_of_max_sig() -> i16 {
|
||||
11
|
||||
}
|
||||
const SIG_BITS: u8 = 24;
|
||||
const EXP_BITS: u8 = 8;
|
||||
const CEIL_LOG5_OF_MAX_SIG: i16 = 11;
|
||||
const MAX_NORMAL_DIGITS: usize = 35;
|
||||
const INF_CUTOFF: i64 = 40;
|
||||
const ZERO_CUTOFF: i64 = -48;
|
||||
other_constants!(f32);
|
||||
|
||||
fn transmute(self) -> u64 {
|
||||
let bits: u32 = unsafe { transmute(self) };
|
||||
@ -207,37 +184,17 @@ impl RawFloat for f32 {
|
||||
fn short_fast_pow10(e: usize) -> Self {
|
||||
table::F32_SHORT_POWERS[e]
|
||||
}
|
||||
|
||||
fn max_normal_digits() -> usize {
|
||||
35
|
||||
}
|
||||
|
||||
fn inf_cutoff() -> i64 {
|
||||
40
|
||||
}
|
||||
|
||||
fn zero_cutoff() -> i64 {
|
||||
-48
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl RawFloat for f64 {
|
||||
fn zero2() -> Self {
|
||||
0.0
|
||||
}
|
||||
|
||||
fn sig_bits() -> u8 {
|
||||
53
|
||||
}
|
||||
|
||||
fn exp_bits() -> u8 {
|
||||
11
|
||||
}
|
||||
|
||||
fn ceil_log5_of_max_sig() -> i16 {
|
||||
23
|
||||
}
|
||||
const SIG_BITS: u8 = 53;
|
||||
const EXP_BITS: u8 = 11;
|
||||
const CEIL_LOG5_OF_MAX_SIG: i16 = 23;
|
||||
const MAX_NORMAL_DIGITS: usize = 305;
|
||||
const INF_CUTOFF: i64 = 310;
|
||||
const ZERO_CUTOFF: i64 = -326;
|
||||
other_constants!(f64);
|
||||
|
||||
fn transmute(self) -> u64 {
|
||||
let bits: u64 = unsafe { transmute(self) };
|
||||
@ -262,38 +219,27 @@ impl RawFloat for f64 {
|
||||
fn short_fast_pow10(e: usize) -> Self {
|
||||
table::F64_SHORT_POWERS[e]
|
||||
}
|
||||
|
||||
fn max_normal_digits() -> usize {
|
||||
305
|
||||
}
|
||||
|
||||
fn inf_cutoff() -> i64 {
|
||||
310
|
||||
}
|
||||
|
||||
fn zero_cutoff() -> i64 {
|
||||
-326
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/// Convert an Fp to the closest f64. Only handles number that fit into a normalized f64.
|
||||
/// Convert an Fp to the closest machine float type.
|
||||
/// Does not handle subnormal results.
|
||||
pub fn fp_to_float<T: RawFloat>(x: Fp) -> T {
|
||||
let x = x.normalize();
|
||||
// x.f is 64 bit, so x.e has a mantissa shift of 63
|
||||
let e = x.e + 63;
|
||||
if e > T::max_exp() {
|
||||
if e > T::MAX_EXP {
|
||||
panic!("fp_to_float: exponent {} too large", e)
|
||||
} else if e > T::min_exp() {
|
||||
} else if e > T::MIN_EXP {
|
||||
encode_normal(round_normal::<T>(x))
|
||||
} else {
|
||||
panic!("fp_to_float: exponent {} too small", e)
|
||||
}
|
||||
}
|
||||
|
||||
/// Round the 64-bit significand to 53 bit with half-to-even. Does not handle exponent overflow.
|
||||
/// Round the 64-bit significand to T::SIG_BITS bits with half-to-even.
|
||||
/// Does not handle exponent overflow.
|
||||
pub fn round_normal<T: RawFloat>(x: Fp) -> Unpacked {
|
||||
let excess = 64 - T::sig_bits() as i16;
|
||||
let excess = 64 - T::SIG_BITS as i16;
|
||||
let half: u64 = 1 << (excess - 1);
|
||||
let (q, rem) = (x.f >> excess, x.f & ((1 << excess) - 1));
|
||||
assert_eq!(q << excess | rem, x.f);
|
||||
@ -303,8 +249,8 @@ pub fn round_normal<T: RawFloat>(x: Fp) -> Unpacked {
|
||||
Unpacked::new(q, k)
|
||||
} else if rem == half && (q % 2) == 0 {
|
||||
Unpacked::new(q, k)
|
||||
} else if q == T::max_sig() {
|
||||
Unpacked::new(T::min_sig(), k + 1)
|
||||
} else if q == T::MAX_SIG {
|
||||
Unpacked::new(T::MIN_SIG, k + 1)
|
||||
} else {
|
||||
Unpacked::new(q + 1, k)
|
||||
}
|
||||
@ -313,22 +259,22 @@ pub fn round_normal<T: RawFloat>(x: Fp) -> Unpacked {
|
||||
/// Inverse of `RawFloat::unpack()` for normalized numbers.
|
||||
/// Panics if the significand or exponent are not valid for normalized numbers.
|
||||
pub fn encode_normal<T: RawFloat>(x: Unpacked) -> T {
|
||||
debug_assert!(T::min_sig() <= x.sig && x.sig <= T::max_sig(),
|
||||
debug_assert!(T::MIN_SIG <= x.sig && x.sig <= T::MAX_SIG,
|
||||
"encode_normal: significand not normalized");
|
||||
// Remove the hidden bit
|
||||
let sig_enc = x.sig & !(1 << T::explicit_sig_bits());
|
||||
let sig_enc = x.sig & !(1 << T::EXPLICIT_SIG_BITS);
|
||||
// Adjust the exponent for exponent bias and mantissa shift
|
||||
let k_enc = x.k + T::max_exp() + T::explicit_sig_bits() as i16;
|
||||
debug_assert!(k_enc != 0 && k_enc < T::max_encoded_exp(),
|
||||
let k_enc = x.k + T::MAX_EXP + T::EXPLICIT_SIG_BITS as i16;
|
||||
debug_assert!(k_enc != 0 && k_enc < T::MAX_ENCODED_EXP,
|
||||
"encode_normal: exponent out of range");
|
||||
// Leave sign bit at 0 ("+"), our numbers are all positive
|
||||
let bits = (k_enc as u64) << T::explicit_sig_bits() | sig_enc;
|
||||
let bits = (k_enc as u64) << T::EXPLICIT_SIG_BITS | sig_enc;
|
||||
T::from_bits(bits)
|
||||
}
|
||||
|
||||
/// Construct the subnormal. A mantissa of 0 is allowed and constructs zero.
|
||||
/// Construct a subnormal. A mantissa of 0 is allowed and constructs zero.
|
||||
pub fn encode_subnormal<T: RawFloat>(significand: u64) -> T {
|
||||
assert!(significand < T::min_sig(), "encode_subnormal: not actually subnormal");
|
||||
assert!(significand < T::MIN_SIG, "encode_subnormal: not actually subnormal");
|
||||
// Encoded exponent is 0, the sign bit is 0, so we just have to reinterpret the bits.
|
||||
T::from_bits(significand)
|
||||
}
|
||||
@ -364,8 +310,8 @@ pub fn prev_float<T: RawFloat>(x: T) -> T {
|
||||
Zero => panic!("prev_float: argument is zero"),
|
||||
Normal => {
|
||||
let Unpacked { sig, k } = x.unpack();
|
||||
if sig == T::min_sig() {
|
||||
encode_normal(Unpacked::new(T::max_sig(), k - 1))
|
||||
if sig == T::MIN_SIG {
|
||||
encode_normal(Unpacked::new(T::MAX_SIG, k - 1))
|
||||
} else {
|
||||
encode_normal(Unpacked::new(sig - 1, k))
|
||||
}
|
||||
@ -380,7 +326,7 @@ pub fn prev_float<T: RawFloat>(x: T) -> T {
|
||||
pub fn next_float<T: RawFloat>(x: T) -> T {
|
||||
match x.classify() {
|
||||
Nan => panic!("next_float: argument is NaN"),
|
||||
Infinite => T::infinity2(),
|
||||
Infinite => T::INFINITY,
|
||||
// This seems too good to be true, but it works.
|
||||
// 0.0 is encoded as the all-zero word. Subnormals are 0x000m...m where m is the mantissa.
|
||||
// In particular, the smallest subnormal is 0x0...01 and the largest is 0x000F...F.
|
||||
|
Loading…
x
Reference in New Issue
Block a user