From ac0714ba65579b158fb5d49ddad6aae569d5ea47 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 30 Jan 2025 12:52:35 +0000 Subject: [PATCH 1/2] Fix hex float trait recursion problem --- src/math/support/hex_float.rs | 30 +++++------------------------- 1 file changed, 5 insertions(+), 25 deletions(-) diff --git a/src/math/support/hex_float.rs b/src/math/support/hex_float.rs index da41622f2..ebc4f7c64 100644 --- a/src/math/support/hex_float.rs +++ b/src/math/support/hex_float.rs @@ -245,29 +245,21 @@ fn fmt_any_hex(x: &F, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "0x{leading}{sig:0mwidth$x}p{exponent:+}") } -#[cfg(f16_enabled)] -impl fmt::LowerHex for Hexf { +impl fmt::LowerHex for Hexf { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt_any_hex(&self.0, f) } } -impl fmt::LowerHex for Hexf { +impl fmt::LowerHex for Hexf<(F, F)> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt_any_hex(&self.0, f) - } -} - -impl fmt::LowerHex for Hexf { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt_any_hex(&self.0, f) + write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1)) } } -#[cfg(f128_enabled)] -impl fmt::LowerHex for Hexf { +impl fmt::LowerHex for Hexf<(F, i32)> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt_any_hex(&self.0, f) + write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1)) } } @@ -277,18 +269,6 @@ impl fmt::LowerHex for Hexf { } } -impl fmt::LowerHex for Hexf<(T1, T2)> -where - T1: Copy, - T2: Copy, - Hexf: fmt::LowerHex, - Hexf: fmt::LowerHex, -{ - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1)) - } -} - impl fmt::Debug for Hexf where Hexf: fmt::LowerHex, From 9c93e011fa110b8b183b20a32845230c7729c7e9 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Fri, 3 Jan 2025 04:34:21 +0000 Subject: [PATCH 2/2] Add `scalbnf16`, `scalbnf128`, `ldexpf16`, and `ldexpf128` Use the generic `scalbn` to provide `f16` and `f128` versions, which also work for `ldexp`. This involves a new algorithm for `f16` because the default does not converge fast enough with a limited number of rounds. --- crates/libm-macros/src/shared.rs | 14 ++++ crates/libm-test/benches/icount.rs | 4 + crates/libm-test/benches/random.rs | 4 + crates/libm-test/src/mpfloat.rs | 61 +++++++------- crates/libm-test/src/precision.rs | 4 + crates/libm-test/tests/compare_built_musl.rs | 4 + crates/util/src/main.rs | 4 + etc/function-definitions.json | 26 ++++++ etc/function-list.txt | 4 + src/math/generic/scalbn.rs | 85 +++++++++++++++++--- src/math/ldexpf128.rs | 4 + src/math/ldexpf16.rs | 4 + src/math/mod.rs | 8 ++ src/math/scalbnf128.rs | 4 + src/math/scalbnf16.rs | 4 + 15 files changed, 195 insertions(+), 39 deletions(-) create mode 100644 src/math/ldexpf128.rs create mode 100644 src/math/ldexpf16.rs create mode 100644 src/math/scalbnf128.rs create mode 100644 src/math/scalbnf16.rs diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs index b1f4f46cc..4fd0834f6 100644 --- a/crates/libm-macros/src/shared.rs +++ b/crates/libm-macros/src/shared.rs @@ -134,6 +134,13 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] None, &["jn", "yn"], ), + ( + // `(f16, i32) -> f16` + FloatTy::F16, + Signature { args: &[Ty::F16, Ty::I32], returns: &[Ty::F16] }, + None, + &["scalbnf16", "ldexpf16"], + ), ( // `(f32, i32) -> f32` FloatTy::F32, @@ -148,6 +155,13 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] None, &["scalbn", "ldexp"], ), + ( + // `(f128, i32) -> f128` + FloatTy::F128, + Signature { args: &[Ty::F128, Ty::I32], returns: &[Ty::F128] }, + None, + &["scalbnf128", "ldexpf128"], + ), ( // `(f32, &mut f32) -> f32` as `(f32) -> (f32, f32)` FloatTy::F32, diff --git a/crates/libm-test/benches/icount.rs b/crates/libm-test/benches/icount.rs index d5026f461..13de799c7 100644 --- a/crates/libm-test/benches/icount.rs +++ b/crates/libm-test/benches/icount.rs @@ -131,6 +131,8 @@ main!( icount_bench_jn_group, icount_bench_jnf_group, icount_bench_ldexp_group, + icount_bench_ldexpf128_group, + icount_bench_ldexpf16_group, icount_bench_ldexpf_group, icount_bench_lgamma_group, icount_bench_lgamma_r_group, @@ -163,6 +165,8 @@ main!( icount_bench_roundf16_group, icount_bench_roundf_group, icount_bench_scalbn_group, + icount_bench_scalbnf128_group, + icount_bench_scalbnf16_group, icount_bench_scalbnf_group, icount_bench_sin_group, icount_bench_sinf_group, diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs index ca9e86c10..56d288c33 100644 --- a/crates/libm-test/benches/random.rs +++ b/crates/libm-test/benches/random.rs @@ -133,10 +133,14 @@ libm_macros::for_each_function! { | fminf16 | fmodf128 | fmodf16 + | ldexpf128 + | ldexpf16 | rintf128 | rintf16 | roundf128 | roundf16 + | scalbnf128 + | scalbnf16 | sqrtf128 | sqrtf16 | truncf128 diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs index 3d84740cc..e3211b913 100644 --- a/crates/libm-test/src/mpfloat.rs +++ b/crates/libm-test/src/mpfloat.rs @@ -159,6 +159,8 @@ libm_macros::for_each_function! { jnf, ldexp, ldexpf, + ldexpf128, + ldexpf16, lgamma_r, lgammaf_r, modf, @@ -178,6 +180,8 @@ libm_macros::for_each_function! { roundf16, scalbn, scalbnf, + scalbnf128, + scalbnf16, sincos,sincosf, trunc, truncf, @@ -351,34 +355,6 @@ macro_rules! impl_op_for_ty { } } - // `ldexp` and `scalbn` are the same for binary floating point, so just forward all - // methods. - impl MpOp for crate::op::[]::Routine { - type MpTy = ]::Routine as MpOp>::MpTy; - - fn new_mp() -> Self::MpTy { - ]::Routine as MpOp>::new_mp() - } - - fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { - ]::Routine as MpOp>::run(this, input) - } - } - - impl MpOp for crate::op::[]::Routine { - type MpTy = MpFloat; - - fn new_mp() -> Self::MpTy { - new_mpfloat::() - } - - fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { - this.assign(input.0); - *this <<= input.1; - prep_retval::(this, Ordering::Equal) - } - } - impl MpOp for crate::op::[]::Routine { type MpTy = (MpFloat, MpFloat); @@ -464,6 +440,35 @@ macro_rules! impl_op_for_ty_all { this.1.assign(input.1); let ord = this.0.rem_assign_round(&this.1, Nearest); prep_retval::(&mut this.0, ord) + + } + } + + // `ldexp` and `scalbn` are the same for binary floating point, so just forward all + // methods. + impl MpOp for crate::op::[]::Routine { + type MpTy = ]::Routine as MpOp>::MpTy; + + fn new_mp() -> Self::MpTy { + ]::Routine as MpOp>::new_mp() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + ]::Routine as MpOp>::run(this, input) + } + } + + impl MpOp for crate::op::[]::Routine { + type MpTy = MpFloat; + + fn new_mp() -> Self::MpTy { + new_mpfloat::() + } + + fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet { + this.assign(input.0); + *this <<= input.1; + prep_retval::(this, Ordering::Equal) } } } diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs index ffb322e38..051960b7a 100644 --- a/crates/libm-test/src/precision.rs +++ b/crates/libm-test/src/precision.rs @@ -551,8 +551,12 @@ fn int_float_common( DEFAULT } +#[cfg(f16_enabled)] +impl MaybeOverride<(f16, i32)> for SpecialCase {} impl MaybeOverride<(f32, i32)> for SpecialCase {} impl MaybeOverride<(f64, i32)> for SpecialCase {} +#[cfg(f128_enabled)] +impl MaybeOverride<(f128, i32)> for SpecialCase {} impl MaybeOverride<(f32, f32, f32)> for SpecialCase { fn check_float( diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs index 5466edf4f..191c7e69d 100644 --- a/crates/libm-test/tests/compare_built_musl.rs +++ b/crates/libm-test/tests/compare_built_musl.rs @@ -95,10 +95,14 @@ libm_macros::for_each_function! { fminf16, fmodf128, fmodf16, + ldexpf128, + ldexpf16, rintf128, rintf16, roundf128, roundf16, + scalbnf128, + scalbnf16, sqrtf128, sqrtf16, truncf128, diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs index 357df6b4f..e5d6f374a 100644 --- a/crates/util/src/main.rs +++ b/crates/util/src/main.rs @@ -102,10 +102,14 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) { | fminf16 | fmodf128 | fmodf16 + | ldexpf128 + | ldexpf16 | rintf128 | rintf16 | roundf128 | roundf16 + | scalbnf128 + | scalbnf16 | sqrtf128 | sqrtf16 | truncf128 diff --git a/etc/function-definitions.json b/etc/function-definitions.json index 574ffea2e..e38dfd236 100644 --- a/etc/function-definitions.json +++ b/etc/function-definitions.json @@ -554,6 +554,18 @@ ], "type": "f32" }, + "ldexpf128": { + "sources": [ + "src/math/ldexpf128.rs" + ], + "type": "f128" + }, + "ldexpf16": { + "sources": [ + "src/math/ldexpf16.rs" + ], + "type": "f16" + }, "lgamma": { "sources": [ "src/libm_helper.rs", @@ -774,6 +786,20 @@ ], "type": "f32" }, + "scalbnf128": { + "sources": [ + "src/math/generic/scalbn.rs", + "src/math/scalbnf128.rs" + ], + "type": "f128" + }, + "scalbnf16": { + "sources": [ + "src/math/generic/scalbn.rs", + "src/math/scalbnf16.rs" + ], + "type": "f16" + }, "sin": { "sources": [ "src/libm_helper.rs", diff --git a/etc/function-list.txt b/etc/function-list.txt index d82838b32..c92eaf9e2 100644 --- a/etc/function-list.txt +++ b/etc/function-list.txt @@ -79,6 +79,8 @@ jn jnf ldexp ldexpf +ldexpf128 +ldexpf16 lgamma lgamma_r lgammaf @@ -111,6 +113,8 @@ roundf128 roundf16 scalbn scalbnf +scalbnf128 +scalbnf16 sin sincos sincosf diff --git a/src/math/generic/scalbn.rs b/src/math/generic/scalbn.rs index f036c15cc..f15cb75d6 100644 --- a/src/math/generic/scalbn.rs +++ b/src/math/generic/scalbn.rs @@ -31,16 +31,27 @@ where let exp_max: i32 = F::EXP_BIAS as i32; let exp_min = -(exp_max - 1); - // 2 ^ Emax, where Emax is the maximum biased exponent value (1023 for f64) + // 2 ^ Emax, maximum positive with null significand (0x1p1023 for f64) let f_exp_max = F::from_parts(false, F::EXP_BIAS << 1, zero); - // 2 ^ Emin, where Emin is the minimum biased exponent value (-1022 for f64) + // 2 ^ Emin, minimum positive normal with null significand (0x1p-1022 for f64) let f_exp_min = F::from_parts(false, 1, zero); - // 2 ^ sig_total_bits, representation of what can be accounted for with subnormals - let f_exp_subnorm = F::from_parts(false, sig_total_bits + F::EXP_BIAS, zero); + // 2 ^ sig_total_bits, moltiplier to normalize subnormals (0x1p53 for f64) + let f_pow_subnorm = F::from_parts(false, sig_total_bits + F::EXP_BIAS, zero); + + /* + * The goal is to multiply `x` by a scale factor that applies `n`. However, there are cases + * where `2^n` is not representable by `F` but the result should be, e.g. `x = 2^Emin` with + * `n = -EMin + 2` (one out of range of 2^Emax). To get around this, reduce the magnitude of + * the final scale operation by prescaling by the max/min power representable by `F`. + */ if n > exp_max { + // Worse case positive `n`: `x` is the minimum subnormal value, the result is `F::MAX`. + // This can be reached by three scaling multiplications (two here and one final). + debug_assert!(-exp_min + F::SIG_BITS as i32 + exp_max <= exp_max * 3); + x *= f_exp_max; n -= exp_max; if n > exp_max { @@ -51,21 +62,61 @@ where } } } else if n < exp_min { - let mul = f_exp_min * f_exp_subnorm; - let add = (exp_max - 1) - sig_total_bits as i32; + // When scaling toward 0, the prescaling is limited to a value that does not allow `x` to + // go subnormal. This avoids double rounding. + if F::BITS > 16 { + // `mul` s.t. `!(x * mul).is_subnormal() ∀ x` + let mul = f_exp_min * f_pow_subnorm; + let add = -exp_min - sig_total_bits as i32; + + // Worse case negative `n`: `x` is the maximum positive value, the result is `F::MIN`. + // This must be reachable by three scaling multiplications (two here and one final). + debug_assert!(-exp_min + F::SIG_BITS as i32 + exp_max <= add * 2 + -exp_min); - x *= mul; - n += add; - if n < exp_min { x *= mul; n += add; + if n < exp_min { - n = exp_min; + x *= mul; + n += add; + + if n < exp_min { + n = exp_min; + } + } + } else { + // `f16` is unique compared to other float types in that the difference between the + // minimum exponent and the significand bits (`add = -exp_min - sig_total_bits`) is + // small, only three. The above method depend on decrementing `n` by `add` two times; + // for other float types this works out because `add` is a substantial fraction of + // the exponent range. For `f16`, however, 3 is relatively small compared to the + // exponent range (which is 39), so that requires ~10 prescale rounds rather than two. + // + // Work aroudn this by using a different algorithm that calculates the prescale + // dynamically based on the maximum possible value. This adds more operations per round + // since it needs to construct the scale, but works better in the general case. + let add = -(n + sig_total_bits as i32).clamp(exp_min, sig_total_bits as i32); + let mul = F::from_parts(false, (F::EXP_BIAS as i32 - add) as u32, zero); + + x *= mul; + n += add; + + if n < exp_min { + let add = -(n + sig_total_bits as i32).clamp(exp_min, sig_total_bits as i32); + let mul = F::from_parts(false, (F::EXP_BIAS as i32 - add) as u32, zero); + + x *= mul; + n += add; + + if n < exp_min { + n = exp_min; + } } } } - x * F::from_parts(false, (F::EXP_BIAS as i32 + n) as u32, zero) + let scale = F::from_parts(false, (F::EXP_BIAS as i32 + n) as u32, zero); + x * scale } #[cfg(test)] @@ -111,6 +162,12 @@ mod tests { assert!(scalbn(-F::NAN, -10).is_nan()); } + #[test] + #[cfg(f16_enabled)] + fn spec_test_f16() { + spec_test::(); + } + #[test] fn spec_test_f32() { spec_test::(); @@ -120,4 +177,10 @@ mod tests { fn spec_test_f64() { spec_test::(); } + + #[test] + #[cfg(f128_enabled)] + fn spec_test_f128() { + spec_test::(); + } } diff --git a/src/math/ldexpf128.rs b/src/math/ldexpf128.rs new file mode 100644 index 000000000..b35277d15 --- /dev/null +++ b/src/math/ldexpf128.rs @@ -0,0 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ldexpf128(x: f128, n: i32) -> f128 { + super::scalbnf128(x, n) +} diff --git a/src/math/ldexpf16.rs b/src/math/ldexpf16.rs new file mode 100644 index 000000000..8de6cffd6 --- /dev/null +++ b/src/math/ldexpf16.rs @@ -0,0 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn ldexpf16(x: f16, n: i32) -> f16 { + super::scalbnf16(x, n) +} diff --git a/src/math/mod.rs b/src/math/mod.rs index 969c1bfd9..9b07dc8a7 100644 --- a/src/math/mod.rs +++ b/src/math/mod.rs @@ -349,8 +349,10 @@ cfg_if! { mod fmaxf16; mod fminf16; mod fmodf16; + mod ldexpf16; mod rintf16; mod roundf16; + mod scalbnf16; mod sqrtf16; mod truncf16; @@ -362,8 +364,10 @@ cfg_if! { pub use self::fmaxf16::fmaxf16; pub use self::fminf16::fminf16; pub use self::fmodf16::fmodf16; + pub use self::ldexpf16::ldexpf16; pub use self::rintf16::rintf16; pub use self::roundf16::roundf16; + pub use self::scalbnf16::scalbnf16; pub use self::sqrtf16::sqrtf16; pub use self::truncf16::truncf16; } @@ -379,8 +383,10 @@ cfg_if! { mod fmaxf128; mod fminf128; mod fmodf128; + mod ldexpf128; mod rintf128; mod roundf128; + mod scalbnf128; mod sqrtf128; mod truncf128; @@ -392,8 +398,10 @@ cfg_if! { pub use self::fmaxf128::fmaxf128; pub use self::fminf128::fminf128; pub use self::fmodf128::fmodf128; + pub use self::ldexpf128::ldexpf128; pub use self::rintf128::rintf128; pub use self::roundf128::roundf128; + pub use self::scalbnf128::scalbnf128; pub use self::sqrtf128::sqrtf128; pub use self::truncf128::truncf128; } diff --git a/src/math/scalbnf128.rs b/src/math/scalbnf128.rs new file mode 100644 index 000000000..c1d2b4855 --- /dev/null +++ b/src/math/scalbnf128.rs @@ -0,0 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn scalbnf128(x: f128, n: i32) -> f128 { + super::generic::scalbn(x, n) +} diff --git a/src/math/scalbnf16.rs b/src/math/scalbnf16.rs new file mode 100644 index 000000000..2209e1a17 --- /dev/null +++ b/src/math/scalbnf16.rs @@ -0,0 +1,4 @@ +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn scalbnf16(x: f16, n: i32) -> f16 { + super::generic::scalbn(x, n) +}