WIP f16 fma

tgross35 · tgross35 · commit 9f501167b4d8 · 2025-01-25T02:05:19.000Z
simplify exp

resolve warnings

update

update
diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs
@@ -92,6 +92,13 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
         None,
         &["copysignf128", "fdimf128", "fmaxf128", "fminf128", "fmodf128"],
     ),
+    (
+        // `(f16, f16, f16) -> f16`
+        FloatTy::F16,
+        Signature { args: &[Ty::F16, Ty::F16, Ty::F16], returns: &[Ty::F16] },
+        None,
+        &["fmaf16"],
+    ),
     (
         // `(f32, f32, f32) -> f32`
         FloatTy::F32,
diff --git a/crates/libm-test/benches/icount.rs b/crates/libm-test/benches/icount.rs
@@ -105,6 +105,7 @@ main!(
     icount_bench_floorf16_group,
     icount_bench_floorf_group,
     icount_bench_fma_group,
+    icount_bench_fmaf16_group,
     icount_bench_fmaf_group,
     icount_bench_fmax_group,
     icount_bench_fmaxf128_group,
diff --git a/crates/libm-test/benches/random.rs b/crates/libm-test/benches/random.rs
@@ -126,6 +126,7 @@ libm_macros::for_each_function! {
         | fdimf128
         | fdimf16
         | floorf128
+        | fmaf16
         | floorf16
         | fmaxf128
         | fmaxf16
diff --git a/crates/libm-test/src/mpfloat.rs b/crates/libm-test/src/mpfloat.rs
@@ -190,7 +190,7 @@ libm_macros::for_each_function! {
         expm1 | expm1f => exp_m1,
         fabs | fabsf => abs,
         fdim | fdimf | fdimf16 | fdimf128  => positive_diff,
-        fma | fmaf => mul_add,
+        fma | fmaf | fmaf16 => mul_add,
         fmax | fmaxf | fmaxf16 | fmaxf128 => max,
         fmin | fminf | fminf16 | fminf128 => min,
         lgamma | lgammaf => ln_gamma,
diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
@@ -554,6 +554,9 @@ fn int_float_common<F1: Float, F2: Float>(
 impl MaybeOverride<(f32, i32)> for SpecialCase {}
 impl MaybeOverride<(f64, i32)> for SpecialCase {}
 
+#[cfg(f16_enabled)]
+impl MaybeOverride<(f16, f16, f16)> for SpecialCase {}
+
 impl MaybeOverride<(f32, f32, f32)> for SpecialCase {
     fn check_float<F: Float>(
         input: (f32, f32, f32),
@@ -575,6 +578,9 @@ impl MaybeOverride<(f64, f64, f64)> for SpecialCase {
     }
 }
 
+#[cfg(f128_enabled)]
+impl MaybeOverride<(f128, f128, f128)> for SpecialCase {}
+
 // F1 and F2 are always the same type, this is just to please generics
 fn ternop_common<F1: Float, F2: Float>(
     input: (F1, F1, F1),
diff --git a/crates/libm-test/tests/compare_built_musl.rs b/crates/libm-test/tests/compare_built_musl.rs
@@ -89,6 +89,7 @@ libm_macros::for_each_function! {
         fdimf16,
         floorf128,
         floorf16,
+        fmaf16,
         fmaxf128,
         fmaxf16,
         fminf128,
diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs
@@ -96,12 +96,13 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) {
             | fdimf16
             | floorf128
             | floorf16
+            | fmaf16
             | fmaxf128
             | fmaxf16
             | fminf128
             | fminf16
             | fmodf128
             | fmodf16
             | rintf128
             | rintf16
             | roundf128
diff --git a/etc/function-definitions.json b/etc/function-definitions.json
@@ -376,6 +376,12 @@
         ],
         "type": "f32"
     },
+    "fmaf16": {
+        "sources": [
+            "src/math/fmaf16.rs"
+        ],
+        "type": "f16"
+    },
     "fmax": {
         "sources": [
             "src/libm_helper.rs",
diff --git a/etc/function-list.txt b/etc/function-list.txt
@@ -53,6 +53,7 @@ floorf128
 floorf16
 fma
 fmaf
+fmaf16
 fmax
 fmaxf
 fmaxf128
diff --git a/etc/update-api-list.py b/etc/update-api-list.py
@@ -3,6 +3,8 @@
 functions are covered by our macros.
 """
 
+# TODO: also match with `${name}_any_suffix` so we pick up `fma_big`.
+
 import difflib
 import json
 import subprocess as sp
diff --git a/src/math/fmaf.rs b/src/math/fmaf.rs
@@ -47,6 +47,10 @@ use super::fenv::{
 /// according to the rounding mode characterized by the value of FLT_ROUNDS.
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fmaf(x: f32, y: f32, mut z: f32) -> f32 {
+    if true {
+        return super::generic::fma_big::<f32, f64>(x, y, z);
+    }
+
     let xy: f64;
     let mut result: f64;
     let mut ui: u64;
diff --git a/src/math/fmaf16.rs b/src/math/fmaf16.rs
@@ -0,0 +1,4 @@
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaf16(x: f16, y: f16, z: f16) -> f16 {
+    super::generic::fma_big::<f16, f32>(x, y, z)
+}
diff --git a/src/math/generic/fma.rs b/src/math/generic/fma.rs
@@ -0,0 +1,48 @@
+use super::super::fenv::{FE_TONEAREST, fegetround};
+use super::super::{CastFrom, CastInto, DFloat, Float, HFloat, IntTy, MinInt};
+
+/// FMA implementation when a hardware-backed larger float type is available.
+pub fn fma_big<F, B>(x: F, y: F, z: F) -> F
+where
+    F: Float + HFloat<D = B>,
+    B: Float + DFloat<H = F>,
+    B::Int: CastInto<i32>,
+    i32: CastFrom<i32>,
+{
+    let one = IntTy::<B>::ONE;
+
+    let xy: B = x.widen() * y.widen();
+    let result: B = xy + z.widen();
+    let mut ui: B::Int = result.to_bits();
+    let re = result.exp();
+    let zb: B = z.widen();
+
+    let prec_diff = B::SIG_BITS - F::SIG_BITS;
+    let excess_prec = ui & ((one << prec_diff) - one);
+    let halfway = one << (prec_diff - 1);
+
+    // Common case: the larger precision is fine if...
+    // This is not a halfway case
+    if excess_prec != halfway
+        // Or the result is NaN
+        || re == B::EXP_MAX
+        // Or the result is exact
+        || (result - xy == zb && result - zb == xy)
+        // Or the mode is something other than round to nearest
+        || fegetround() != FE_TONEAREST
+    {
+        // TODO: feclearexcept
+
+        return result.narrow();
+    }
+
+    let neg = ui & B::SIGN_MASK > IntTy::<B>::ZERO;
+    let err = if neg == (zb > xy) { xy - result + zb } else { zb - result + xy };
+    if neg == (err < B::ZERO) {
+        ui += one;
+    } else {
+        ui -= one;
+    }
+
+    B::from_bits(ui).narrow()
+}
diff --git a/src/math/generic/mod.rs b/src/math/generic/mod.rs
@@ -3,6 +3,7 @@ mod copysign;
 mod fabs;
 mod fdim;
 mod floor;
+mod fma;
 mod fmax;
 mod fmin;
 mod fmod;
@@ -17,6 +18,7 @@ pub use copysign::copysign;
 pub use fabs::fabs;
 pub use fdim::fdim;
 pub use floor::floor;
+pub use fma::fma_big;
 pub use fmax::fmax;
 pub use fmin::fmin;
 pub use fmod::fmod;
diff --git a/src/math/mod.rs b/src/math/mod.rs
@@ -121,7 +121,7 @@ use self::rem_pio2::rem_pio2;
 use self::rem_pio2_large::rem_pio2_large;
 use self::rem_pio2f::rem_pio2f;
 #[allow(unused_imports)]
-use self::support::{CastFrom, CastInto, DInt, Float, HInt, Int, IntTy, MinInt};
+use self::support::{CastFrom, CastInto, DFloat, DInt, Float, HFloat, HInt, Int, IntTy, MinInt};
 
 // Public modules
 mod acos;
@@ -346,6 +346,7 @@ cfg_if! {
         mod fabsf16;
         mod fdimf16;
         mod floorf16;
+        mod fmaf16;
         mod fmaxf16;
         mod fminf16;
         mod fmodf16;
@@ -359,6 +360,7 @@ cfg_if! {
         pub use self::fabsf16::fabsf16;
         pub use self::fdimf16::fdimf16;
         pub use self::floorf16::floorf16;
+        pub use self::fmaf16::fmaf16;
         pub use self::fmaxf16::fmaxf16;
         pub use self::fminf16::fminf16;
         pub use self::fmodf16::fmodf16;
diff --git a/src/math/support/float_traits.rs b/src/math/support/float_traits.rs
@@ -1,4 +1,5 @@
-use core::{fmt, mem, ops};
+use core::ops::{self, Neg};
+use core::{fmt, mem};
 
 use super::int_traits::{CastFrom, Int, MinInt};
 
@@ -23,7 +24,9 @@ pub trait Float:
     type Int: Int<OtherSign = Self::SignedInt, Unsigned = Self::Int>;
 
     /// A int of the same width as the float
-    type SignedInt: Int + MinInt<OtherSign = Self::Int, Unsigned = Self::Int>;
+    type SignedInt: Int
+        + MinInt<OtherSign = Self::Int, Unsigned = Self::Int>
+        + Neg<Output = Self::SignedInt>;
 
     const ZERO: Self;
     const NEG_ZERO: Self;
@@ -155,7 +158,6 @@ pub trait Float:
 }
 
 /// Access the associated `Int` type from a float (helper to avoid ambiguous associated types).
-#[allow(dead_code)]
 pub type IntTy<F> = <F as Float>::Int;
 
 macro_rules! float_impl {
@@ -355,3 +357,63 @@ mod tests {
         assert_biteq!(f128::from_parts(false, 0, 1), f128::from_bits(0x1));
     }
 }
+
+/// Trait for floats twice the bit width of another float.
+#[allow(unused)]
+pub trait DFloat: Float {
+    /// Float that is half the bit width of the float this trait is implemented for.
+    type H: HFloat<D = Self>;
+
+    /// Narrow the float type.
+    fn narrow(self) -> Self::H;
+}
+
+/// Trait for floats half the bit width of another float.
+#[allow(unused)]
+pub trait HFloat: Float {
+    /// Float that is double the bit width of the float this trait is implemented for.
+    type D: DFloat<H = Self>;
+
+    /// Widen the float type.
+    fn widen(self) -> Self::D;
+}
+
+macro_rules! impl_d_float {
+    ($($X:ident $D:ident),*) => {
+        $(
+            impl DFloat for $D {
+                type H = $X;
+
+                fn narrow(self) -> Self::H {
+                    self as $X
+                }
+            }
+        )*
+    };
+}
+
+macro_rules! impl_h_float {
+    ($($H:ident $X:ident),*) => {
+        $(
+            impl HFloat for $H {
+                type D = $X;
+
+                fn widen(self) -> Self::D {
+                    self as $X
+                }
+            }
+        )*
+    };
+}
+
+impl_d_float!(f32 f64);
+#[cfg(f16_enabled)]
+impl_d_float!(f16 f32);
+#[cfg(f128_enabled)]
+impl_d_float!(f64 f128);
+
+impl_h_float!(f32 f64);
+#[cfg(f16_enabled)]
+impl_h_float!(f16 f32);
+#[cfg(f128_enabled)]
+impl_h_float!(f64 f128);
diff --git a/src/math/support/int_traits.rs b/src/math/support/int_traits.rs
@@ -92,6 +92,7 @@ pub trait Int:
     fn wrapping_shr(self, other: u32) -> Self;
     fn rotate_left(self, other: u32) -> Self;
     fn overflowing_add(self, other: Self) -> (Self, bool);
+    fn overflowing_sub(self, other: Self) -> (Self, bool);
     fn leading_zeros(self) -> u32;
     fn ilog2(self) -> u32;
 }
@@ -150,6 +151,10 @@ macro_rules! int_impl_common {
             <Self>::overflowing_add(self, other)
         }
 
+        fn overflowing_sub(self, other: Self) -> (Self, bool) {
+            <Self>::overflowing_sub(self, other)
+        }
+
         fn leading_zeros(self) -> u32 {
             <Self>::leading_zeros(self)
         }
@@ -399,6 +404,20 @@ macro_rules! cast_into {
     )*};
 }
 
+macro_rules! cast_lossy{
+    ($ty:ty; $($into:ty),*) => {$(
+        impl CastInto<$into> for $ty {
+            fn cast(self) -> $into {
+                unimplemented!("precise casting not available, use `cast_lossy` instead")
+            }
+
+            fn cast_lossy(self) -> $into {
+                self as $into
+            }
+        }
+    )*};
+}
+
 cast_into!(usize);
 cast_into!(isize);
 cast_into!(u8);
@@ -411,3 +430,28 @@ cast_into!(u64);
 cast_into!(i64);
 cast_into!(u128);
 cast_into!(i128);
+
+cast_into!(bool; u16);
+cast_into!(bool; u32);
+cast_into!(bool; u64);
+cast_into!(bool; u128);
+
+cast_lossy!(i64; f32, f64);
+cast_lossy!(f32; f64);
+cast_lossy!(f64; f32);
+
+cfg_if! {
+    if #[cfg(f16_enabled)] {
+        cast_lossy!(f16; f32, f64);
+        cast_lossy!(f32; f16);
+        cast_lossy!(f64; f16);
+    }
+}
+
+cfg_if! {
+    if #[cfg(f128_enabled)] {
+        cast_lossy!(f128; f32, f64);
+        cast_lossy!(f32; f128);
+        cast_lossy!(f64; f128);
+    }
+}
diff --git a/src/math/support/mod.rs b/src/math/support/mod.rs
@@ -6,7 +6,7 @@ mod hex_float;
 mod int_traits;
 
 #[allow(unused_imports)]
-pub use float_traits::{Float, IntTy};
+pub use float_traits::{DFloat, Float, HFloat, IntTy};
 pub(crate) use float_traits::{f32_from_bits, f64_from_bits};
 #[cfg(f16_enabled)]
 pub use hex_float::hf16;