From 97a2289909f238973bac7b68bfb439cabe974e44 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 20 Apr 2025 04:05:36 +0000
Subject: [PATCH] Add `fmaf16`

---
 crates/libm-macros/src/shared.rs      | 10 ++++++++++
 crates/util/src/main.rs               |  1 +
 libm-test/benches/icount.rs           |  1 +
 libm-test/benches/random.rs           |  1 +
 libm-test/src/generate/case_list.rs   | 20 ++++++++++++++++++++
 libm-test/src/mpfloat.rs              |  2 +-
 libm-test/src/precision.rs            |  2 ++
 libm-test/tests/compare_built_musl.rs |  1 +
 libm/src/libm_helper.rs               |  1 +
 libm/src/math/fma_wide.rs             |  5 ++---
 libm/src/math/mod.rs                  |  3 +--
 11 files changed, 41 insertions(+), 6 deletions(-)
diff --git a/crates/libm-macros/src/shared.rs b/crates/libm-macros/src/shared.rs
index 750ed1afb..ddb3c0fac 100644
--- a/crates/libm-macros/src/shared.rs
+++ b/crates/libm-macros/src/shared.rs
@@ -228,6 +228,16 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
             "fmodf128",
         ],
     ),
+    (
+        // `(f16, f16, f16) -> f16`
+        FloatTy::F16,
+        Signature {
+            args: &[Ty::F16, Ty::F16, Ty::F16],
+            returns: &[Ty::F16],
+        },
+        None,
+        &["fmaf16"],
+    ),
     (
         // `(f32, f32, f32) -> f32`
         FloatTy::F32,
diff --git a/crates/util/src/main.rs b/crates/util/src/main.rs
index e70578699..dc8beef29 100644
--- a/crates/util/src/main.rs
+++ b/crates/util/src/main.rs
@@ -97,6 +97,7 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) {
             | floorf128
             | floorf16
             | fmaf128
+            | fmaf16
             | fmaxf128
             | fmaxf16
             | fmaximum
diff --git a/libm-test/benches/icount.rs b/libm-test/benches/icount.rs
index da8c6bfd1..77b2b5f50 100644
--- a/libm-test/benches/icount.rs
+++ b/libm-test/benches/icount.rs
@@ -200,6 +200,7 @@ main!(
     icount_bench_floorf128_group,
     icount_bench_floorf16_group,
     icount_bench_floorf_group,
+    icount_bench_fmaf16_group,
     icount_bench_fma_group,
     icount_bench_fmaf128_group,
     icount_bench_fmaf_group,
diff --git a/libm-test/benches/random.rs b/libm-test/benches/random.rs
index 81f58e3a6..34b3156a2 100644
--- a/libm-test/benches/random.rs
+++ b/libm-test/benches/random.rs
@@ -137,6 +137,7 @@ libm_macros::for_each_function! {
         | floorf128
         | floorf16
         | fmaf128
+        | fmaf16
         | fmaxf128
         | fmaxf16
         | fmaximum
diff --git a/libm-test/src/generate/case_list.rs b/libm-test/src/generate/case_list.rs
index f1e6fcec3..3c4717f3b 100644
--- a/libm-test/src/generate/case_list.rs
+++ b/libm-test/src/generate/case_list.rs
@@ -6,6 +6,8 @@
 //!
 //! This is useful for adding regression tests or expected failures.
 
+#[cfg(f16_enabled)]
+use libm::hf16;
 use libm::hf64;
 #[cfg(f128_enabled)]
 use libm::hf128;
@@ -256,6 +258,24 @@ fn floorf16_cases() -> Vec<TestCase<op::floorf16::Routine>> {
     vec![]
 }
 
+/// Regression cases for `fmaf16`, given as inputs paired with the expected result.
+#[cfg(f16_enabled)]
+fn fmaf16_cases() -> Vec<TestCase<op::fmaf16::Routine>> {
+    // Failed during extensive tests. `z` is subnormal and `x * y` is slightly more than
+    // half of the smallest subnormal step, so the sum sits just past a rounding tie and
+    // must round to the next subnormal away from zero.
+    let input = (
+        hf16!("-0x1.c4p-12"),
+        hf16!("0x1.22p-14"),
+        hf16!("-0x1.f4p-15"),
+    );
+    let expected = Some(hf16!("-0x1.f48p-15"));
+
+    let mut cases = vec![];
+    TestCase::append_pairs(&mut cases, &[(input, expected)]);
+    cases
+}
+
 fn fma_cases() -> Vec<TestCase<op::fma::Routine>> {
     let mut v = vec![];
     TestCase::append_pairs(
diff --git a/libm-test/src/mpfloat.rs b/libm-test/src/mpfloat.rs
index 9b51dc605..1824301ce 100644
--- a/libm-test/src/mpfloat.rs
+++ b/libm-test/src/mpfloat.rs
@@ -208,7 +208,7 @@ libm_macros::for_each_function! {
         expm1 | expm1f => exp_m1,
         fabs | fabsf => abs,
         fdim | fdimf | fdimf16 | fdimf128  => positive_diff,
-        fma | fmaf | fmaf128 => mul_add,
+        fma | fmaf | fmaf16 | fmaf128 => mul_add,
         fmax | fmaxf | fmaxf16 | fmaxf128 |
         fmaximum_num | fmaximum_numf | fmaximum_numf16 | fmaximum_numf128 => max,
         fmin | fminf | fminf16 | fminf128 |
diff --git a/libm-test/src/precision.rs b/libm-test/src/precision.rs
index f5fb5f670..e3b484813 100644
--- a/libm-test/src/precision.rs
+++ b/libm-test/src/precision.rs
@@ -567,6 +567,8 @@ impl MaybeOverride<(f64, i32)> for SpecialCase {}
 #[cfg(f128_enabled)]
 impl MaybeOverride<(f128, i32)> for SpecialCase {}
 
+#[cfg(f16_enabled)]
+impl MaybeOverride<(f16, f16, f16)> for SpecialCase {}
 impl MaybeOverride<(f32, f32, f32)> for SpecialCase {}
 impl MaybeOverride<(f64, f64, f64)> for SpecialCase {}
 #[cfg(f128_enabled)]
diff --git a/libm-test/tests/compare_built_musl.rs b/libm-test/tests/compare_built_musl.rs
index cbb4bd49b..9c94b8369 100644
--- a/libm-test/tests/compare_built_musl.rs
+++ b/libm-test/tests/compare_built_musl.rs
@@ -100,6 +100,7 @@ libm_macros::for_each_function! {
         floorf128,
         floorf16,
         fmaf128,
+        fmaf16,
         fmaxf128,
         fmaxf16,
         fmaximum,
diff --git a/libm/src/libm_helper.rs b/libm/src/libm_helper.rs
index dfa1ff77b..d2c47b4fd 100644
--- a/libm/src/libm_helper.rs
+++ b/libm/src/libm_helper.rs
@@ -195,6 +195,7 @@ libm_helper! {
         (fn fabs(x: f16) -> (f16);                  => fabsf16);
         (fn fdim(x: f16, y: f16) -> (f16);          => fdimf16);
         (fn floor(x: f16) -> (f16);                 => floorf16);
+        (fn fma(x: f16, y: f16, z: f16) -> (f16);   => fmaf16);
         (fn fmax(x: f16, y: f16) -> (f16);          => fmaxf16);
         (fn fmaximum_num(x: f16, y: f16) -> (f16);  => fmaximum_numf16);
         (fn fmaximumf16(x: f16, y: f16) -> (f16);   => fmaximumf16);
diff --git a/libm/src/math/fma_wide.rs b/libm/src/math/fma_wide.rs
index f268c2f14..9b2f1f4da 100644
--- a/libm/src/math/fma_wide.rs
+++ b/libm/src/math/fma_wide.rs
@@ -5,11 +5,10 @@ use super::support::{FpResult, IntTy, Round, Status};
 use super::{CastFrom, CastInto, DFloat, Float, HFloat, MinInt};
 
 // Placeholder so we can have `fmaf16` in the `Float` trait.
-#[allow(unused)]
 #[cfg(f16_enabled)]
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 {
-    unimplemented!()
+pub fn fmaf16(x: f16, y: f16, z: f16) -> f16 {
+    fma_wide_round(x, y, z, Round::Nearest).val
 }
 
 /// Floating multiply add (f32)
diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs
index 949c18b40..ef0636364 100644
--- a/libm/src/math/mod.rs
+++ b/libm/src/math/mod.rs
@@ -322,6 +322,7 @@ cfg_if! {
         pub use self::fabs::fabsf16;
         pub use self::fdim::fdimf16;
         pub use self::floor::floorf16;
+        pub use self::fma_wide::fmaf16;
         pub use self::fmin_fmax::{fmaxf16, fminf16};
         pub use self::fminimum_fmaximum::{fmaximumf16, fminimumf16};
         pub use self::fminimum_fmaximum_num::{fmaximum_numf16, fminimum_numf16};
@@ -335,8 +336,6 @@ cfg_if! {
         pub use self::trunc::truncf16;
         // verify-sorted-end
 
-        #[allow(unused_imports)]
-        pub(crate) use self::fma_wide::fmaf16;
     }
 }