Skip to content
This repository was archived by the owner on Apr 28, 2025. It is now read-only.

Commit 9f50116

Browse files
committed
WIP f16 fma
simplify exp resolve warnings update update
1 parent 46a3bce commit 9f50116

18 files changed

+202
-6
lines changed

crates/libm-macros/src/shared.rs

+7
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,13 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
9292
None,
9393
&["copysignf128", "fdimf128", "fmaxf128", "fminf128", "fmodf128"],
9494
),
95+
(
96+
// `(f16, f16, f16) -> f16`
97+
FloatTy::F16,
98+
Signature { args: &[Ty::F16, Ty::F16, Ty::F16], returns: &[Ty::F16] },
99+
None,
100+
&["fmaf16"],
101+
),
95102
(
96103
// `(f32, f32, f32) -> f32`
97104
FloatTy::F32,

crates/libm-test/benches/icount.rs

+1
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ main!(
105105
icount_bench_floorf16_group,
106106
icount_bench_floorf_group,
107107
icount_bench_fma_group,
108+
icount_bench_fmaf16_group,
108109
icount_bench_fmaf_group,
109110
icount_bench_fmax_group,
110111
icount_bench_fmaxf128_group,

crates/libm-test/benches/random.rs

+1
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ libm_macros::for_each_function! {
126126
| fdimf128
127127
| fdimf16
128128
| floorf128
129+
| fmaf16
129130
| floorf16
130131
| fmaxf128
131132
| fmaxf16

crates/libm-test/src/mpfloat.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ libm_macros::for_each_function! {
190190
expm1 | expm1f => exp_m1,
191191
fabs | fabsf => abs,
192192
fdim | fdimf | fdimf16 | fdimf128 => positive_diff,
193-
fma | fmaf => mul_add,
193+
fma | fmaf | fmaf16 => mul_add,
194194
fmax | fmaxf | fmaxf16 | fmaxf128 => max,
195195
fmin | fminf | fminf16 | fminf128 => min,
196196
lgamma | lgammaf => ln_gamma,

crates/libm-test/src/precision.rs

+6
Original file line numberDiff line numberDiff line change
@@ -554,6 +554,9 @@ fn int_float_common<F1: Float, F2: Float>(
554554
impl MaybeOverride<(f32, i32)> for SpecialCase {}
555555
impl MaybeOverride<(f64, i32)> for SpecialCase {}
556556

557+
#[cfg(f16_enabled)]
558+
impl MaybeOverride<(f16, f16, f16)> for SpecialCase {}
559+
557560
impl MaybeOverride<(f32, f32, f32)> for SpecialCase {
558561
fn check_float<F: Float>(
559562
input: (f32, f32, f32),
@@ -575,6 +578,9 @@ impl MaybeOverride<(f64, f64, f64)> for SpecialCase {
575578
}
576579
}
577580

581+
#[cfg(f128_enabled)]
582+
impl MaybeOverride<(f128, f128, f128)> for SpecialCase {}
583+
578584
// F1 and F2 are always the same type; this is just to please generics
579585
fn ternop_common<F1: Float, F2: Float>(
580586
input: (F1, F1, F1),

crates/libm-test/tests/compare_built_musl.rs

+1
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ libm_macros::for_each_function! {
8989
fdimf16,
9090
floorf128,
9191
floorf16,
92+
fmaf16,
9293
fmaxf128,
9394
fmaxf16,
9495
fminf128,

crates/util/src/main.rs

+5
Original file line numberDiff line numberDiff line change
@@ -96,12 +96,17 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) {
9696
| fdimf16
9797
| floorf128
9898
| floorf16
99+
<<<<<<< HEAD
99100
| fmaxf128
100101
| fmaxf16
101102
| fminf128
102103
| fminf16
103104
| fmodf128
104105
| fmodf16
106+
||||||| parent of f5a6da1 (WIP f16 fma)
107+
=======
108+
| fmaf16
109+
>>>>>>> f5a6da1 (WIP f16 fma)
105110
| rintf128
106111
| rintf16
107112
| roundf128

etc/function-definitions.json

+6
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,12 @@
376376
],
377377
"type": "f32"
378378
},
379+
"fmaf16": {
380+
"sources": [
381+
"src/math/fmaf16.rs"
382+
],
383+
"type": "f16"
384+
},
379385
"fmax": {
380386
"sources": [
381387
"src/libm_helper.rs",

etc/function-list.txt

+1
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ floorf128
5353
floorf16
5454
fma
5555
fmaf
56+
fmaf16
5657
fmax
5758
fmaxf
5859
fmaxf128

etc/update-api-list.py

+2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
functions are covered by our macros.
44
"""
55

6+
# TODO: also match with `${name}_any_suffix` so we pick up `fma_big`.
7+
68
import difflib
79
import json
810
import subprocess as sp

src/math/fmaf.rs

+4
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,10 @@ use super::fenv::{
4747
/// according to the rounding mode characterized by the value of FLT_ROUNDS.
4848
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
4949
pub fn fmaf(x: f32, y: f32, mut z: f32) -> f32 {
50+
if true {
51+
return super::generic::fma_big::<f32, f64>(x, y, z);
52+
}
53+
5054
let xy: f64;
5155
let mut result: f64;
5256
let mut ui: u64;

src/math/fmaf16.rs

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
2+
pub fn fmaf16(x: f16, y: f16, z: f16) -> f16 {
3+
super::generic::fma_big::<f16, f32>(x, y, z)
4+
}

src/math/generic/fma.rs

+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
use super::super::fenv::{FE_TONEAREST, fegetround};
2+
use super::super::{CastFrom, CastInto, DFloat, Float, HFloat, IntTy, MinInt};
3+
4+
/// FMA implementation when a hardware-backed larger float type is available.
5+
pub fn fma_big<F, B>(x: F, y: F, z: F) -> F
6+
where
7+
F: Float + HFloat<D = B>,
8+
B: Float + DFloat<H = F>,
9+
B::Int: CastInto<i32>,
10+
i32: CastFrom<i32>,
11+
{
12+
let one = IntTy::<B>::ONE;
13+
14+
let xy: B = x.widen() * y.widen();
15+
let result: B = xy + z.widen();
16+
let mut ui: B::Int = result.to_bits();
17+
let re = result.exp();
18+
let zb: B = z.widen();
19+
20+
let prec_diff = B::SIG_BITS - F::SIG_BITS;
21+
let excess_prec = ui & ((one << prec_diff) - one);
22+
let halfway = one << (prec_diff - 1);
23+
24+
// Common case: the larger precision is fine if...
25+
// This is not a halfway case
26+
if excess_prec != halfway
27+
// Or the result is NaN
28+
|| re == B::EXP_MAX
29+
// Or the result is exact
30+
|| (result - xy == zb && result - zb == xy)
31+
// Or the mode is something other than round to nearest
32+
|| fegetround() != FE_TONEAREST
33+
{
34+
// TODO: feclearexcept
35+
36+
return result.narrow();
37+
}
38+
39+
let neg = ui & B::SIGN_MASK > IntTy::<B>::ZERO;
40+
let err = if neg == (zb > xy) { xy - result + zb } else { zb - result + xy };
41+
if neg == (err < B::ZERO) {
42+
ui += one;
43+
} else {
44+
ui -= one;
45+
}
46+
47+
B::from_bits(ui).narrow()
48+
}

src/math/generic/mod.rs

+2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ mod copysign;
33
mod fabs;
44
mod fdim;
55
mod floor;
6+
mod fma;
67
mod fmax;
78
mod fmin;
89
mod fmod;
@@ -17,6 +18,7 @@ pub use copysign::copysign;
1718
pub use fabs::fabs;
1819
pub use fdim::fdim;
1920
pub use floor::floor;
21+
pub use fma::fma_big;
2022
pub use fmax::fmax;
2123
pub use fmin::fmin;
2224
pub use fmod::fmod;

src/math/mod.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ use self::rem_pio2::rem_pio2;
121121
use self::rem_pio2_large::rem_pio2_large;
122122
use self::rem_pio2f::rem_pio2f;
123123
#[allow(unused_imports)]
124-
use self::support::{CastFrom, CastInto, DInt, Float, HInt, Int, IntTy, MinInt};
124+
use self::support::{CastFrom, CastInto, DFloat, DInt, Float, HFloat, HInt, Int, IntTy, MinInt};
125125

126126
// Public modules
127127
mod acos;
@@ -346,6 +346,7 @@ cfg_if! {
346346
mod fabsf16;
347347
mod fdimf16;
348348
mod floorf16;
349+
mod fmaf16;
349350
mod fmaxf16;
350351
mod fminf16;
351352
mod fmodf16;
@@ -359,6 +360,7 @@ cfg_if! {
359360
pub use self::fabsf16::fabsf16;
360361
pub use self::fdimf16::fdimf16;
361362
pub use self::floorf16::floorf16;
363+
pub use self::fmaf16::fmaf16;
362364
pub use self::fmaxf16::fmaxf16;
363365
pub use self::fminf16::fminf16;
364366
pub use self::fmodf16::fmodf16;

src/math/support/float_traits.rs

+65-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
use core::{fmt, mem, ops};
1+
use core::ops::{self, Neg};
2+
use core::{fmt, mem};
23

34
use super::int_traits::{CastFrom, Int, MinInt};
45

@@ -23,7 +24,9 @@ pub trait Float:
2324
type Int: Int<OtherSign = Self::SignedInt, Unsigned = Self::Int>;
2425

2526
/// A signed int of the same width as the float
26-
type SignedInt: Int + MinInt<OtherSign = Self::Int, Unsigned = Self::Int>;
27+
type SignedInt: Int
28+
+ MinInt<OtherSign = Self::Int, Unsigned = Self::Int>
29+
+ Neg<Output = Self::SignedInt>;
2730

2831
const ZERO: Self;
2932
const NEG_ZERO: Self;
@@ -155,7 +158,6 @@ pub trait Float:
155158
}
156159

157160
/// Access the associated `Int` type from a float (helper to avoid ambiguous associated types).
158-
#[allow(dead_code)]
159161
pub type IntTy<F> = <F as Float>::Int;
160162

161163
macro_rules! float_impl {
@@ -355,3 +357,63 @@ mod tests {
355357
assert_biteq!(f128::from_parts(false, 0, 1), f128::from_bits(0x1));
356358
}
357359
}
360+
361+
/// Trait for floats twice the bit width of another float.
362+
#[allow(unused)]
363+
pub trait DFloat: Float {
364+
/// Float that is half the bit width of the float this trait is implemented for.
365+
type H: HFloat<D = Self>;
366+
367+
/// Narrow the float type.
368+
fn narrow(self) -> Self::H;
369+
}
370+
371+
/// Trait for floats half the bit width of another float.
372+
#[allow(unused)]
373+
pub trait HFloat: Float {
374+
/// Float that is double the bit width of the float this trait is implemented for.
375+
type D: DFloat<H = Self>;
376+
377+
/// Widen the float type.
378+
fn widen(self) -> Self::D;
379+
}
380+
381+
macro_rules! impl_d_float {
382+
($($X:ident $D:ident),*) => {
383+
$(
384+
impl DFloat for $D {
385+
type H = $X;
386+
387+
fn narrow(self) -> Self::H {
388+
self as $X
389+
}
390+
}
391+
)*
392+
};
393+
}
394+
395+
macro_rules! impl_h_float {
396+
($($H:ident $X:ident),*) => {
397+
$(
398+
impl HFloat for $H {
399+
type D = $X;
400+
401+
fn widen(self) -> Self::D {
402+
self as $X
403+
}
404+
}
405+
)*
406+
};
407+
}
408+
409+
impl_d_float!(f32 f64);
410+
#[cfg(f16_enabled)]
411+
impl_d_float!(f16 f32);
412+
#[cfg(f128_enabled)]
413+
impl_d_float!(f64 f128);
414+
415+
impl_h_float!(f32 f64);
416+
#[cfg(f16_enabled)]
417+
impl_h_float!(f16 f32);
418+
#[cfg(f128_enabled)]
419+
impl_h_float!(f64 f128);

src/math/support/int_traits.rs

+44
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ pub trait Int:
9292
fn wrapping_shr(self, other: u32) -> Self;
9393
fn rotate_left(self, other: u32) -> Self;
9494
fn overflowing_add(self, other: Self) -> (Self, bool);
95+
fn overflowing_sub(self, other: Self) -> (Self, bool);
9596
fn leading_zeros(self) -> u32;
9697
fn ilog2(self) -> u32;
9798
}
@@ -150,6 +151,10 @@ macro_rules! int_impl_common {
150151
<Self>::overflowing_add(self, other)
151152
}
152153

154+
fn overflowing_sub(self, other: Self) -> (Self, bool) {
155+
<Self>::overflowing_sub(self, other)
156+
}
157+
153158
fn leading_zeros(self) -> u32 {
154159
<Self>::leading_zeros(self)
155160
}
@@ -399,6 +404,20 @@ macro_rules! cast_into {
399404
)*};
400405
}
401406

407+
macro_rules! cast_lossy{
408+
($ty:ty; $($into:ty),*) => {$(
409+
impl CastInto<$into> for $ty {
410+
fn cast(self) -> $into {
411+
unimplemented!("precise casting not available, use `cast_lossy` instead")
412+
}
413+
414+
fn cast_lossy(self) -> $into {
415+
self as $into
416+
}
417+
}
418+
)*};
419+
}
420+
402421
cast_into!(usize);
403422
cast_into!(isize);
404423
cast_into!(u8);
@@ -411,3 +430,28 @@ cast_into!(u64);
411430
cast_into!(i64);
412431
cast_into!(u128);
413432
cast_into!(i128);
433+
434+
cast_into!(bool; u16);
435+
cast_into!(bool; u32);
436+
cast_into!(bool; u64);
437+
cast_into!(bool; u128);
438+
439+
cast_lossy!(i64; f32, f64);
440+
cast_lossy!(f32; f64);
441+
cast_lossy!(f64; f32);
442+
443+
cfg_if! {
444+
if #[cfg(f16_enabled)] {
445+
cast_lossy!(f16; f32, f64);
446+
cast_lossy!(f32; f16);
447+
cast_lossy!(f64; f16);
448+
}
449+
}
450+
451+
cfg_if! {
452+
if #[cfg(f128_enabled)] {
453+
cast_lossy!(f128; f32, f64);
454+
cast_lossy!(f32; f128);
455+
cast_lossy!(f64; f128);
456+
}
457+
}

src/math/support/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ mod hex_float;
66
mod int_traits;
77

88
#[allow(unused_imports)]
9-
pub use float_traits::{Float, IntTy};
9+
pub use float_traits::{DFloat, Float, HFloat, IntTy};
1010
pub(crate) use float_traits::{f32_from_bits, f64_from_bits};
1111
#[cfg(f16_enabled)]
1212
pub use hex_float::hf16;

0 commit comments

Comments
 (0)