Skip to content
This repository was archived by the owner on Apr 28, 2025. It is now read-only.

Add ldexpf16, ldexpf128, scalbnf16, and scalbnf128 #391

Merged
merged 2 commits into from
Feb 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions crates/libm-macros/src/shared.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,13 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
None,
&["jn", "yn"],
),
(
// `(f16, i32) -> f16`
FloatTy::F16,
Signature { args: &[Ty::F16, Ty::I32], returns: &[Ty::F16] },
None,
&["scalbnf16", "ldexpf16"],
),
(
// `(f32, i32) -> f32`
FloatTy::F32,
Expand All @@ -148,6 +155,13 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option<Signature>, &[&str])]
None,
&["scalbn", "ldexp"],
),
(
// `(f128, i32) -> f128`
FloatTy::F128,
Signature { args: &[Ty::F128, Ty::I32], returns: &[Ty::F128] },
None,
&["scalbnf128", "ldexpf128"],
),
(
// `(f32, &mut f32) -> f32` as `(f32) -> (f32, f32)`
FloatTy::F32,
Expand Down
4 changes: 4 additions & 0 deletions crates/libm-test/benches/icount.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,8 @@ main!(
icount_bench_jn_group,
icount_bench_jnf_group,
icount_bench_ldexp_group,
icount_bench_ldexpf128_group,
icount_bench_ldexpf16_group,
icount_bench_ldexpf_group,
icount_bench_lgamma_group,
icount_bench_lgamma_r_group,
Expand Down Expand Up @@ -163,6 +165,8 @@ main!(
icount_bench_roundf16_group,
icount_bench_roundf_group,
icount_bench_scalbn_group,
icount_bench_scalbnf128_group,
icount_bench_scalbnf16_group,
icount_bench_scalbnf_group,
icount_bench_sin_group,
icount_bench_sinf_group,
Expand Down
4 changes: 4 additions & 0 deletions crates/libm-test/benches/random.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,14 @@ libm_macros::for_each_function! {
| fminf16
| fmodf128
| fmodf16
| ldexpf128
| ldexpf16
| rintf128
| rintf16
| roundf128
| roundf16
| scalbnf128
| scalbnf16
| sqrtf128
| sqrtf16
| truncf128
Expand Down
61 changes: 33 additions & 28 deletions crates/libm-test/src/mpfloat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,8 @@ libm_macros::for_each_function! {
jnf,
ldexp,
ldexpf,
ldexpf128,
ldexpf16,
lgamma_r,
lgammaf_r,
modf,
Expand All @@ -178,6 +180,8 @@ libm_macros::for_each_function! {
roundf16,
scalbn,
scalbnf,
scalbnf128,
scalbnf16,
sincos,sincosf,
trunc,
truncf,
Expand Down Expand Up @@ -351,34 +355,6 @@ macro_rules! impl_op_for_ty {
}
}

// `ldexp` and `scalbn` are the same for binary floating point, so just forward all
// methods.
impl MpOp for crate::op::[<ldexp $suffix>]::Routine {
type MpTy = <crate::op::[<scalbn $suffix>]::Routine as MpOp>::MpTy;

fn new_mp() -> Self::MpTy {
<crate::op::[<scalbn $suffix>]::Routine as MpOp>::new_mp()
}

fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
<crate::op::[<scalbn $suffix>]::Routine as MpOp>::run(this, input)
}
}

impl MpOp for crate::op::[<scalbn $suffix>]::Routine {
type MpTy = MpFloat;

fn new_mp() -> Self::MpTy {
new_mpfloat::<Self::FTy>()
}

fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
this.assign(input.0);
*this <<= input.1;
prep_retval::<Self::FTy>(this, Ordering::Equal)
}
}

impl MpOp for crate::op::[<sincos $suffix>]::Routine {
type MpTy = (MpFloat, MpFloat);

Expand Down Expand Up @@ -464,6 +440,35 @@ macro_rules! impl_op_for_ty_all {
this.1.assign(input.1);
let ord = this.0.rem_assign_round(&this.1, Nearest);
prep_retval::<Self::RustRet>(&mut this.0, ord)

}
}

// `ldexp` and `scalbn` are the same for binary floating point, so just forward all
// methods.
impl MpOp for crate::op::[<ldexp $suffix>]::Routine {
type MpTy = <crate::op::[<scalbn $suffix>]::Routine as MpOp>::MpTy;

fn new_mp() -> Self::MpTy {
<crate::op::[<scalbn $suffix>]::Routine as MpOp>::new_mp()
}

fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
<crate::op::[<scalbn $suffix>]::Routine as MpOp>::run(this, input)
}
}

impl MpOp for crate::op::[<scalbn $suffix>]::Routine {
type MpTy = MpFloat;

fn new_mp() -> Self::MpTy {
new_mpfloat::<Self::FTy>()
}

fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
this.assign(input.0);
*this <<= input.1;
prep_retval::<Self::FTy>(this, Ordering::Equal)
}
}
}
Expand Down
4 changes: 4 additions & 0 deletions crates/libm-test/src/precision.rs
Original file line number Diff line number Diff line change
Expand Up @@ -551,8 +551,12 @@ fn int_float_common<F1: Float, F2: Float>(
DEFAULT
}

#[cfg(f16_enabled)]
impl MaybeOverride<(f16, i32)> for SpecialCase {}
impl MaybeOverride<(f32, i32)> for SpecialCase {}
impl MaybeOverride<(f64, i32)> for SpecialCase {}
#[cfg(f128_enabled)]
impl MaybeOverride<(f128, i32)> for SpecialCase {}

impl MaybeOverride<(f32, f32, f32)> for SpecialCase {
fn check_float<F: Float>(
Expand Down
4 changes: 4 additions & 0 deletions crates/libm-test/tests/compare_built_musl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,14 @@ libm_macros::for_each_function! {
fminf16,
fmodf128,
fmodf16,
ldexpf128,
ldexpf16,
rintf128,
rintf16,
roundf128,
roundf16,
scalbnf128,
scalbnf16,
sqrtf128,
sqrtf16,
truncf128,
Expand Down
4 changes: 4 additions & 0 deletions crates/util/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,14 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) {
| fminf16
| fmodf128
| fmodf16
| ldexpf128
| ldexpf16
| rintf128
| rintf16
| roundf128
| roundf16
| scalbnf128
| scalbnf16
| sqrtf128
| sqrtf16
| truncf128
Expand Down
26 changes: 26 additions & 0 deletions etc/function-definitions.json
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,18 @@
],
"type": "f32"
},
"ldexpf128": {
"sources": [
"src/math/ldexpf128.rs"
],
"type": "f128"
},
"ldexpf16": {
"sources": [
"src/math/ldexpf16.rs"
],
"type": "f16"
},
"lgamma": {
"sources": [
"src/libm_helper.rs",
Expand Down Expand Up @@ -774,6 +786,20 @@
],
"type": "f32"
},
"scalbnf128": {
"sources": [
"src/math/generic/scalbn.rs",
"src/math/scalbnf128.rs"
],
"type": "f128"
},
"scalbnf16": {
"sources": [
"src/math/generic/scalbn.rs",
"src/math/scalbnf16.rs"
],
"type": "f16"
},
"sin": {
"sources": [
"src/libm_helper.rs",
Expand Down
4 changes: 4 additions & 0 deletions etc/function-list.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ jn
jnf
ldexp
ldexpf
ldexpf128
ldexpf16
lgamma
lgamma_r
lgammaf
Expand Down Expand Up @@ -111,6 +113,8 @@ roundf128
roundf16
scalbn
scalbnf
scalbnf128
scalbnf16
sin
sincos
sincosf
Expand Down
85 changes: 74 additions & 11 deletions src/math/generic/scalbn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,27 @@ where
let exp_max: i32 = F::EXP_BIAS as i32;
let exp_min = -(exp_max - 1);

// 2 ^ Emax, where Emax is the maximum biased exponent value (1023 for f64)
// 2 ^ Emax, maximum positive with null significand (0x1p1023 for f64)
let f_exp_max = F::from_parts(false, F::EXP_BIAS << 1, zero);

// 2 ^ Emin, where Emin is the minimum biased exponent value (-1022 for f64)
// 2 ^ Emin, minimum positive normal with null significand (0x1p-1022 for f64)
let f_exp_min = F::from_parts(false, 1, zero);

// 2 ^ sig_total_bits, representation of what can be accounted for with subnormals
let f_exp_subnorm = F::from_parts(false, sig_total_bits + F::EXP_BIAS, zero);
// 2 ^ sig_total_bits, moltiplier to normalize subnormals (0x1p53 for f64)
let f_pow_subnorm = F::from_parts(false, sig_total_bits + F::EXP_BIAS, zero);

/*
* The goal is to multiply `x` by a scale factor that applies `n`. However, there are cases
* where `2^n` is not representable by `F` but the result should be, e.g. `x = 2^Emin` with
* `n = -EMin + 2` (one out of range of 2^Emax). To get around this, reduce the magnitude of
* the final scale operation by prescaling by the max/min power representable by `F`.
*/

if n > exp_max {
// Worse case positive `n`: `x` is the minimum subnormal value, the result is `F::MAX`.
// This can be reached by three scaling multiplications (two here and one final).
debug_assert!(-exp_min + F::SIG_BITS as i32 + exp_max <= exp_max * 3);

x *= f_exp_max;
n -= exp_max;
if n > exp_max {
Expand All @@ -51,21 +62,61 @@ where
}
}
} else if n < exp_min {
let mul = f_exp_min * f_exp_subnorm;
let add = (exp_max - 1) - sig_total_bits as i32;
// When scaling toward 0, the prescaling is limited to a value that does not allow `x` to
// go subnormal. This avoids double rounding.
if F::BITS > 16 {
// `mul` s.t. `!(x * mul).is_subnormal() ∀ x`
let mul = f_exp_min * f_pow_subnorm;
let add = -exp_min - sig_total_bits as i32;

// Worse case negative `n`: `x` is the maximum positive value, the result is `F::MIN`.
// This must be reachable by three scaling multiplications (two here and one final).
debug_assert!(-exp_min + F::SIG_BITS as i32 + exp_max <= add * 2 + -exp_min);

x *= mul;
n += add;
if n < exp_min {
x *= mul;
n += add;

if n < exp_min {
n = exp_min;
x *= mul;
n += add;

if n < exp_min {
n = exp_min;
}
}
} else {
// `f16` is unique compared to other float types in that the difference between the
// minimum exponent and the significand bits (`add = -exp_min - sig_total_bits`) is
// small, only three. The above method depend on decrementing `n` by `add` two times;
// for other float types this works out because `add` is a substantial fraction of
// the exponent range. For `f16`, however, 3 is relatively small compared to the
// exponent range (which is 39), so that requires ~10 prescale rounds rather than two.
//
// Work aroudn this by using a different algorithm that calculates the prescale
// dynamically based on the maximum possible value. This adds more operations per round
// since it needs to construct the scale, but works better in the general case.
let add = -(n + sig_total_bits as i32).clamp(exp_min, sig_total_bits as i32);
let mul = F::from_parts(false, (F::EXP_BIAS as i32 - add) as u32, zero);

x *= mul;
n += add;

if n < exp_min {
let add = -(n + sig_total_bits as i32).clamp(exp_min, sig_total_bits as i32);
let mul = F::from_parts(false, (F::EXP_BIAS as i32 - add) as u32, zero);

x *= mul;
n += add;

if n < exp_min {
n = exp_min;
}
}
}
}

x * F::from_parts(false, (F::EXP_BIAS as i32 + n) as u32, zero)
let scale = F::from_parts(false, (F::EXP_BIAS as i32 + n) as u32, zero);
x * scale
}

#[cfg(test)]
Expand Down Expand Up @@ -111,6 +162,12 @@ mod tests {
assert!(scalbn(-F::NAN, -10).is_nan());
}

#[test]
#[cfg(f16_enabled)]
fn spec_test_f16() {
spec_test::<f16>();
}

#[test]
fn spec_test_f32() {
spec_test::<f32>();
Expand All @@ -120,4 +177,10 @@ mod tests {
fn spec_test_f64() {
spec_test::<f64>();
}

#[test]
#[cfg(f128_enabled)]
fn spec_test_f128() {
spec_test::<f128>();
}
}
4 changes: 4 additions & 0 deletions src/math/ldexpf128.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn ldexpf128(x: f128, n: i32) -> f128 {
super::scalbnf128(x, n)
}
4 changes: 4 additions & 0 deletions src/math/ldexpf16.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn ldexpf16(x: f16, n: i32) -> f16 {
super::scalbnf16(x, n)
}
Loading