Skip to content

[libc][math] Refactor exp10f16 implementation to header-only in src/__support/math folder. #148408

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions libc/shared/math.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "math/exp.h"
#include "math/exp10.h"
#include "math/exp10f.h"
#include "math/exp10f16.h"
#include "math/expf.h"
#include "math/expf16.h"
#include "math/frexpf.h"
Expand Down
29 changes: 29 additions & 0 deletions libc/shared/math/exp10f16.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
//===-- Shared exp10f16 function --------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SHARED_MATH_EXP10F_H
#define LLVM_LIBC_SHARED_MATH_EXP10F_H

#include "include/llvm-libc-macros/float16-macros.h"

#ifdef LIBC_TYPES_HAS_FLOAT16

#include "shared/libc_common.h"
#include "src/__support/math/exp10f16.h"

namespace LIBC_NAMESPACE_DECL {
namespace shared {

using math::exp10f16;

} // namespace shared
} // namespace LIBC_NAMESPACE_DECL

#endif // LIBC_TYPES_HAS_FLOAT16

#endif // LLVM_LIBC_SHARED_MATH_EXP10F_H
34 changes: 34 additions & 0 deletions libc/src/__support/math/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -198,3 +198,37 @@ add_header_library(
libc.src.__support.FPUtil.rounding_mode
libc.src.__support.macros.optimization
)

add_header_library(
exp10_float16_constants
HDRS
exp10_float16_constants.h
DEPENDS
libc.src.__support.CPP.array
)

add_header_library(
exp10f16_utils
HDRS
exp10f16_utils.h
DEPENDS
.expf16_utils
.exp10_float16_constants
libc.src.__support.FPUtil.fp_bits
)

add_header_library(
exp10f16
HDRS
exp10f16.h
DEPENDS
.exp10f16_utils
libc.src.__support.FPUtil.fp_bits
src.__support.FPUtil.FEnvImpl
src.__support.FPUtil.FPBits
src.__support.FPUtil.cast
src.__support.FPUtil.rounding_mode
src.__support.FPUtil.except_value_utils
src.__support.macros.optimization
src.__support.macros.properties.cpu_features
)
43 changes: 43 additions & 0 deletions libc/src/__support/math/exp10_float16_constants.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
//===-- Constants for exp10f16 function -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP10_FLOAT16_CONSTANTS_H
#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP10_FLOAT16_CONSTANTS_H

#include "include/llvm-libc-macros/float16-macros.h"
#include <stdint.h>

#ifdef LIBC_TYPES_HAS_FLOAT16

#include "src/__support/CPP/array.h"

namespace LIBC_NAMESPACE_DECL {

// Generated by Sollya with the following commands:
// > display = hexadecimal;
// > for i from 0 to 7 do printsingle(round(2^(i * 2^-3), SG, RN));
static constexpr cpp::array<uint32_t, 8> EXP2_MID_BITS = {
0x3f80'0000U, 0x3f8b'95c2U, 0x3f98'37f0U, 0x3fa5'fed7U,
0x3fb5'04f3U, 0x3fc5'672aU, 0x3fd7'44fdU, 0x3fea'c0c7U,
};

// Generated by Sollya with the following commands:
// > display = hexadecimal;
// > round(log2(10), SG, RN);
static constexpr float LOG2F_10 = 0x1.a934fp+1f;

// Generated by Sollya with the following commands:
// > display = hexadecimal;
// > round(log10(2), SG, RN);
static constexpr float LOG10F_2 = 0x1.344136p-2f;

} // namespace LIBC_NAMESPACE_DECL

#endif // LIBC_TYPES_HAS_FLOAT16

#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXP10F16_H
141 changes: 141 additions & 0 deletions libc/src/__support/math/exp10f16.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
//===-- Implementation header for exp10f16 ----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP10F16_H
#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP10F16_H

#include "include/llvm-libc-macros/float16-macros.h"

#ifdef LIBC_TYPES_HAS_FLOAT16

#include "exp10f16_utils.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/cast.h"
#include "src/__support/FPUtil/except_value_utils.h"
#include "src/__support/FPUtil/rounding_mode.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h"
#include "src/__support/macros/properties/cpu_features.h"

namespace LIBC_NAMESPACE_DECL {

namespace math {

#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
static constexpr size_t N_EXP10F16_EXCEPTS = 5;
#else
static constexpr size_t N_EXP10F16_EXCEPTS = 8;
#endif

static constexpr fputil::ExceptValues<float16, N_EXP10F16_EXCEPTS>
EXP10F16_EXCEPTS = {{
// x = 0x1.8f4p-2, exp10f16(x) = 0x1.3ap+1 (RZ)
{0x363dU, 0x40e8U, 1U, 0U, 1U},
// x = 0x1.95cp-2, exp10f16(x) = 0x1.3ecp+1 (RZ)
{0x3657U, 0x40fbU, 1U, 0U, 0U},
// x = -0x1.018p-4, exp10f16(x) = 0x1.bbp-1 (RZ)
{0xac06U, 0x3aecU, 1U, 0U, 0U},
// x = -0x1.c28p+0, exp10f16(x) = 0x1.1ccp-6 (RZ)
{0xbf0aU, 0x2473U, 1U, 0U, 0U},
// x = -0x1.e1cp+1, exp10f16(x) = 0x1.694p-13 (RZ)
{0xc387U, 0x09a5U, 1U, 0U, 0U},
#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
// x = 0x1.0cp+1, exp10f16(x) = 0x1.f04p+6 (RZ)
{0x4030U, 0x57c1U, 1U, 0U, 1U},
// x = 0x1.1b8p+1, exp10f16(x) = 0x1.47cp+7 (RZ)
{0x406eU, 0x591fU, 1U, 0U, 1U},
// x = 0x1.1b8p+2, exp10f16(x) = 0x1.a4p+14 (RZ)
{0x446eU, 0x7690U, 1U, 0U, 1U},
#endif
}};
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS

static constexpr float16 exp10f16(float16 x) {
using FPBits = fputil::FPBits<float16>;
FPBits x_bits(x);

uint16_t x_u = x_bits.uintval();
uint16_t x_abs = x_u & 0x7fffU;

// When |x| >= 5, or x is NaN.
if (LIBC_UNLIKELY(x_abs >= 0x4500U)) {
// exp10(NaN) = NaN
if (x_bits.is_nan()) {
if (x_bits.is_signaling_nan()) {
fputil::raise_except_if_required(FE_INVALID);
return FPBits::quiet_nan().get_val();
}

return x;
}

// When x >= 5.
if (x_bits.is_pos()) {
// exp10(+inf) = +inf
if (x_bits.is_inf())
return FPBits::inf().get_val();

switch (fputil::quick_get_round()) {
case FE_TONEAREST:
case FE_UPWARD:
fputil::set_errno_if_required(ERANGE);
fputil::raise_except_if_required(FE_OVERFLOW);
return FPBits::inf().get_val();
default:
return FPBits::max_normal().get_val();
}
}

// When x <= -8.
if (x_u >= 0xc800U) {
// exp10(-inf) = +0
if (x_bits.is_inf())
return FPBits::zero().get_val();

fputil::set_errno_if_required(ERANGE);
fputil::raise_except_if_required(FE_UNDERFLOW | FE_INEXACT);

if (fputil::fenv_is_round_up())
return FPBits::min_subnormal().get_val();
return FPBits::zero().get_val();
}
}

// When x is 1, 2, 3, or 4. These are hard-to-round cases with exact results.
if (LIBC_UNLIKELY((x_u & ~(0x3c00U | 0x4000U | 0x4200U | 0x4400U)) == 0)) {
switch (x_u) {
case 0x3c00U: // x = 1.0f16
return fputil::cast<float16>(10.0);
case 0x4000U: // x = 2.0f16
return fputil::cast<float16>(100.0);
case 0x4200U: // x = 3.0f16
return fputil::cast<float16>(1'000.0);
case 0x4400U: // x = 4.0f16
return fputil::cast<float16>(10'000.0);
}
}

#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
if (auto r = EXP10F16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
return r.value();
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS

// 10^x = 2^((hi + mid) * log2(10)) * 10^lo
auto [exp2_hi_mid, exp10_lo] = exp10_range_reduction(x);
return fputil::cast<float16>(exp2_hi_mid * exp10_lo);
}

} // namespace math

} // namespace LIBC_NAMESPACE_DECL

#endif // LIBC_TYPES_HAS_FLOAT16

#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXP10F16_H
64 changes: 64 additions & 0 deletions libc/src/__support/math/exp10f16_utils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
//===-- Common utils for exp10f16 -------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP_FLOAT_CONSTANTS_H
#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP_FLOAT_CONSTANTS_H

#include "include/llvm-libc-macros/float16-macros.h"

#ifdef LIBC_TYPES_HAS_FLOAT16

#include "exp10_float16_constants.h"
#include "expf16_utils.h"
#include "src/__support/FPUtil/FPBits.h"

namespace LIBC_NAMESPACE_DECL {

LIBC_INLINE static constexpr ExpRangeReduction
exp10_range_reduction(float16 x) {
// For -8 < x < 5, to compute 10^x, we perform the following range reduction:
// find hi, mid, lo, such that:
// x = (hi + mid) * log2(10) + lo, in which
// hi is an integer,
// mid * 2^3 is an integer,
// -2^(-4) <= lo < 2^(-4).
// In particular,
// hi + mid = round(x * 2^3) * 2^(-3).
// Then,
// 10^x = 10^(hi + mid + lo) = 2^((hi + mid) * log2(10)) + 10^lo
// We store 2^mid in the lookup table EXP2_MID_BITS, and compute 2^hi * 2^mid
// by adding hi to the exponent field of 2^mid. 10^lo is computed using a
// degree-4 minimax polynomial generated by Sollya.

float xf = x;
float kf = fputil::nearest_integer(xf * (LOG2F_10 * 0x1.0p+3f));
int x_hi_mid = static_cast<int>(kf);
unsigned x_hi = static_cast<unsigned>(x_hi_mid) >> 3;
unsigned x_mid = static_cast<unsigned>(x_hi_mid) & 0x7;
// lo = x - (hi + mid) = round(x * 2^3 * log2(10)) * log10(2) * (-2^(-3)) + x
float lo = fputil::multiply_add(kf, LOG10F_2 * -0x1.0p-3f, xf);

uint32_t exp2_hi_mid_bits =
EXP2_MID_BITS[x_mid] +
static_cast<uint32_t>(x_hi << fputil::FPBits<float>::FRACTION_LEN);
float exp2_hi_mid = fputil::FPBits<float>(exp2_hi_mid_bits).get_val();
// Degree-4 minimax polynomial generated by Sollya with the following
// commands:
// > display = hexadecimal;
// > P = fpminimax((10^x - 1)/x, 3, [|SG...|], [-2^-4, 2^-4]);
// > 1 + x * P;
float exp10_lo = fputil::polyeval(lo, 0x1p+0f, 0x1.26bb14p+1f, 0x1.53526p+1f,
0x1.04b434p+1f, 0x1.2bcf9ep+0f);
return {exp2_hi_mid, exp10_lo};
}

} // namespace LIBC_NAMESPACE_DECL

#endif // LIBC_TYPES_HAS_FLOAT16

#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXP10F16_H
21 changes: 5 additions & 16 deletions libc/src/math/generic/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1477,20 +1477,8 @@ add_entrypoint_object(
HDRS
../exp10f16.h
DEPENDS
.expxf16
libc.hdr.errno_macros
libc.hdr.fenv_macros
libc.src.__support.CPP.array
libc.src.__support.FPUtil.cast
libc.src.__support.FPUtil.except_value_utils
libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.multiply_add
libc.src.__support.FPUtil.nearest_integer
libc.src.__support.FPUtil.polyeval
libc.src.__support.FPUtil.rounding_mode
libc.src.__support.macros.optimization
libc.src.__support.macros.properties.cpu_features
libc.src.__support.math.exp10f16
libc.src.errno.errno
)

add_entrypoint_object(
Expand Down Expand Up @@ -1519,7 +1507,6 @@ add_entrypoint_object(
HDRS
../exp10m1f16.h
DEPENDS
.expxf16
libc.hdr.errno_macros
libc.hdr.fenv_macros
libc.src.__support.FPUtil.cast
Expand All @@ -1531,6 +1518,7 @@ add_entrypoint_object(
libc.src.__support.FPUtil.rounding_mode
libc.src.__support.macros.optimization
libc.src.__support.macros.properties.cpu_features
libc.src.__support.math.exp10f16_utils
)

add_entrypoint_object(
Expand Down Expand Up @@ -5023,10 +5011,11 @@ add_header_library(
HDRS
expxf16.h
DEPENDS
libc.src.__support.FPUtil.cast
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.cast
libc.src.__support.FPUtil.multiply_add
libc.src.__support.FPUtil.nearest_integer
libc.src.__support.macros.attributes
libc.src.__support.math.expf16_utils
libc.src.__support.math.exp10_float16_constants
)
Loading
Loading