diff --git a/libc/shared/math.h b/libc/shared/math.h index 8dcfaf0352339..617e46602de8c 100644 --- a/libc/shared/math.h +++ b/libc/shared/math.h @@ -12,6 +12,7 @@ #include "libc_common.h" #include "math/acos.h" +#include "math/acosf.h" #include "math/exp.h" #include "math/exp10.h" #include "math/exp10f.h" diff --git a/libc/shared/math/acosf.h b/libc/shared/math/acosf.h new file mode 100644 index 0000000000000..7cdd64e7b379a --- /dev/null +++ b/libc/shared/math/acosf.h @@ -0,0 +1,23 @@ +//===-- Shared acosf function -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_MATH_ACOSF_H +#define LLVM_LIBC_SHARED_MATH_ACOSF_H + +#include "shared/libc_common.h" +#include "src/__support/math/acosf.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using math::acosf; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SHARED_MATH_ACOSF_H diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt index 4a29c2975d523..fbe7e2c3125ce 100644 --- a/libc/src/__support/math/CMakeLists.txt +++ b/libc/src/__support/math/CMakeLists.txt @@ -17,6 +17,20 @@ add_header_library( libc.src.__support.macros.properties.cpu_features ) +add_header_library( + acosf + HDRS + acosf.h + DEPENDS + .inv_trigf_utils + libc.src.__support.FPUtil.except_value_utils + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.polyeval + libc.src.__support.FPUtil.sqrt + libc.src.__support.macros.optimization +) + add_header_library( asin_utils HDRS @@ -98,6 +112,16 @@ add_header_library( libc.src.__support.FPUtil.manipulation_functions ) +add_header_library( + inv_trigf_utils + HDRS + inv_trigf_utils.h + DEPENDS + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.polyeval + libc.src.__support.common +) + add_header_library( frexpf16 HDRS diff --git a/libc/src/__support/math/acosf.h b/libc/src/__support/math/acosf.h new file mode 100644 index 0000000000000..941c39f6c3eb6 --- /dev/null +++ b/libc/src/__support/math/acosf.h @@ -0,0 +1,139 @@ +//===-- Implementation header for acosf -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ACOSF_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_ACOSF_H + +#include "inv_trigf_utils.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/sqrt.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS +static constexpr size_t N_EXCEPTS = 4; + +// Exceptional values when |x| <= 0.5 +static constexpr fputil::ExceptValues ACOSF_EXCEPTS = {{ + // (inputs, RZ output, RU offset, RD offset, RN offset) + // x = 0x1.110b46p-26, acosf(x) = 0x1.921fb4p0 (RZ) + {0x328885a3, 0x3fc90fda, 1, 0, 1}, + // x = -0x1.110b46p-26, acosf(x) = 0x1.921fb4p0 (RZ) + {0xb28885a3, 0x3fc90fda, 1, 0, 1}, + // x = 0x1.04c444p-12, acosf(x) = 0x1.920f68p0 (RZ) + {0x39826222, 0x3fc907b4, 1, 0, 1}, + // x = -0x1.04c444p-12, acosf(x) = 0x1.923p0 (RZ) + {0xb9826222, 0x3fc91800, 1, 0, 1}, +}}; +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + +static constexpr float acosf(float x) { + using FPBits = typename fputil::FPBits; + + FPBits xbits(x); + uint32_t x_uint = xbits.uintval(); + uint32_t x_abs = xbits.uintval() & 0x7fff'ffffU; + uint32_t x_sign = x_uint >> 31; + + // |x| <= 0.5 + if (LIBC_UNLIKELY(x_abs <= 0x3f00'0000U)) { + // |x| < 0x1p-10 + if (LIBC_UNLIKELY(x_abs < 0x3a80'0000U)) { + // When |x| < 2^-10, we use the following approximation: + // acos(x) = pi/2 - asin(x) + // ~ pi/2 - x - x^3 / 6 + +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + // Check for exceptional values + if (auto r = ACOSF_EXCEPTS.lookup(x_uint); LIBC_UNLIKELY(r.has_value())) + return r.value(); +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + + double xd = static_cast(x); + return static_cast(fputil::multiply_add( + -0x1.5555555555555p-3 * xd, xd * xd, M_MATH_PI_2 - xd)); + } + + // For |x| <= 0.5, we approximate acosf(x) by: + // acos(x) = pi/2 - asin(x) = pi/2 - x * P(x^2) + // Where P(X^2) = Q(X) is a degree-20 minimax even polynomial approximating + // asin(x)/x on [0, 0.5] generated by Sollya with: + // > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|], + // [|1, D...|], [0, 0.5]); + double xd = static_cast(x); + double xsq = xd * xd; + double x3 = xd * xsq; + double r = asin_eval(xsq); + return static_cast(fputil::multiply_add(-x3, r, M_MATH_PI_2 - xd)); + } + + // |x| >= 1, return 0, 2pi, or NaNs. + if (LIBC_UNLIKELY(x_abs >= 0x3f80'0000U)) { + if (x_abs == 0x3f80'0000U) + return x_sign ? /* x == -1.0f */ fputil::round_result_slightly_down( + 0x1.921fb6p+1f) + : /* x == 1.0f */ 0.0f; + + if (xbits.is_signaling_nan()) { + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + + // |x| <= +/-inf + if (x_abs <= 0x7f80'0000U) { + fputil::set_errno_if_required(EDOM); + fputil::raise_except_if_required(FE_INVALID); + } + + return x + FPBits::quiet_nan().get_val(); + } + + // When 0.5 < |x| < 1, we perform range reduction as follow: + // + // Assume further that 0.5 < x <= 1, and let: + // y = acos(x) + // We use the double angle formula: + // x = cos(y) = 1 - 2 sin^2(y/2) + // So: + // sin(y/2) = sqrt( (1 - x)/2 ) + // And hence: + // y = 2 * asin( sqrt( (1 - x)/2 ) ) + // Let u = (1 - x)/2, then + // acos(x) = 2 * asin( sqrt(u) ) + // Moreover, since 0.5 < x <= 1, + // 0 <= u < 1/4, and 0 <= sqrt(u) < 0.5, + // And hence we can reuse the same polynomial approximation of asin(x) when + // |x| <= 0.5: + // acos(x) ~ 2 * sqrt(u) * P(u). + // + // When -1 < x <= -0.5, we use the identity: + // acos(x) = pi - acos(-x) + // which is reduced to the postive case. + + xbits.set_sign(Sign::POS); + double xd = static_cast(xbits.get_val()); + double u = fputil::multiply_add(-0.5, xd, 0.5); + double cv = 2 * fputil::sqrt(u); + + double r3 = asin_eval(u); + double r = fputil::multiply_add(cv * u, r3, cv); + return static_cast(x_sign ? M_MATH_PI - r : r); +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ACOS_H diff --git a/libc/src/math/generic/inv_trigf_utils.cpp b/libc/src/__support/math/inv_trigf_utils.h similarity index 57% rename from libc/src/math/generic/inv_trigf_utils.cpp rename to libc/src/__support/math/inv_trigf_utils.h index f23028bb86b5c..b8f4914eeb9e5 100644 --- a/libc/src/math/generic/inv_trigf_utils.cpp +++ b/libc/src/__support/math/inv_trigf_utils.h @@ -1,4 +1,4 @@ -//===-- Single-precision general exp/log functions ------------------------===// +//===-- Single-precision general inverse trigonometric functions ----------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,11 +6,20 @@ // //===----------------------------------------------------------------------===// -#include "inv_trigf_utils.h" +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_INV_TRIGF_UTILS_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_INV_TRIGF_UTILS_H + +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/common.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { +// PI and PI / 2 +static constexpr double M_MATH_PI = 0x1.921fb54442d18p+1; +static constexpr double M_MATH_PI_2 = 0x1.921fb54442d18p+0; + // Polynomial approximation for 0 <= x <= 1: // atan(x) ~ atan((i/16) + (x - (i/16)) * Q(x - i/16) // = P(x - i/16) @@ -29,7 +38,7 @@ namespace LIBC_NAMESPACE_DECL { // Notice that degree-7 is good enough for atanf, but degree-8 helps reduce the // error bounds for atan2f's fast pass 16 times, and it does not affect the // performance of atanf much. -double ATAN_COEFFS[17][9] = { +static constexpr double ATAN_COEFFS[17][9] = { {0.0, 1.0, 0x1.3f8d76d26d61bp-47, -0x1.5555555574cd8p-2, 0x1.0dde5d06878eap-29, 0x1.99997738acc77p-3, 0x1.2c43eac9797cap-16, -0x1.25fb020007dbdp-3, 0x1.c1b6c31d7b0aep-7}, @@ -83,4 +92,89 @@ double ATAN_COEFFS[17][9] = { 0x1.555e31a1e15e9p-6, -0x1.245240d65e629p-7, -0x1.fa9ba66478903p-11}, }; +// Look-up table for atan(k/16) with k = 0..16. +static constexpr double ATAN_K_OVER_16[17] = { + 0.0, + 0x1.ff55bb72cfdeap-5, + 0x1.fd5ba9aac2f6ep-4, + 0x1.7b97b4bce5b02p-3, + 0x1.f5b75f92c80ddp-3, + 0x1.362773707ebccp-2, + 0x1.6f61941e4def1p-2, + 0x1.a64eec3cc23fdp-2, + 0x1.dac670561bb4fp-2, + 0x1.0657e94db30dp-1, + 0x1.1e00babdefeb4p-1, + 0x1.345f01cce37bbp-1, + 0x1.4978fa3269ee1p-1, + 0x1.5d58987169b18p-1, + 0x1.700a7c5784634p-1, + 0x1.819d0b7158a4dp-1, + 0x1.921fb54442d18p-1, +}; + +// For |x| <= 1/32 and 0 <= i <= 16, return Q(x) such that: +// Q(x) ~ (atan(x + i/16) - atan(i/16)) / x. +LIBC_INLINE static double atan_eval(double x, unsigned i) { + double x2 = x * x; + + double c0 = fputil::multiply_add(x, ATAN_COEFFS[i][2], ATAN_COEFFS[i][1]); + double c1 = fputil::multiply_add(x, ATAN_COEFFS[i][4], ATAN_COEFFS[i][3]); + double c2 = fputil::multiply_add(x, ATAN_COEFFS[i][6], ATAN_COEFFS[i][5]); + double c3 = fputil::multiply_add(x, ATAN_COEFFS[i][8], ATAN_COEFFS[i][7]); + + double x4 = x2 * x2; + double d1 = fputil::multiply_add(x2, c1, c0); + double d2 = fputil::multiply_add(x2, c3, c2); + double p = fputil::multiply_add(x4, d2, d1); + return p; +} + +// Evaluate atan without big lookup table. +// atan(n/d) - atan(k/16) = atan((n/d - k/16) / (1 + (n/d) * (k/16))) +// = atan((n - d * k/16)) / (d + n * k/16)) +// So we let q = (n - d * k/16) / (d + n * k/16), +// and approximate with Taylor polynomial: +// atan(q) ~ q - q^3/3 + q^5/5 - q^7/7 + q^9/9 +LIBC_INLINE static double atan_eval_no_table(double num, double den, + double k_over_16) { + double num_r = fputil::multiply_add(den, -k_over_16, num); + double den_r = fputil::multiply_add(num, k_over_16, den); + double q = num_r / den_r; + + constexpr double ATAN_TAYLOR[] = { + -0x1.5555555555555p-2, + 0x1.999999999999ap-3, + -0x1.2492492492492p-3, + 0x1.c71c71c71c71cp-4, + }; + double q2 = q * q; + double q3 = q2 * q; + double q4 = q2 * q2; + double c0 = fputil::multiply_add(q2, ATAN_TAYLOR[1], ATAN_TAYLOR[0]); + double c1 = fputil::multiply_add(q2, ATAN_TAYLOR[3], ATAN_TAYLOR[2]); + double d = fputil::multiply_add(q4, c1, c0); + return fputil::multiply_add(q3, d, q); +} + +// > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|], +// [|1, D...|], [0, 0.5]); +static constexpr double ASIN_COEFFS[10] = { + 0x1.5555555540fa1p-3, 0x1.333333512edc2p-4, 0x1.6db6cc1541b31p-5, + 0x1.f1caff324770ep-6, 0x1.6e43899f5f4f4p-6, 0x1.1f847cf652577p-6, + 0x1.9b60f47f87146p-7, 0x1.259e2634c494fp-6, -0x1.df946fa875ddp-8, + 0x1.02311ecf99c28p-5}; + +// Evaluate P(x^2) - 1, where P(x^2) ~ asin(x)/x +LIBC_INLINE static double asin_eval(double xsq) { + double x4 = xsq * xsq; + double r1 = fputil::polyeval(x4, ASIN_COEFFS[0], ASIN_COEFFS[2], + ASIN_COEFFS[4], ASIN_COEFFS[6], ASIN_COEFFS[8]); + double r2 = fputil::polyeval(x4, ASIN_COEFFS[1], ASIN_COEFFS[3], + ASIN_COEFFS[5], ASIN_COEFFS[7], ASIN_COEFFS[9]); + return fputil::multiply_add(xsq, r2, r1); +} + } // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_INV_TRIGF_UTILS_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index 7e6a32b7cdf16..c90665d5aa2a5 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -3969,18 +3969,6 @@ add_entrypoint_object( libc.src.__support.macros.properties.types ) -add_object_library( - inv_trigf_utils - HDRS - inv_trigf_utils.h - SRCS - inv_trigf_utils.cpp - DEPENDS - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.polyeval - libc.src.__support.common -) - add_entrypoint_object( asinf SRCS @@ -3994,7 +3982,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.polyeval libc.src.__support.FPUtil.sqrt libc.src.__support.macros.optimization - .inv_trigf_utils + libc.src.__support.math.inv_trigf_utils ) add_entrypoint_object( @@ -4042,13 +4030,7 @@ add_entrypoint_object( HDRS ../acosf.h DEPENDS - libc.src.__support.FPUtil.except_value_utils - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.polyeval - libc.src.__support.FPUtil.sqrt - libc.src.__support.macros.optimization - .inv_trigf_utils + libc.src.__support.math.acosf ) add_entrypoint_object( @@ -4120,7 +4102,6 @@ add_entrypoint_object( HDRS ../atanf.h DEPENDS - .inv_trigf_utils libc.src.__support.FPUtil.except_value_utils libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.multiply_add @@ -4128,6 +4109,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.polyeval libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization + libc.src.__support.math.inv_trigf_utils ) add_entrypoint_object( @@ -4176,7 +4158,6 @@ add_entrypoint_object( ../atan2f.h atan2f_float.h DEPENDS - .inv_trigf_utils libc.hdr.fenv_macros libc.src.__support.FPUtil.double_double libc.src.__support.FPUtil.fenv_impl @@ -4186,6 +4167,7 @@ add_entrypoint_object( libc.src.__support.FPUtil.polyeval libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization + libc.src.__support.math.inv_trigf_utils ) add_entrypoint_object( diff --git a/libc/src/math/generic/acosf.cpp b/libc/src/math/generic/acosf.cpp index 8dd6de2ce7474..7afc7d661d552 100644 --- a/libc/src/math/generic/acosf.cpp +++ b/libc/src/math/generic/acosf.cpp @@ -7,127 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/acosf.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/except_value_utils.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/sqrt.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY - -#include "inv_trigf_utils.h" +#include "src/__support/math/acosf.h" namespace LIBC_NAMESPACE_DECL { -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS -static constexpr size_t N_EXCEPTS = 4; - -// Exceptional values when |x| <= 0.5 -static constexpr fputil::ExceptValues ACOSF_EXCEPTS = {{ - // (inputs, RZ output, RU offset, RD offset, RN offset) - // x = 0x1.110b46p-26, acosf(x) = 0x1.921fb4p0 (RZ) - {0x328885a3, 0x3fc90fda, 1, 0, 1}, - // x = -0x1.110b46p-26, acosf(x) = 0x1.921fb4p0 (RZ) - {0xb28885a3, 0x3fc90fda, 1, 0, 1}, - // x = 0x1.04c444p-12, acosf(x) = 0x1.920f68p0 (RZ) - {0x39826222, 0x3fc907b4, 1, 0, 1}, - // x = -0x1.04c444p-12, acosf(x) = 0x1.923p0 (RZ) - {0xb9826222, 0x3fc91800, 1, 0, 1}, -}}; -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - -LLVM_LIBC_FUNCTION(float, acosf, (float x)) { - using FPBits = typename fputil::FPBits; - - FPBits xbits(x); - uint32_t x_uint = xbits.uintval(); - uint32_t x_abs = xbits.uintval() & 0x7fff'ffffU; - uint32_t x_sign = x_uint >> 31; - - // |x| <= 0.5 - if (LIBC_UNLIKELY(x_abs <= 0x3f00'0000U)) { - // |x| < 0x1p-10 - if (LIBC_UNLIKELY(x_abs < 0x3a80'0000U)) { - // When |x| < 2^-10, we use the following approximation: - // acos(x) = pi/2 - asin(x) - // ~ pi/2 - x - x^3 / 6 - -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - // Check for exceptional values - if (auto r = ACOSF_EXCEPTS.lookup(x_uint); LIBC_UNLIKELY(r.has_value())) - return r.value(); -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - - double xd = static_cast(x); - return static_cast(fputil::multiply_add( - -0x1.5555555555555p-3 * xd, xd * xd, M_MATH_PI_2 - xd)); - } - - // For |x| <= 0.5, we approximate acosf(x) by: - // acos(x) = pi/2 - asin(x) = pi/2 - x * P(x^2) - // Where P(X^2) = Q(X) is a degree-20 minimax even polynomial approximating - // asin(x)/x on [0, 0.5] generated by Sollya with: - // > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|], - // [|1, D...|], [0, 0.5]); - double xd = static_cast(x); - double xsq = xd * xd; - double x3 = xd * xsq; - double r = asin_eval(xsq); - return static_cast(fputil::multiply_add(-x3, r, M_MATH_PI_2 - xd)); - } - - // |x| >= 1, return 0, 2pi, or NaNs. - if (LIBC_UNLIKELY(x_abs >= 0x3f80'0000U)) { - if (x_abs == 0x3f80'0000U) - return x_sign ? /* x == -1.0f */ fputil::round_result_slightly_down( - 0x1.921fb6p+1f) - : /* x == 1.0f */ 0.0f; - - if (xbits.is_signaling_nan()) { - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - - // |x| <= +/-inf - if (x_abs <= 0x7f80'0000U) { - fputil::set_errno_if_required(EDOM); - fputil::raise_except_if_required(FE_INVALID); - } - - return x + FPBits::quiet_nan().get_val(); - } - - // When 0.5 < |x| < 1, we perform range reduction as follow: - // - // Assume further that 0.5 < x <= 1, and let: - // y = acos(x) - // We use the double angle formula: - // x = cos(y) = 1 - 2 sin^2(y/2) - // So: - // sin(y/2) = sqrt( (1 - x)/2 ) - // And hence: - // y = 2 * asin( sqrt( (1 - x)/2 ) ) - // Let u = (1 - x)/2, then - // acos(x) = 2 * asin( sqrt(u) ) - // Moreover, since 0.5 < x <= 1, - // 0 <= u < 1/4, and 0 <= sqrt(u) < 0.5, - // And hence we can reuse the same polynomial approximation of asin(x) when - // |x| <= 0.5: - // acos(x) ~ 2 * sqrt(u) * P(u). - // - // When -1 < x <= -0.5, we use the identity: - // acos(x) = pi - acos(-x) - // which is reduced to the postive case. - - xbits.set_sign(Sign::POS); - double xd = static_cast(xbits.get_val()); - double u = fputil::multiply_add(-0.5, xd, 0.5); - double cv = 2 * fputil::sqrt(u); - - double r3 = asin_eval(u); - double r = fputil::multiply_add(cv * u, r3, cv); - return static_cast(x_sign ? M_MATH_PI - r : r); -} +LLVM_LIBC_FUNCTION(float, acosf, (float x)) { return math::acosf(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/asinf.cpp b/libc/src/math/generic/asinf.cpp index 12383bf6dacae..c8d6b38ab1560 100644 --- a/libc/src/math/generic/asinf.cpp +++ b/libc/src/math/generic/asinf.cpp @@ -17,7 +17,7 @@ #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA -#include "inv_trigf_utils.h" +#include "src/__support/math/inv_trigf_utils.h" namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/math/generic/atan2f.cpp b/libc/src/math/generic/atan2f.cpp index c04b0eb1cc589..0a768494baa64 100644 --- a/libc/src/math/generic/atan2f.cpp +++ b/libc/src/math/generic/atan2f.cpp @@ -8,7 +8,6 @@ #include "src/math/atan2f.h" #include "hdr/fenv_macros.h" -#include "inv_trigf_utils.h" #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/PolyEval.h" @@ -18,6 +17,7 @@ #include "src/__support/FPUtil/rounding_mode.h" #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#include "src/__support/math/inv_trigf_utils.h" #if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) && \ defined(LIBC_MATH_HAS_INTERMEDIATE_COMP_IN_FLOAT) diff --git a/libc/src/math/generic/atanf.cpp b/libc/src/math/generic/atanf.cpp index 46196dbe4162c..d12456c591016 100644 --- a/libc/src/math/generic/atanf.cpp +++ b/libc/src/math/generic/atanf.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/math/atanf.h" -#include "inv_trigf_utils.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/PolyEval.h" #include "src/__support/FPUtil/except_value_utils.h" @@ -16,6 +15,7 @@ #include "src/__support/FPUtil/rounding_mode.h" #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#include "src/__support/math/inv_trigf_utils.h" namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/math/generic/inv_trigf_utils.h b/libc/src/math/generic/inv_trigf_utils.h deleted file mode 100644 index 8b47aba342995..0000000000000 --- a/libc/src/math/generic/inv_trigf_utils.h +++ /dev/null @@ -1,110 +0,0 @@ -//===-- Single-precision general inverse trigonometric functions ----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_MATH_GENERIC_INV_TRIGF_UTILS_H -#define LLVM_LIBC_SRC_MATH_GENERIC_INV_TRIGF_UTILS_H - -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" - -namespace LIBC_NAMESPACE_DECL { - -// PI and PI / 2 -static constexpr double M_MATH_PI = 0x1.921fb54442d18p+1; -static constexpr double M_MATH_PI_2 = 0x1.921fb54442d18p+0; - -extern double ATAN_COEFFS[17][9]; - -// Look-up table for atan(k/16) with k = 0..16. -static constexpr double ATAN_K_OVER_16[17] = { - 0.0, - 0x1.ff55bb72cfdeap-5, - 0x1.fd5ba9aac2f6ep-4, - 0x1.7b97b4bce5b02p-3, - 0x1.f5b75f92c80ddp-3, - 0x1.362773707ebccp-2, - 0x1.6f61941e4def1p-2, - 0x1.a64eec3cc23fdp-2, - 0x1.dac670561bb4fp-2, - 0x1.0657e94db30dp-1, - 0x1.1e00babdefeb4p-1, - 0x1.345f01cce37bbp-1, - 0x1.4978fa3269ee1p-1, - 0x1.5d58987169b18p-1, - 0x1.700a7c5784634p-1, - 0x1.819d0b7158a4dp-1, - 0x1.921fb54442d18p-1, -}; - -// For |x| <= 1/32 and 0 <= i <= 16, return Q(x) such that: -// Q(x) ~ (atan(x + i/16) - atan(i/16)) / x. -LIBC_INLINE static double atan_eval(double x, unsigned i) { - double x2 = x * x; - - double c0 = fputil::multiply_add(x, ATAN_COEFFS[i][2], ATAN_COEFFS[i][1]); - double c1 = fputil::multiply_add(x, ATAN_COEFFS[i][4], ATAN_COEFFS[i][3]); - double c2 = fputil::multiply_add(x, ATAN_COEFFS[i][6], ATAN_COEFFS[i][5]); - double c3 = fputil::multiply_add(x, ATAN_COEFFS[i][8], ATAN_COEFFS[i][7]); - - double x4 = x2 * x2; - double d1 = fputil::multiply_add(x2, c1, c0); - double d2 = fputil::multiply_add(x2, c3, c2); - double p = fputil::multiply_add(x4, d2, d1); - return p; -} - -// Evaluate atan without big lookup table. -// atan(n/d) - atan(k/16) = atan((n/d - k/16) / (1 + (n/d) * (k/16))) -// = atan((n - d * k/16)) / (d + n * k/16)) -// So we let q = (n - d * k/16) / (d + n * k/16), -// and approximate with Taylor polynomial: -// atan(q) ~ q - q^3/3 + q^5/5 - q^7/7 + q^9/9 -LIBC_INLINE static double atan_eval_no_table(double num, double den, - double k_over_16) { - double num_r = fputil::multiply_add(den, -k_over_16, num); - double den_r = fputil::multiply_add(num, k_over_16, den); - double q = num_r / den_r; - - constexpr double ATAN_TAYLOR[] = { - -0x1.5555555555555p-2, - 0x1.999999999999ap-3, - -0x1.2492492492492p-3, - 0x1.c71c71c71c71cp-4, - }; - double q2 = q * q; - double q3 = q2 * q; - double q4 = q2 * q2; - double c0 = fputil::multiply_add(q2, ATAN_TAYLOR[1], ATAN_TAYLOR[0]); - double c1 = fputil::multiply_add(q2, ATAN_TAYLOR[3], ATAN_TAYLOR[2]); - double d = fputil::multiply_add(q4, c1, c0); - return fputil::multiply_add(q3, d, q); -} - -// > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|], -// [|1, D...|], [0, 0.5]); -static constexpr double ASIN_COEFFS[10] = { - 0x1.5555555540fa1p-3, 0x1.333333512edc2p-4, 0x1.6db6cc1541b31p-5, - 0x1.f1caff324770ep-6, 0x1.6e43899f5f4f4p-6, 0x1.1f847cf652577p-6, - 0x1.9b60f47f87146p-7, 0x1.259e2634c494fp-6, -0x1.df946fa875ddp-8, - 0x1.02311ecf99c28p-5}; - -// Evaluate P(x^2) - 1, where P(x^2) ~ asin(x)/x -LIBC_INLINE static double asin_eval(double xsq) { - double x4 = xsq * xsq; - double r1 = fputil::polyeval(x4, ASIN_COEFFS[0], ASIN_COEFFS[2], - ASIN_COEFFS[4], ASIN_COEFFS[6], ASIN_COEFFS[8]); - double r2 = fputil::polyeval(x4, ASIN_COEFFS[1], ASIN_COEFFS[3], - ASIN_COEFFS[5], ASIN_COEFFS[7], ASIN_COEFFS[9]); - return fputil::multiply_add(xsq, r2, r1); -} - -} // namespace LIBC_NAMESPACE_DECL - -#endif // LLVM_LIBC_SRC_MATH_GENERIC_INV_TRIGF_UTILS_H diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 1d9989debdcdb..337423cfb96cb 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -2009,22 +2009,6 @@ libc_support_library( ], ) -libc_support_library( - name = "inv_trigf_utils", - srcs = ["src/math/generic/inv_trigf_utils.cpp"], - hdrs = [ - "src/math/generic/inv_trigf_utils.h", - ], - deps = [ - ":__support_common", - ":__support_fputil_double_double", - ":__support_fputil_fma", - ":__support_fputil_multiply_add", - ":__support_fputil_polyeval", - ":__support_integer_literals", - ], -) - libc_support_library( name = "atan_utils", hdrs = ["src/math/generic/atan_utils.h"], @@ -2095,6 +2079,20 @@ libc_support_library( ], ) +libc_support_library( + name = "__support_math_acosf", + hdrs = ["src/__support/math/acosf.h"], + deps = [ + ":__support_math_inv_trigf_utils", + ":__support_fputil_except_value_utils", + ":__support_fputil_fp_bits", + ":__support_fputil_multiply_add", + ":__support_fputil_polyeval", + ":__support_fputil_sqrt", + ":__support_macros_optimization", + ], +) + libc_support_library( name = "__support_math_asin_utils", hdrs = ["src/__support/math/asin_utils.h"], @@ -2177,6 +2175,16 @@ libc_support_library( ], ) +libc_support_library( + name = "__support_math_inv_trigf_utils", + hdrs = ["src/__support/math/inv_trigf_utils.h"], + deps = [ + ":__support_fputil_multiply_add", + ":__support_fputil_polyeval", + ":__support_common", + ], +) + libc_support_library( name = "__support_math_frexpf16", hdrs = ["src/__support/math/frexpf16.h"], @@ -2596,13 +2604,7 @@ libc_math_function( libc_math_function( name = "acosf", additional_deps = [ - ":__support_fputil_fma", - ":__support_fputil_multiply_add", - ":__support_fputil_nearest_integer", - ":__support_fputil_polyeval", - ":__support_fputil_sqrt", - ":__support_macros_optimization", - ":inv_trigf_utils", + ":__support_math_acosf", ], ) @@ -2616,7 +2618,7 @@ libc_math_function( ":__support_fputil_polyeval", ":__support_fputil_sqrt", ":__support_macros_optimization", - ":inv_trigf_utils", + ":__support_math_inv_trigf_utils", ], ) @@ -2644,7 +2646,7 @@ libc_math_function( ":__support_fputil_sqrt", ":__support_macros_optimization", ":__support_macros_properties_cpu_features", - ":inv_trigf_utils", + ":__support_math_inv_trigf_utils", ], ) @@ -2658,7 +2660,7 @@ libc_math_function( ":__support_fputil_polyeval", ":__support_fputil_sqrt", ":__support_macros_optimization", - ":inv_trigf_utils", + ":__support_math_inv_trigf_utils", ], ) @@ -2685,7 +2687,7 @@ libc_math_function( ":__support_fputil_polyeval", ":__support_fputil_rounding_mode", ":__support_macros_optimization", - ":inv_trigf_utils", + ":__support_math_inv_trigf_utils", ], ) @@ -2704,7 +2706,7 @@ libc_math_function( additional_deps = [ ":__support_fputil_double_double", ":__support_fputil_nearest_integer", - ":inv_trigf_utils", + ":__support_math_inv_trigf_utils", ], )