Rework angle adding: template sincos_accumulator on T and accumulate arccos sums in a complex_t running sum

keptsecret · keptsecret · commit 18b66bed5bbe · 2025-10-31T16:07:46.000+07:00
diff --git a/include/nbl/builtin/hlsl/math/angle_adding.hlsl b/include/nbl/builtin/hlsl/math/angle_adding.hlsl
@@ -6,10 +6,8 @@
 
 #include "nbl/builtin/hlsl/cpp_compat.hlsl"
 #include "nbl/builtin/hlsl/numbers.hlsl"
-#include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl"
-#include "nbl/builtin/hlsl/concepts/vector.hlsl"
-#include "nbl/builtin/hlsl/spirv_intrinsics/core.hlsl"
-#include "nbl/builtin/hlsl/ieee754.hlsl"
+#include "nbl/builtin/hlsl/tgmath.hlsl"
+#include "nbl/builtin/hlsl/complex.hlsl"
 
 namespace nbl
 {
@@ -18,107 +16,59 @@ namespace hlsl
 namespace math
 {
 
-namespace impl
-{
+template <typename T>
 struct sincos_accumulator
 {
-    using this_t = sincos_accumulator;
+    using this_t = sincos_accumulator<T>;
 
     static this_t create()
     {
         this_t retval;
-        retval.tmp0 = 0;
-        retval.tmp1 = 0;
-        retval.tmp2 = 0;
-        retval.tmp3 = 0;
-        retval.tmp4 = 0;
-        retval.tmp5 = 0;
+        retval.runningSum = complex_t<T>::create(T(1.0), T(0.0));
         return retval;
     }
 
-    static this_t create(float cosA, float cosB, float cosC, float sinA, float sinB, float sinC)
+    static this_t create(T cosA)
     {
         this_t retval;
-        retval.tmp0 = cosA;
-        retval.tmp1 = cosB;
-        retval.tmp2 = cosC;
-        retval.tmp3 = sinA;
-        retval.tmp4 = sinB;
-        retval.tmp5 = sinC;
+        retval.runningSum = complex_t<T>::create(cosA, T(0));
         return retval;
     }
 
-    float getArccosSumofABC_minus_PI()
+    void addCosine(T cosA, T biasA)
     {
-        const bool AltminusB = tmp0 < (-tmp1);
-        const float cosSumAB = tmp0 * tmp1 - tmp3 * tmp4;
-        const bool ABltminusC = cosSumAB < (-tmp2);
-        const bool ABltC = cosSumAB < tmp2;
-        // apply triple angle formula
-        const float absArccosSumABC = acos<float>(clamp<float>(cosSumAB * tmp2 - (tmp0 * tmp4 + tmp3 * tmp1) * tmp5, -1.f, 1.f));
-        return ((AltminusB ? ABltC : ABltminusC) ? (-absArccosSumABC) : absArccosSumABC) + ((AltminusB || ABltminusC) ? numbers::pi<float> : (-numbers::pi<float>));
+        const T bias = biasA + runningSum.imag();
+        const T a = cosA;
+        const T b = runningSum.real();
+        const bool reverse = abs<T>(min<T>(a, b)) > max<T>(a, b);
+        const T c = a * b - sqrt<T>((T(1.0) - a * a) * (T(1.0) - b * b));
+
+        runningSum.real(ieee754::flipSign<T>(c, reverse));
+        runningSum.imag(hlsl::mix(bias, bias + numbers::pi<T>, reverse));
+    }
+    void addCosine(T cosA)
+    {
+        addCosine(cosA, T(0.0));
     }
 
-    static void combineCosForSumOfAcos(float cosA, float cosB, float biasA, float biasB, NBL_REF_ARG(float) out0, NBL_REF_ARG(float) out1)
+    T getSumofArccos()
     {
-        const float bias = biasA + biasB;
-        const float a = cosA;
-        const float b = cosB;
-        const bool reverse = abs<float>(min<float>(a, b)) > max<float>(a, b);
-        const float c = a * b - sqrt<float>((1.0f - a * a) * (1.0f - b * b));
+        return acos<T>(runningSum.real()) + runningSum.imag();
+    }
 
-        if (reverse)
-        {
-            out0 = -c;
-            out1 = bias + numbers::pi<float>;
-        }
-        else
-        {
-            out0 = c;
-            out1 = bias;
-        }
+    static T getArccosSumofABC_minus_PI(T cosA, T cosB, T cosC, T sinA, T sinB, T sinC)
+    {
+        const bool AltminusB = cosA < (-cosB);
+        const T cosSumAB = cosA * cosB - sinA * sinB;
+        const bool ABltminusC = cosSumAB < (-cosC);
+        const bool ABltC = cosSumAB < cosC;
+        // apply triple angle formula
+        const T absArccosSumABC = acos<T>(clamp<T>(cosSumAB * cosC - (cosA * sinB + sinA * cosB) * sinC, T(-1.0), T(1.0)));
+        return ((AltminusB ? ABltC : ABltminusC) ? (-absArccosSumABC) : absArccosSumABC) + ((AltminusB || ABltminusC) ? numbers::pi<T> : (-numbers::pi<T>));
     }
 
-    float tmp0;
-    float tmp1;
-    float tmp2;
-    float tmp3;
-    float tmp4;
-    float tmp5;
+    complex_t<T> runningSum;
 };
-}
-
-float getArccosSumofABC_minus_PI(float cosA, float cosB, float cosC, float sinA, float sinB, float sinC)
-{
-    impl::sincos_accumulator acc = impl::sincos_accumulator::create(cosA, cosB, cosC, sinA, sinB, sinC);
-    return acc.getArccosSumofABC_minus_PI();
-}
-
-void combineCosForSumOfAcos(float cosA, float cosB, float biasA, float biasB, NBL_REF_ARG(float) out0, NBL_REF_ARG(float) out1)
-{
-    impl::sincos_accumulator acc = impl::sincos_accumulator::create();
-    impl::sincos_accumulator::combineCosForSumOfAcos(cosA, cosB, biasA, biasB, acc.tmp0, acc.tmp1);
-    out0 = acc.tmp0;
-    out1 = acc.tmp1;
-}
-
-// returns acos(a) + acos(b)
-float getSumofArccosAB(float cosA, float cosB)
-{
-    impl::sincos_accumulator acc = impl::sincos_accumulator::create();
-    impl::sincos_accumulator::combineCosForSumOfAcos(cosA, cosB, 0.0f, 0.0f, acc.tmp0, acc.tmp1);
-    return acos<float>(acc.tmp0) + acc.tmp1;
-}
-
-// returns acos(a) + acos(b) + acos(c) + acos(d)
-float getSumofArccosABCD(float cosA, float cosB, float cosC, float cosD)
-{
-    impl::sincos_accumulator acc = impl::sincos_accumulator::create();
-    impl::sincos_accumulator::combineCosForSumOfAcos(cosA, cosB, 0.0f, 0.0f, acc.tmp0, acc.tmp1);
-    impl::sincos_accumulator::combineCosForSumOfAcos(cosC, cosD, 0.0f, 0.0f, acc.tmp2, acc.tmp3);
-    impl::sincos_accumulator::combineCosForSumOfAcos(acc.tmp0, acc.tmp2, acc.tmp1, acc.tmp3, acc.tmp4, acc.tmp5);
-    return acos<float>(acc.tmp4) + acc.tmp5;
-}
 
 }
 }
diff --git a/include/nbl/builtin/hlsl/tgmath/impl.hlsl b/include/nbl/builtin/hlsl/tgmath/impl.hlsl
@@ -8,6 +8,7 @@
 #include <nbl/builtin/hlsl/spirv_intrinsics/glsl.std.450.hlsl>
 #include <nbl/builtin/hlsl/tgmath/output_structs.hlsl>
 #include <nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl>
+#include <nbl/builtin/hlsl/limits.hlsl>
 #include <nbl/builtin/hlsl/mpl.hlsl>
 
 // C++ includes