66
77#include "nbl/builtin/hlsl/cpp_compat.hlsl"
88#include "nbl/builtin/hlsl/numbers.hlsl"
9- #include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl"
10- #include "nbl/builtin/hlsl/concepts/vector.hlsl"
11- #include "nbl/builtin/hlsl/spirv_intrinsics/core.hlsl"
12- #include "nbl/builtin/hlsl/ieee754.hlsl"
9+ #include "nbl/builtin/hlsl/tgmath.hlsl"
10+ #include "nbl/builtin/hlsl/complex.hlsl"
1311
1412namespace nbl
1513{
@@ -18,107 +16,59 @@ namespace hlsl
1816namespace math
1917{
2018
21- namespace impl
22- {
// Accumulator for a sum of arccosines that is driven by cosines only.
// State lives in `runningSum`: real() holds a cosine, imag() holds an additive angle bias;
// getSumofArccos() reports acos(real()) + imag().
// NOTE(review): lines tagged `-` / `+` are the removed / added sides of the change shown here.
19+ template <typename T>
2320struct sincos_accumulator
2421{
25- using this_t = sincos_accumulator;
22+ using this_t = sincos_accumulator<T> ;
2623
// Empty accumulator: cosine 1 and bias 0 (acos(1) is 0, so the reported sum starts at 0).
2724 static this_t create ()
2825 {
2926 this_t retval;
30- retval.tmp0 = 0 ;
31- retval.tmp1 = 0 ;
32- retval.tmp2 = 0 ;
33- retval.tmp3 = 0 ;
34- retval.tmp4 = 0 ;
35- retval.tmp5 = 0 ;
27+ retval.runningSum = complex_t<T>::create (T (1.0 ), T (0.0 ));
3628 return retval;
3729 }
3830
39- static this_t create (float cosA, float cosB, float cosC, float sinA, float sinB, float sinC )
// Accumulator seeded with a single cosine `cosA` and zero bias.
31+ static this_t create (T cosA)
4032 {
4133 this_t retval;
42- retval.tmp0 = cosA;
43- retval.tmp1 = cosB;
44- retval.tmp2 = cosC;
45- retval.tmp3 = sinA;
46- retval.tmp4 = sinB;
47- retval.tmp5 = sinC;
34+ retval.runningSum = complex_t<T>::create (cosA, T (0 ));
4835 return retval;
4936 }
5037
51- float getArccosSumofABC_minus_PI ( )
// Folds one more angle, given by its cosine `cosA`, into the running sum; `biasA` is added to the stored bias.
// With a = cosA and b = runningSum.real(), c = a*b - sqrt((1-a^2)*(1-b^2)) is cos(acos(a) + acos(b))
// (angle-sum identity, taking both sines as the non-negative square roots).
// `reverse` is set when |min(a,b)| > max(a,b); for non-NaN inputs that is equivalent to a + b < 0,
// i.e. acos(a) + acos(b) > pi, where acos(c) alone would no longer equal the sum.
// In that case the stored cosine is negated and pi is added to the bias.
// NOTE(review): presumably ieee754::flipSign negates when `reverse` is true and hlsl::mix picks its
// second argument when `reverse` is true, matching the removed `if (reverse)` branch below -- confirm.
38+ void addCosine (T cosA, T biasA )
5239 {
53- const bool AltminusB = tmp0 < (-tmp1);
54- const float cosSumAB = tmp0 * tmp1 - tmp3 * tmp4;
55- const bool ABltminusC = cosSumAB < (-tmp2);
56- const bool ABltC = cosSumAB < tmp2;
57- // apply triple angle formula
58- const float absArccosSumABC = acos<float >(clamp <float >(cosSumAB * tmp2 - (tmp0 * tmp4 + tmp3 * tmp1) * tmp5, -1.f , 1.f ));
59- return ((AltminusB ? ABltC : ABltminusC) ? (-absArccosSumABC) : absArccosSumABC) + ((AltminusB || ABltminusC) ? numbers::pi<float > : (-numbers::pi<float >));
40+ const T bias = biasA + runningSum.imag ();
41+ const T a = cosA;
42+ const T b = runningSum.real ();
43+ const bool reverse = abs<T>(min <T>(a, b)) > max <T>(a, b);
44+ const T c = a * b - sqrt<T>((T (1.0 ) - a * a) * (T (1.0 ) - b * b));
45+
46+ runningSum.real (ieee754::flipSign<T>(c, reverse));
47+ runningSum.imag (hlsl::mix (bias, bias + numbers::pi<T>, reverse));
48+ }
// Convenience overload: adds `cosA` with no extra bias.
49+ void addCosine (T cosA)
50+ {
51+ addCosine (cosA, T (0.0 ));
6052 }
6153
62- static void combineCosForSumOfAcos ( float cosA, float cosB, float biasA, float biasB, NBL_REF_ARG ( float ) out0, NBL_REF_ARG ( float ) out1 )
// Returns acos of the stored cosine plus the stored bias.
// NOTE(review): unlike getArccosSumofABC_minus_PI below, the acos argument is not clamped here.
54+ T getSumofArccos ( )
6355 {
64- const float bias = biasA + biasB;
65- const float a = cosA;
66- const float b = cosB;
67- const bool reverse = abs<float >(min <float >(a, b)) > max <float >(a, b);
68- const float c = a * b - sqrt<float >((1.0f - a * a) * (1.0f - b * b));
56+ return acos<T>(runningSum.real ()) + runningSum.imag ();
57+ }
6958
70- if (reverse)
71- {
72- out0 = -c;
73- out1 = bias + numbers::pi<float >;
74- }
75- else
76- {
77- out0 = c;
78- out1 = bias;
79- }
// Stateless helper taking the cosines and sines of three angles A, B, C.
// cosSumAB is cos(A+B); the clamped acos argument is cos(A+B)*cosC - sin(A+B)*sinC, i.e. cos(A+B+C).
// The three comparisons then choose the sign applied to that acos and whether +pi or -pi is added.
// NOTE(review): the name suggests the result is (A + B + C) - pi -- confirm the intended input range with callers.
59+ static T getArccosSumofABC_minus_PI (T cosA, T cosB, T cosC, T sinA, T sinB, T sinC)
60+ {
61+ const bool AltminusB = cosA < (-cosB);
62+ const T cosSumAB = cosA * cosB - sinA * sinB;
63+ const bool ABltminusC = cosSumAB < (-cosC);
64+ const bool ABltC = cosSumAB < cosC;
65+ // apply triple angle formula
66+ const T absArccosSumABC = acos<T>(clamp <T>(cosSumAB * cosC - (cosA * sinB + sinA * cosB) * sinC, T (-1.0 ), T (1.0 )));
67+ return ((AltminusB ? ABltC : ABltminusC) ? (-absArccosSumABC) : absArccosSumABC) + ((AltminusB || ABltminusC) ? numbers::pi<T> : (-numbers::pi<T>));
8068 }
8169
82- float tmp0;
83- float tmp1;
84- float tmp2;
85- float tmp3;
86- float tmp4;
87- float tmp5;
// real(): cosine of the folded angle; imag(): angle bias accumulated by addCosine.
70+ complex_t<T> runningSum;
8871};
89- }
90-
// Free-function wrapper: builds an impl::sincos_accumulator from the six cosine/sine values
// and returns the result of its getArccosSumofABC_minus_PI() member.
// NOTE(review): every line of this wrapper carries a `-` marker, i.e. it is on the removed side of the change shown.
91- float getArccosSumofABC_minus_PI (float cosA, float cosB, float cosC, float sinA, float sinB, float sinC)
92- {
93- impl::sincos_accumulator acc = impl::sincos_accumulator::create (cosA, cosB, cosC, sinA, sinB, sinC);
94- return acc.getArccosSumofABC_minus_PI ();
95- }
96-
// Free-function wrapper: calls impl::sincos_accumulator::combineCosForSumOfAcos with a scratch
// accumulator's tmp0/tmp1 as the outputs, then copies them to out0 (combined cosine) and out1 (combined bias).
// NOTE(review): every line of this wrapper carries a `-` marker, i.e. it is on the removed side of the change shown.
97- void combineCosForSumOfAcos (float cosA, float cosB, float biasA, float biasB, NBL_REF_ARG (float ) out0, NBL_REF_ARG (float ) out1)
98- {
99- impl::sincos_accumulator acc = impl::sincos_accumulator::create ();
100- impl::sincos_accumulator::combineCosForSumOfAcos (cosA, cosB, biasA, biasB, acc.tmp0, acc.tmp1);
101- out0 = acc.tmp0;
102- out1 = acc.tmp1;
103- }
104-
105- // returns acos(a) + acos(b)
// Combines the two cosines with zero input biases into tmp0 (cosine) and tmp1 (bias),
// then returns acos(tmp0) + tmp1.
// NOTE(review): every line of this function carries a `-` marker, i.e. it is on the removed side of the change shown.
106- float getSumofArccosAB (float cosA, float cosB)
107- {
108- impl::sincos_accumulator acc = impl::sincos_accumulator::create ();
109- impl::sincos_accumulator::combineCosForSumOfAcos (cosA, cosB, 0.0f , 0.0f , acc.tmp0, acc.tmp1);
110- return acos<float >(acc.tmp0) + acc.tmp1;
111- }
112-
113- // returns acos(a) + acos(b) + acos(c) + acos(d)
// Works pairwise: (A,B) is combined into tmp0/tmp1 and (C,D) into tmp2/tmp3 (cosine/bias each),
// then those two partial results are combined into tmp4/tmp5 and acos(tmp4) + tmp5 is returned.
// NOTE(review): every line of this function carries a `-` marker, i.e. it is on the removed side of the change shown.
114- float getSumofArccosABCD (float cosA, float cosB, float cosC, float cosD)
115- {
116- impl::sincos_accumulator acc = impl::sincos_accumulator::create ();
117- impl::sincos_accumulator::combineCosForSumOfAcos (cosA, cosB, 0.0f , 0.0f , acc.tmp0, acc.tmp1);
118- impl::sincos_accumulator::combineCosForSumOfAcos (cosC, cosD, 0.0f , 0.0f , acc.tmp2, acc.tmp3);
119- impl::sincos_accumulator::combineCosForSumOfAcos (acc.tmp0, acc.tmp2, acc.tmp1, acc.tmp3, acc.tmp4, acc.tmp5);
120- return acos<float >(acc.tmp4) + acc.tmp5;
121- }
12272
12373}
12474}
0 commit comments