Skip to content

Commit 18b66be

Browse files
committed
make angle adding stuff work better (template sincos_accumulator on T and keep the running sum in a complex_t<T>)
1 parent 36cd283 commit 18b66be

File tree

2 files changed

+34
-83
lines changed

2 files changed

+34
-83
lines changed

include/nbl/builtin/hlsl/math/angle_adding.hlsl

Lines changed: 33 additions & 83 deletions
Original file line number | Diff line number | Diff line change
@@ -6,10 +6,8 @@
66

77
#include "nbl/builtin/hlsl/cpp_compat.hlsl"
88
#include "nbl/builtin/hlsl/numbers.hlsl"
9-
#include "nbl/builtin/hlsl/vector_utils/vector_traits.hlsl"
10-
#include "nbl/builtin/hlsl/concepts/vector.hlsl"
11-
#include "nbl/builtin/hlsl/spirv_intrinsics/core.hlsl"
12-
#include "nbl/builtin/hlsl/ieee754.hlsl"
9+
#include "nbl/builtin/hlsl/tgmath.hlsl"
10+
#include "nbl/builtin/hlsl/complex.hlsl"
1311

1412
namespace nbl
1513
{
@@ -18,107 +16,59 @@ namespace hlsl
1816
namespace math
1917
{
2018

21-
namespace impl
22-
{
19+
template <typename T>
2320
struct sincos_accumulator
2421
{
25-
using this_t = sincos_accumulator;
22+
using this_t = sincos_accumulator<T>;
2623

2724
static this_t create()
2825
{
2926
this_t retval;
30-
retval.tmp0 = 0;
31-
retval.tmp1 = 0;
32-
retval.tmp2 = 0;
33-
retval.tmp3 = 0;
34-
retval.tmp4 = 0;
35-
retval.tmp5 = 0;
27+
retval.runningSum = complex_t<T>::create(T(1.0), T(0.0));
3628
return retval;
3729
}
3830

39-
static this_t create(float cosA, float cosB, float cosC, float sinA, float sinB, float sinC)
31+
static this_t create(T cosA)
4032
{
4133
this_t retval;
42-
retval.tmp0 = cosA;
43-
retval.tmp1 = cosB;
44-
retval.tmp2 = cosC;
45-
retval.tmp3 = sinA;
46-
retval.tmp4 = sinB;
47-
retval.tmp5 = sinC;
34+
retval.runningSum = complex_t<T>::create(cosA, T(0));
4835
return retval;
4936
}
5037

51-
float getArccosSumofABC_minus_PI()
38+
void addCosine(T cosA, T biasA)
5239
{
53-
const bool AltminusB = tmp0 < (-tmp1);
54-
const float cosSumAB = tmp0 * tmp1 - tmp3 * tmp4;
55-
const bool ABltminusC = cosSumAB < (-tmp2);
56-
const bool ABltC = cosSumAB < tmp2;
57-
// apply triple angle formula
58-
const float absArccosSumABC = acos<float>(clamp<float>(cosSumAB * tmp2 - (tmp0 * tmp4 + tmp3 * tmp1) * tmp5, -1.f, 1.f));
59-
return ((AltminusB ? ABltC : ABltminusC) ? (-absArccosSumABC) : absArccosSumABC) + ((AltminusB || ABltminusC) ? numbers::pi<float> : (-numbers::pi<float>));
40+
const T bias = biasA + runningSum.imag();
41+
const T a = cosA;
42+
const T b = runningSum.real();
43+
const bool reverse = abs<T>(min<T>(a, b)) > max<T>(a, b);
44+
const T c = a * b - sqrt<T>((T(1.0) - a * a) * (T(1.0) - b * b));
45+
46+
runningSum.real(ieee754::flipSign<T>(c, reverse));
47+
runningSum.imag(hlsl::mix(bias, bias + numbers::pi<T>, reverse));
48+
}
49+
void addCosine(T cosA)
50+
{
51+
addCosine(cosA, T(0.0));
6052
}
6153

62-
static void combineCosForSumOfAcos(float cosA, float cosB, float biasA, float biasB, NBL_REF_ARG(float) out0, NBL_REF_ARG(float) out1)
54+
T getSumofArccos()
6355
{
64-
const float bias = biasA + biasB;
65-
const float a = cosA;
66-
const float b = cosB;
67-
const bool reverse = abs<float>(min<float>(a, b)) > max<float>(a, b);
68-
const float c = a * b - sqrt<float>((1.0f - a * a) * (1.0f - b * b));
56+
return acos<T>(runningSum.real()) + runningSum.imag();
57+
}
6958

70-
if (reverse)
71-
{
72-
out0 = -c;
73-
out1 = bias + numbers::pi<float>;
74-
}
75-
else
76-
{
77-
out0 = c;
78-
out1 = bias;
79-
}
59+
static T getArccosSumofABC_minus_PI(T cosA, T cosB, T cosC, T sinA, T sinB, T sinC)
60+
{
61+
const bool AltminusB = cosA < (-cosB);
62+
const T cosSumAB = cosA * cosB - sinA * sinB;
63+
const bool ABltminusC = cosSumAB < (-cosC);
64+
const bool ABltC = cosSumAB < cosC;
65+
// apply triple angle formula
66+
const T absArccosSumABC = acos<T>(clamp<T>(cosSumAB * cosC - (cosA * sinB + sinA * cosB) * sinC, T(-1.0), T(1.0)));
67+
return ((AltminusB ? ABltC : ABltminusC) ? (-absArccosSumABC) : absArccosSumABC) + ((AltminusB || ABltminusC) ? numbers::pi<T> : (-numbers::pi<T>));
8068
}
8169

82-
float tmp0;
83-
float tmp1;
84-
float tmp2;
85-
float tmp3;
86-
float tmp4;
87-
float tmp5;
70+
complex_t<T> runningSum;
8871
};
89-
}
90-
91-
float getArccosSumofABC_minus_PI(float cosA, float cosB, float cosC, float sinA, float sinB, float sinC)
92-
{
93-
impl::sincos_accumulator acc = impl::sincos_accumulator::create(cosA, cosB, cosC, sinA, sinB, sinC);
94-
return acc.getArccosSumofABC_minus_PI();
95-
}
96-
97-
void combineCosForSumOfAcos(float cosA, float cosB, float biasA, float biasB, NBL_REF_ARG(float) out0, NBL_REF_ARG(float) out1)
98-
{
99-
impl::sincos_accumulator acc = impl::sincos_accumulator::create();
100-
impl::sincos_accumulator::combineCosForSumOfAcos(cosA, cosB, biasA, biasB, acc.tmp0, acc.tmp1);
101-
out0 = acc.tmp0;
102-
out1 = acc.tmp1;
103-
}
104-
105-
// returns acos(a) + acos(b)
106-
float getSumofArccosAB(float cosA, float cosB)
107-
{
108-
impl::sincos_accumulator acc = impl::sincos_accumulator::create();
109-
impl::sincos_accumulator::combineCosForSumOfAcos(cosA, cosB, 0.0f, 0.0f, acc.tmp0, acc.tmp1);
110-
return acos<float>(acc.tmp0) + acc.tmp1;
111-
}
112-
113-
// returns acos(a) + acos(b) + acos(c) + acos(d)
114-
float getSumofArccosABCD(float cosA, float cosB, float cosC, float cosD)
115-
{
116-
impl::sincos_accumulator acc = impl::sincos_accumulator::create();
117-
impl::sincos_accumulator::combineCosForSumOfAcos(cosA, cosB, 0.0f, 0.0f, acc.tmp0, acc.tmp1);
118-
impl::sincos_accumulator::combineCosForSumOfAcos(cosC, cosD, 0.0f, 0.0f, acc.tmp2, acc.tmp3);
119-
impl::sincos_accumulator::combineCosForSumOfAcos(acc.tmp0, acc.tmp2, acc.tmp1, acc.tmp3, acc.tmp4, acc.tmp5);
120-
return acos<float>(acc.tmp4) + acc.tmp5;
121-
}
12272

12373
}
12474
}

include/nbl/builtin/hlsl/tgmath/impl.hlsl

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
#include <nbl/builtin/hlsl/spirv_intrinsics/glsl.std.450.hlsl>
99
#include <nbl/builtin/hlsl/tgmath/output_structs.hlsl>
1010
#include <nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl>
11+
#include <nbl/builtin/hlsl/limits.hlsl>
1112
#include <nbl/builtin/hlsl/mpl.hlsl>
1213

1314
// C++ includes

0 commit comments

Comments
 (0)