diff --git a/3rdparty/EGL b/3rdparty/EGL new file mode 160000 index 0000000000..9eeb49d626 --- /dev/null +++ b/3rdparty/EGL @@ -0,0 +1 @@ +Subproject commit 9eeb49d626cb1d78c76149602b87aef6720e9c3d diff --git a/3rdparty/SPIRV-Headers b/3rdparty/SPIRV-Headers new file mode 160000 index 0000000000..b8047fbe45 --- /dev/null +++ b/3rdparty/SPIRV-Headers @@ -0,0 +1 @@ +Subproject commit b8047fbe45f426f5918fadc67e8408f5b108c3c9 diff --git a/3rdparty/SPIRV-Tools b/3rdparty/SPIRV-Tools new file mode 160000 index 0000000000..75e53b9f68 --- /dev/null +++ b/3rdparty/SPIRV-Tools @@ -0,0 +1 @@ +Subproject commit 75e53b9f685830ac42242cf0c46cc9af523bd0df diff --git a/3rdparty/radeonrays b/3rdparty/radeonrays new file mode 160000 index 0000000000..e42145aa3b --- /dev/null +++ b/3rdparty/radeonrays @@ -0,0 +1 @@ +Subproject commit e42145aa3b1bedd3a3bb0157a873e561aabb8632 diff --git a/include/nbl/builtin/hlsl/math/complex.hlsl b/include/nbl/builtin/hlsl/math/complex.hlsl new file mode 100644 index 0000000000..0dea4b5c6f --- /dev/null +++ b/include/nbl/builtin/hlsl/math/complex.hlsl @@ -0,0 +1,132 @@ + +// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h
+#ifndef _NBL_BUILTIN_HLSL_MATH_COMPLEX_INCLUDED_
+#define _NBL_BUILTIN_HLSL_MATH_COMPLEX_INCLUDED_
+
+#include <nbl/builtin/hlsl/math/constants.hlsl> // NOTE(review): include target was lost in extraction; constants.hlsl assumed because `PI` is used below - confirm
+
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace math
+{
+
+// Complex number with `real` and `imaginary` parts of type `vector_t`. NOTE(review): the template argument lists in this file were lost in extraction and are reconstructed here - confirm
+template<typename vector_t>
+struct complex_t
+{
+    // returns cos(_theta) + i*sin(_theta). TODO: do it properly as an `exp` and `exp2` overload/shadow func
+    complex_t expImaginary(in float _theta)
+    {
+        complex_t result;
+        result.real = cos(_theta);
+        result.imaginary = sin(_theta);
+        return result;
+    }
+    // part-wise sum
+    complex_t operator+(const complex_t other)
+    {
+        complex_t result;
+        result.real = real + other.real;
+        result.imaginary = imaginary + other.imaginary;
+        return result;
+    }
+    // part-wise difference
+    complex_t operator-(const complex_t other)
+    {
+        complex_t result;
+        result.real = real - other.real;
+        result.imaginary = imaginary - other.imaginary;
+        return result;
+    }
+    // complex product: (a+bi)(c+di) = (ac-bd) + (ad+bc)i
+    complex_t operator*(const complex_t other)
+    {
+        complex_t result;
+        result.real = real * other.real - imaginary * other.imaginary;
+        result.imaginary = real * other.imaginary + imaginary * other.real;
+        return result;
+    }
+    // same real part, negated imaginary part
+    complex_t conjugate()
+    {
+        complex_t result;
+        result.real = real;
+        result.imaginary = -imaginary;
+        return result;
+    }
+
+    vector_t real, imaginary;
+};
+
+
+// TODO: move to its own header
+namespace fft
+{
+// twiddle factor for index `k` of an `N`-point transform: real part is cos(-2*PI*k/N), imaginary part is the non-negative sqrt(1-real^2)
+template<typename vector_t>
+complex_t<vector_t> twiddle(in uint k, in float N)
+{
+    complex_t<vector_t> retval;
+    retval.real = cos(-2.f*PI*float(k)/N);
+    retval.imaginary = sqrt(1.f-retval.real*retval.real); // twiddle is always half the range, so no conditional -1.f needed
+    return retval;
+}
+// same as above, with N given as its base-2 logarithm
+template<typename vector_t>
+complex_t<vector_t> twiddle(in uint k, in uint logTwoN)
+{
+    return twiddle<vector_t>(k,float(1u<<logTwoN));
+}
+// twiddle factor for index `k`, conjugated when `is_inverse` is set
+template<typename vector_t>
+complex_t<vector_t> twiddle(in bool is_inverse, in uint k, in float N)
+{
+    complex_t<vector_t> w = twiddle<vector_t>(k,N); // local is not called `twiddle` so it cannot hide the function it is initialized from
+    if (is_inverse)
+        return w.conjugate();
+    return w;
+}
+// same as above, with N given as its base-2 logarithm
+template<typename vector_t>
+complex_t<vector_t> twiddle(in bool is_inverse, in uint k, in uint logTwoN)
+{
+    return twiddle<vector_t>(is_inverse,k,float(1u<<logTwoN));
+}
+
+
+// decimation in time
+// radix-2 butterfly: hi <- lo - hi*twiddle, lo <- lo + hi*twiddle
+template<typename vector_t>
+void DIT_radix2(in complex_t<vector_t> twiddle, inout complex_t<vector_t> lo, inout complex_t<vector_t> hi)
+{
+    complex_t<vector_t> wHi = hi * twiddle;
+    hi = lo-wHi;
+    lo = lo+wHi; // complex_t only defines binary +,-,* so no compound assignment
+}
+
+// decimation in frequency
+template<typename vector_t>
+void DIF_radix2(in complex_t<vector_t> twiddle, inout complex_t<vector_t> lo, inout complex_t<vector_t> hi)
+{
+    complex_t<vector_t> diff = lo-hi;
+    lo = lo+hi; // complex_t only defines binary +,-,* so no compound assignment
+    hi = diff * twiddle;
+}
+
+
+}
+
+// TODO: radices 4,8 and 16
+
+}
+}
+}
+
+
+
+#endif
diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
new file mode 100644
index 0000000000..6cf712c8a2
--- /dev/null
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -0,0 +1,104 @@
+
+// Copyright (C) 2018-2022 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#ifndef _NBL_BUILTIN_HLSL_MATH_QUATERNIONS_INCLUDED_
+#define _NBL_BUILTIN_HLSL_MATH_QUATERNIONS_INCLUDED_
+
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace math
+{
+
+// Quaternion stored as a float4 in `data`
+struct quaternion_t
+{
+    float4 data;
+
+    // rebuilds `w` as sqrt(1 - |xyz|^2) from the first three components
+    static quaternion_t constructFromTruncated(in float3 first3Components)
+    {
+        quaternion_t quat;
+        quat.data.xyz = first3Components;
+        quat.data.w = sqrt(1.0-dot(first3Components,first3Components));
+        return quat;
+    }
+    // linear interpolation; `end` is negated first when `totalPseudoAngle` has its sign bit set
+    static quaternion_t lerp(in quaternion_t start, in quaternion_t end, in float fraction, in float totalPseudoAngle)
+    {
+        const uint negationMask = asuint(totalPseudoAngle) & 0x80000000u;
+        const float4 adjEnd = asfloat(asuint(end.data)^negationMask);
+
+        quaternion_t quat;
+        quat.data = lerp(start.data, adjEnd, fraction);
+        return quat;
+    }
+    static quaternion_t lerp(in quaternion_t start, in quaternion_t end, in float fraction)
+    {
+        return lerp(start,end,fraction,dot(start.data,end.data));
+    }
+    // polynomial correction of the interpolant used by `flerp`
+    static float flerp_impl_adj_interpolant(in float angle, in float fraction, in float interpolantPrecalcTerm2, in float interpolantPrecalcTerm3)
+    {
+        const float A = 1.0904f + angle * (-3.2452f + angle * (3.55645f - angle * 1.43519f));
+        const float B = 0.848013f + angle * (-1.06021f + angle * 0.215638f);
+        const float k = A * interpolantPrecalcTerm2 + B;
+        return fraction+interpolantPrecalcTerm3*k;
+    }
+    // `lerp` with an adjusted fraction, followed by a normalize
+    static quaternion_t flerp(in quaternion_t start, in quaternion_t end, in float fraction)
+    {
+        const float pseudoAngle = dot(start.data,end.data);
+
+        const float interpolantPrecalcTerm = fraction-0.5f;
+        const float interpolantPrecalcTerm3 = fraction*interpolantPrecalcTerm*(fraction-1.f);
+        const float adjFrac = quaternion_t::flerp_impl_adj_interpolant(abs(pseudoAngle),fraction,interpolantPrecalcTerm*interpolantPrecalcTerm,interpolantPrecalcTerm3);
+        quaternion_t quat = quaternion_t::lerp(start,end,adjFrac,pseudoAngle);
+        quat.data = normalize(quat.data);
+        return quat;
+    }
+    // builds a 3x3 matrix from the quaternion components
+    static float3x3 constructMatrix(in quaternion_t quat)
+    {
+        float3x3 mat;
+        mat[0] = quat.data.yzx*quat.data.ywz+quat.data.zxy*quat.data.zyw*float3( 1.f, 1.f,-1.f);
+        mat[1] = quat.data.yzx*quat.data.xzw+quat.data.zxy*quat.data.wxz*float3(-1.f, 1.f, 1.f);
+        mat[2] = quat.data.yzx*quat.data.wyx+quat.data.zxy*quat.data.xwy*float3( 1.f,-1.f, 1.f);
+        mat[0][0] = 0.5f-mat[0][0];
+        mat[1][1] = 0.5f-mat[1][1];
+        mat[2][2] = 0.5f-mat[2][2];
+        mat *= 2.f;
+        return mat;
+    }
+};
+
+float3 slerp_delta_impl(in float3 start, in float3 preScaledWaypoint, in float cosAngleFromStart)
+{
+    float3 planeNormal = cross(start,preScaledWaypoint);
+
+    cosAngleFromStart *= 0.5;
+    const float sinAngle = sqrt(0.5-cosAngleFromStart);
+    const float cosAngle = sqrt(0.5+cosAngleFromStart);
+
+    planeNormal *= sinAngle;
+    const float3 precompPart = cross(planeNormal,start)*2.0;
+
+    return precompPart*cosAngle+cross(planeNormal,precompPart);
+}
+// `start` plus the delta computed by `slerp_delta_impl`
+float3 slerp_impl_impl(in float3 start, in float3 preScaledWaypoint, in float cosAngleFromStart)
+{
+    return start + slerp_delta_impl(start,preScaledWaypoint,cosAngleFromStart);
+}
+
+
+
+}
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/include/nbl/builtin/hlsl/random/xoroshiro.hlsl b/include/nbl/builtin/hlsl/random/xoroshiro.hlsl
index 
6cf3eff842..38bfa60f41 100644 --- a/include/nbl/builtin/hlsl/random/xoroshiro.hlsl +++ b/include/nbl/builtin/hlsl/random/xoroshiro.hlsl @@ -1,79 +1,79 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h -#ifndef _NBL_BUILTIN_HLSL_RANDOM_XOROSHIRO_HLSL_INCLUDED_ -#define _NBL_BUILTIN_HLSL_RANDOM_XOROSHIRO_HLSL_INCLUDED_ - -#include - -#include - -namespace nbl -{ -namespace hlsl -{ -// TODO -//namespace random -//{ - -struct Xoroshiro64StateHolder -{ - void xoroshiro64_state_advance() - { - state[1] ^= state[0]; - state[0] = rotl(state[0], 26u) ^ state[1] ^ (state[1]<<9u); // a, b - state[1] = rotl(state[1], 13u); // c - } - - uint32_t2 state; -}; - -struct Xoroshiro64Star -{ - using seed_type = uint32_t2; - - // TODO: create - static Xoroshiro64Star construct(const seed_type initialState) - { - Xoroshiro64StateHolder stateHolder = {initialState}; - return Xoroshiro64Star(stateHolder); - } - - uint32_t operator()() - { - const uint32_t result = stateHolder.state[0]*0x9E3779BBu; - stateHolder.xoroshiro64_state_advance(); - - return result; - } - - Xoroshiro64StateHolder stateHolder; -}; - -struct Xoroshiro64StarStar -{ - using seed_type = uint32_t2; - - // TODO: create - static Xoroshiro64StarStar construct(const seed_type initialState) - { - Xoroshiro64StateHolder stateHolder = {initialState}; - return Xoroshiro64StarStar(stateHolder); - } - - uint32_t operator()() - { - const uint32_t result = rotl(stateHolder.state[0]*0x9E3779BBu,5u)*5u; - stateHolder.xoroshiro64_state_advance(); - - return result; - } - - Xoroshiro64StateHolder stateHolder; -}; - -//} -} -} - +// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h
+#ifndef _NBL_BUILTIN_HLSL_RANDOM_XOROSHIRO_HLSL_INCLUDED_
+#define _NBL_BUILTIN_HLSL_RANDOM_XOROSHIRO_HLSL_INCLUDED_
+
+#include // NOTE(review): include target was lost in extraction - restore from the original file
+
+#include // NOTE(review): include target was lost in extraction - restore from the original file
+
+namespace nbl
+{
+namespace hlsl
+{
+// TODO
+//namespace random
+//{
+// Holds the two 32-bit state words shared by both generators below
+struct Xoroshiro64StateHolder
+{
+    void xoroshiro64_state_advance()
+    {
+        state[1] ^= state[0];
+        state[0] = rotl(state[0], 26u) ^ state[1] ^ (state[1]<<9u); // a, b
+        state[1] = rotl(state[1], 13u); // c
+    }
+
+    uint32_t2 state;
+};
+// Generator whose output is state[0]*0x9E3779BB, taken before the state is advanced
+struct Xoroshiro64Star
+{
+    using seed_type = uint32_t2;
+
+    // TODO: create
+    static Xoroshiro64Star construct(const seed_type initialState)
+    {
+        Xoroshiro64StateHolder stateHolder = {initialState};
+        return Xoroshiro64Star(stateHolder);
+    }
+
+    uint32_t operator()()
+    {
+        const uint32_t result = stateHolder.state[0]*0x9E3779BBu;
+        stateHolder.xoroshiro64_state_advance();
+
+        return result;
+    }
+
+    Xoroshiro64StateHolder stateHolder;
+};
+// Generator whose output is rotl(state[0]*0x9E3779BB,5)*5, taken before the state is advanced
+struct Xoroshiro64StarStar
+{
+    using seed_type = uint32_t2;
+
+    // TODO: create
+    static Xoroshiro64StarStar construct(const seed_type initialState)
+    {
+        Xoroshiro64StateHolder stateHolder = {initialState};
+        return Xoroshiro64StarStar(stateHolder);
+    }
+
+    uint32_t operator()()
+    {
+        const uint32_t result = rotl(stateHolder.state[0]*0x9E3779BBu,5u)*5u;
+        stateHolder.xoroshiro64_state_advance();
+
+        return result;
+    }
+
+    Xoroshiro64StateHolder stateHolder;
+};
+
+//}
+}
+}
+
 #endif
\ No newline at end of file
diff --git a/include/nbl/builtin/hlsl/shapes/frustum.hlsl b/include/nbl/builtin/hlsl/shapes/frustum.hlsl
new file mode 100644
index 0000000000..5ca185e2f4
--- /dev/null
+++ b/include/nbl/builtin/hlsl/shapes/frustum.hlsl
@@ -0,0 +1,74 @@
+
+// Copyright (C) 2018-2022 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#ifndef _NBL_BUILTIN_HLSL_SHAPES_FRUSTUM_INCLUDED_
+#define _NBL_BUILTIN_HLSL_SHAPES_FRUSTUM_INCLUDED_
+
+#include // NOTE(review): include target was lost in extraction; presumably the header declaring AABB_t - confirm
+
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace shapes
+{
+
+// Six planes stored as two groups of three float4 rows: `minPlanes` and `maxPlanes`
+struct Frustum_t
+{
+    float3x4 minPlanes;
+    float3x4 maxPlanes;
+
+    // returns true as soon as one plane yields dot(farthest AABB point in front, plane.xyz) + plane.w <= 0
+    // gives false negatives
+    static bool fastestDoesNotIntersectAABB(in Frustum_t frust, in AABB_t aabb)
+    {
+        #define getClosestDP(R) (dot(aabb.getFarthestPointInFront(R.xyz), R.xyz) + R.w)
+        if (getClosestDP(frust.minPlanes[0])<=0.f)
+            return true;
+        if (getClosestDP(frust.minPlanes[1])<=0.f)
+            return true;
+        if (getClosestDP(frust.minPlanes[2])<=0.f)
+            return true;
+
+        if (getClosestDP(frust.maxPlanes[0])<=0.f)
+            return true;
+        if (getClosestDP(frust.maxPlanes[1])<=0.f)
+            return true;
+
+        return getClosestDP(frust.maxPlanes[2])<=0.f;
+        #undef getClosestDP
+    }
+
+
+    // minPlanes[i] = pTpose[i] - pTpose[3]*bounds.minVx[i], maxPlanes[i] = pTpose[3]*bounds.maxVx[i] - pTpose[i]
+    // will place planes which correspond to the bounds in NDC
+    static Frustum_t extract(in float4x4 proj, in AABB_t bounds)
+    {
+        const float4x4 pTpose = transpose(proj);
+
+        Frustum_t frust;
+        frust.minPlanes = (float3x4)(pTpose) - float3x4(pTpose[3]*bounds.minVx[0], pTpose[3]*bounds.minVx[1], pTpose[3]*bounds.minVx[2]);
+        frust.maxPlanes = float3x4(pTpose[3]*bounds.maxVx[0], pTpose[3]*bounds.maxVx[1], pTpose[3]*bounds.maxVx[2]) - (float3x4)(pTpose);
+        return frust;
+    }
+
+    // assuming an NDC of [-1,1]^2 x [0,1]
+    static Frustum_t extract(in float4x4 proj)
+    {
+        AABB_t bounds;
+        bounds.minVx = float3(-1.f,-1.f,0.f);
+        bounds.maxVx = float3(1.f,1.f,1.f);
+        return extract(proj, bounds);
+    }
+};
+
+
+}
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/include/nbl/builtin/hlsl/shapes/rectangle.hlsl b/include/nbl/builtin/hlsl/shapes/rectangle.hlsl
new file mode 100644
index 0000000000..3fe5cb4fec
--- /dev/null
+++ b/include/nbl/builtin/hlsl/shapes/rectangle.hlsl
@@ -0,0 +1,43 @@
+
+// Copyright (C) 2018-2022 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#ifndef _NBL_BUILTIN_HLSL_SHAPES_RECTANGLE_INCLUDED_
+#define _NBL_BUILTIN_HLSL_SHAPES_RECTANGLE_INCLUDED_
+
+#include // NOTE(review): include target was lost in extraction; presumably the math header providing getSumofArccosABCD and PI - confirm
+
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace shapes
+{
+
+// expresses (rectangleOrigin-observer) in the basis given by `rectangleNormalBasis`
+float3 getSphericalRectangle(in float3 observer, in float3 rectangleOrigin, in float3x3 rectangleNormalBasis)
+{
+    return mul((rectangleOrigin-observer), rectangleNormalBasis);
+}
+// sum of the four arccosines of `cosGamma` minus 2*PI
+float SolidAngleOfRectangle(in float3 r0, in float2 rectangleExtents)
+{
+    const float4 denorm_n_z = float4(-r0.y, r0.x+rectangleExtents.x, r0.y+rectangleExtents.y, -r0.x);
+    const float4 n_z = denorm_n_z*rsqrt(float4(r0.z*r0.z,r0.z*r0.z,r0.z*r0.z,r0.z*r0.z)+denorm_n_z*denorm_n_z);
+    const float4 cosGamma = float4(
+        -n_z[0]*n_z[1],
+        -n_z[1]*n_z[2],
+        -n_z[2]*n_z[3],
+        -n_z[3]*n_z[0]
+    );
+    return math::getSumofArccosABCD(cosGamma[0], cosGamma[1], cosGamma[2], cosGamma[3]) - 2 * math::PI;
+}
+
+
+}
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/include/nbl/builtin/hlsl/shapes/triangle.hlsl b/include/nbl/builtin/hlsl/shapes/triangle.hlsl
new file mode 100644
index 0000000000..134a482739
--- /dev/null
+++ b/include/nbl/builtin/hlsl/shapes/triangle.hlsl
@@ -0,0 +1,116 @@
+
+#ifndef _NBL_BUILTIN_HLSL_SHAPES_TRIANGLE_INCLUDED_
+#define _NBL_BUILTIN_HLSL_SHAPES_TRIANGLE_INCLUDED_
+
+#include // NOTE(review): the three include targets below were lost in extraction - restore from the original file
+#include
+#include
+
+namespace nbl
+{
+namespace hlsl
+{
+
+namespace shapes
+{
+
+// normalized directions from `origin` to the three vectors of transpose(vertices)
+float3x3 getSphericalTriangle(in float3x3 vertices, in float3 origin)
+{
+    float3x3 tp_vertices = transpose(vertices);
+
+    // the `normalize` cannot be optimized out
+    return float3x3(normalize(tp_vertices[0]-origin), normalize(tp_vertices[1]-origin), normalize(tp_vertices[2]-origin));
+}
+
+// returns true if pyramid degenerated into a line
+bool SphericalTrianglePyramidAngles(in float3x3 sphericalVertices, out float3 cos_sides, out float3 csc_sides)
+{
+    float3x3 tp_sphericalVertices = transpose(sphericalVertices);
+    // The sides are denoted by lower-case letters a, b, and c.
+    // On the unit sphere their lengths are numerically equal to the radian measure of the angles that the great circle arcs subtend at the centre.
+    // The sides of proper spherical triangles are (by convention) less than PI
+    cos_sides = float3(dot(tp_sphericalVertices[1],tp_sphericalVertices[2]),
+        dot(tp_sphericalVertices[2],tp_sphericalVertices[0]),
+        dot(tp_sphericalVertices[0],tp_sphericalVertices[1]));
+    csc_sides = rsqrt(float3(1.f,1.f,1.f)-cos_sides*cos_sides);
+    return any((csc_sides >= float3(FLT_MAX,FLT_MAX,FLT_MAX)));
+}
+
+// returns solid angle of a spherical triangle, this function is beyond optimized.
+float SolidAngleOfTriangle(in float3x3 sphericalVertices, out float3 cos_vertices, out float3 sin_vertices, out float cos_a, out float cos_c, out float csc_b, out float csc_c)
+{
+    float3 cos_sides,csc_sides;
+    if (SphericalTrianglePyramidAngles(sphericalVertices,cos_sides,csc_sides))
+        return 0.f;
+
+    // these variables might eventually get optimized out
+    cos_a = cos_sides[0];
+    cos_c = cos_sides[2];
+    csc_b = csc_sides[1];
+    csc_c = csc_sides[2];
+
+    // Both vertices and angles at the vertices are denoted by the same upper case letters A, B, and C. The angles A, B, C of the triangle are equal to the angles between the planes that intersect the surface of the sphere or, equivalently, the angles between the tangent vectors of the great circle arcs where they meet at the vertices. Angles are in radians. The angles of proper spherical triangles are (by convention) less than PI
+    cos_vertices = clamp((cos_sides-cos_sides.yzx*cos_sides.zxy)*csc_sides.yzx*csc_sides.zxy,float3(-1.f,-1.f,-1.f),float3(1.f,1.f,1.f));
+    // using Spherical Law of Cosines (TODO: do we need to clamp anymore? since the pyramid angles method introduction?)
+    sin_vertices = sqrt(float3(1.f,1.f,1.f)-cos_vertices*cos_vertices);
+
+    // the solid angle of a triangle is the sum of its planar vertices' angles minus PI
+    return math::getArccosSumofABC_minus_PI(cos_vertices[0],cos_vertices[1],cos_vertices[2],sin_vertices[0],sin_vertices[1],sin_vertices[2]);
+}
+float SolidAngleOfTriangle(in float3x3 sphericalVertices)
+{
+    float3 dummy0,dummy1;
+    float dummy2,dummy3,dummy4,dummy5;
+    return SolidAngleOfTriangle(sphericalVertices,dummy0,dummy1,dummy2,dummy3,dummy4,dummy5);
+}
+// returns solid angle of a triangle given by its world-space vertices and world-space viewing position
+float SolidAngleOfTriangle(in float3x3 vertices, in float3 origin)
+{
+    return SolidAngleOfTriangle(getSphericalTriangle(vertices,origin));
+}
+
+
+// return projected solid angle of a spherical triangle
+float ProjectedSolidAngleOfTriangle(in float3x3 sphericalVertices, in float3 receiverNormal, out float3 cos_sides, out float3 csc_sides, out float3 cos_vertices)
+{
+    if (SphericalTrianglePyramidAngles(sphericalVertices,cos_sides,csc_sides))
+        return 0.f;
+
+    float3x3 tp_sphericalVertices = transpose(sphericalVertices);
+
+    const float3x3 awayFromEdgePlane = float3x3(
+        cross(tp_sphericalVertices[1],tp_sphericalVertices[2])*csc_sides[0],
+        cross(tp_sphericalVertices[2],tp_sphericalVertices[0])*csc_sides[1],
+        cross(tp_sphericalVertices[0],tp_sphericalVertices[1])*csc_sides[2]
+    );
+
+    float3x3 tp_awayFromEdgePlane = transpose(awayFromEdgePlane);
+
+    // useless here but could be useful somewhere else
+    cos_vertices[0] = dot(tp_awayFromEdgePlane[1],tp_awayFromEdgePlane[2]);
+    cos_vertices[1] = dot(tp_awayFromEdgePlane[2],tp_awayFromEdgePlane[0]);
+    cos_vertices[2] = dot(tp_awayFromEdgePlane[0],tp_awayFromEdgePlane[1]);
+    // TODO: above dot products are in the wrong order, either work out which is which, or try all 6 permutations till it works
+    cos_vertices = clamp((cos_sides-cos_sides.yzx*cos_sides.zxy)*csc_sides.yzx*csc_sides.zxy,float3(-1.f,-1.f,-1.f),float3(1.f,1.f,1.f));
+
+    const float3 externalProducts = abs(mul(tp_awayFromEdgePlane, receiverNormal));
+
+    const float3 pyramidAngles = acos(cos_sides);
+    return dot(pyramidAngles,externalProducts)/(2.f * math::PI);
+}
+float ProjectedSolidAngleOfTriangle(in float3x3 sphericalVertices, in float3 receiverNormal, out float3 cos_sides, out float3 csc_sides)
+{
+    float3 cos_vertices;
+    return ProjectedSolidAngleOfTriangle(sphericalVertices,receiverNormal,cos_sides,csc_sides,cos_vertices);
+}
+float ProjectedSolidAngleOfTriangle(in float3x3 sphericalVertices, in float3 receiverNormal)
+{
+    float3 cos_sides,csc_sides;
+    return ProjectedSolidAngleOfTriangle(sphericalVertices,receiverNormal,cos_sides,csc_sides);
+}
+
+}
+}
+}
+#endif
\ No newline at end of file
diff --git a/include/nbl/builtin/hlsl/utils/compressed_normal_matrix_t.hlsl b/include/nbl/builtin/hlsl/utils/compressed_normal_matrix_t.hlsl
new file mode 100644
index 0000000000..6614ea4b83
--- /dev/null
+++ b/include/nbl/builtin/hlsl/utils/compressed_normal_matrix_t.hlsl
@@ -0,0 +1,41 @@
+
+#ifndef _NBL_BUILTIN_HLSL_UTILS_COMPRESSED_NORMAL_MATRIX_T_INCLUDED_
+#define _NBL_BUILTIN_HLSL_UTILS_COMPRESSED_NORMAL_MATRIX_T_INCLUDED_
+
+#include "nbl/builtin/hlsl/common.hlsl"
+#include "nbl/builtin/hlsl/math/constants.hlsl"
+
+namespace nbl
+{
+namespace hlsl
+{
+
+struct CompressedNormalMatrix_t
+{
+    uint4 data;
+
+    // unpacks nine snorm16 matrix entries: eight from the upper 14 bits of each 16-bit half, m[0].x from the gathered low 2-bit pairs
+    float3x3 decode(in CompressedNormalMatrix_t compr)
+    {
+        float3x3 m;
+
+        const uint4 bottomBits = compr.data & (0x00030003u).xxxx;
+        const uint firstComp = (bottomBits[3]<<6u)|(bottomBits[2]<<4u)|(bottomBits[1]<<2u)|bottomBits[0];
+        m[0].x = unpackSnorm2x16((firstComp>>8u)|firstComp).x;
+
+        const uint4 remaining8Comp = compr.data & (0xFFFCFFFCu).xxxx;
+        m[0].yz = unpackSnorm2x16(remaining8Comp[0]);
+        m[1].xy = unpackSnorm2x16(remaining8Comp[1]);
+        const float2 tmp = unpackSnorm2x16(remaining8Comp[2]);
+        m[1].z = tmp[0];
+        m[2].x = tmp[1];
+        m[2].yz = unpackSnorm2x16(remaining8Comp[3]);
+
+        return m;
+    }
+};
+
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/include/nbl/builtin/hlsl/utils/culling.hlsl b/include/nbl/builtin/hlsl/utils/culling.hlsl
new file mode 100644
index 0000000000..092d2ba539
--- /dev/null
+++ b/include/nbl/builtin/hlsl/utils/culling.hlsl
@@ -0,0 +1,129 @@
+
+// Copyright (C) 2018-2022 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#ifndef _NBL_BUILTIN_HLSL_UTILS_CULLING_INCLUDED_
+#define _NBL_BUILTIN_HLSL_UTILS_CULLING_INCLUDED_
+
+#include // NOTE(review): both include targets were lost in extraction; presumably the headers declaring shapes::AABB_t and shapes::Frustum_t - confirm
+#include
+
+
+namespace nbl
+{
+namespace hlsl
+{
+
+// tests `aabb` against the planes extracted from `proj` for the default NDC bounds; true means culled
+// gives false negatives
+bool fastestFrustumCullAABB(in float4x4 proj, in shapes::AABB_t aabb)
+{
+    const shapes::Frustum_t frust = shapes::Frustum_t::extract(proj);
+    return shapes::Frustum_t::fastestDoesNotIntersectAABB(frust, aabb);
+}
+
+// gives very few false negatives
+bool fastFrustumCullAABB(in float4x4 proj, in float4x4 invProj, in shapes::AABB_t aabb)
+{
+    if (fastestFrustumCullAABB(proj,aabb))
+        return true;
+    // reverse test: planes bounding `aabb`, extracted through `invProj`, are tested against the NDC box
+    const shapes::Frustum_t boxInvFrustum = shapes::Frustum_t::extract(invProj,aabb);
+    shapes::AABB_t ndc;
+    ndc.minVx = float3(-1.f,-1.f,0.f);
+    ndc.maxVx = float3(1.f,1.f,1.f);
+    return shapes::Frustum_t::fastestDoesNotIntersectAABB(boxInvFrustum,ndc);
+}
+
+// perfect Separating Axis Theorem, needed for Clustered/Tiled Lighting
+bool preciseFrustumCullAABB(in float4x4 proj, in float4x4 invProj, in shapes::AABB_t aabb)
+{
+    const shapes::Frustum_t viewFrust = shapes::Frustum_t::extract(proj);
+    if (shapes::Frustum_t::fastestDoesNotIntersectAABB(viewFrust,aabb))
+        return true;
+    // reverse test: planes bounding `aabb`, extracted through `invProj`, are tested against the NDC box
+    const shapes::Frustum_t boxInvFrustum = shapes::Frustum_t::extract(invProj,aabb);
+    shapes::AABB_t ndc;
+    ndc.minVx = float3(-1.f,-1.f,0.f);
+    ndc.maxVx = float3(1.f,1.f,1.f);
+    if (shapes::Frustum_t::fastestDoesNotIntersectAABB(boxInvFrustum,ndc))
+        return true;
+
+    float3 edges[12];
+    edges[ 0] = cross(viewFrust.minPlanes[0].xyz,viewFrust.minPlanes[1].xyz);
+    edges[ 1] = cross(viewFrust.minPlanes[0].xyz,viewFrust.minPlanes[2].xyz);
+    edges[ 2] = cross(viewFrust.minPlanes[0].xyz,viewFrust.maxPlanes[1].xyz);
+    edges[ 3] = cross(viewFrust.minPlanes[0].xyz,viewFrust.maxPlanes[2].xyz);
+    edges[ 4] = cross(viewFrust.minPlanes[1].xyz,viewFrust.minPlanes[0].xyz);
+    edges[ 5] = cross(viewFrust.minPlanes[1].xyz,viewFrust.minPlanes[2].xyz);
+    edges[ 6] = cross(viewFrust.minPlanes[1].xyz,viewFrust.maxPlanes[0].xyz);
+    edges[ 7] = cross(viewFrust.minPlanes[1].xyz,viewFrust.maxPlanes[2].xyz);
+    edges[ 8] = cross(viewFrust.minPlanes[2].xyz,viewFrust.minPlanes[0].xyz);
+    edges[ 9] = cross(viewFrust.minPlanes[2].xyz,viewFrust.minPlanes[1].xyz); // slot 9; every one of the 12 slots is read by the loop below
+    edges[10] = cross(viewFrust.minPlanes[2].xyz,viewFrust.maxPlanes[0].xyz);
+    edges[11] = cross(viewFrust.minPlanes[2].xyz,viewFrust.maxPlanes[1].xyz);
+    for (int i=0; i<12; i++)
+    {
+#define getClosestDP(R) (dot(ndc.getFarthestPointInFront(R.xyz),R.xyz)+R.w)
+        /* TODO: These are buggy!
+        // cross(e_0,edges[i])
+        {
+            const float2 normal = float2(-edges[i].z,edges[i].y);
+            const bool2 negMask = lessThan(normal,float2(0.f));
+            const float4 planeBase = normal.x*invProj[1]+normal.y*invProj[2];
+
+            const float minAABB = dot(lerp(aabb.minVx.yz,aabb.maxVx.yz,negMask),normal);
+            const float4 minPlane = planeBase-invProj[3]*minAABB;
+            if (getClosestDP(minPlane)<=0.f)
+                return true;
+            const float maxAABB = dot(lerp(aabb.maxVx.yz,aabb.minVx.yz,negMask),normal);
+            const float4 maxPlane = invProj[3]*maxAABB-planeBase;
+            if (getClosestDP(maxPlane)<=0.f)
+                return true;
+        }
+        // cross(e_1,edges[i])
+        {
+            const float2 normal = float2(-edges[i].x,edges[i].z);
+            const bool2 negMask = lessThan(normal,float2(0.f));
+            const float4 planeBase = normal.x*invProj[0]+normal.y*invProj[2];
+            const float minAABB = dot(lerp(aabb.minVx.xz,aabb.maxVx.xz,negMask),normal);
+            const float4 minPlane = planeBase-invProj[3]*minAABB;
+            if (getClosestDP(minPlane)<=0.f)
+                return true;
+            const float maxAABB = dot(lerp(aabb.maxVx.xz,aabb.minVx.xz,negMask),normal);
+            const float4 maxPlane = invProj[3]*maxAABB-planeBase;
+            if (getClosestDP(maxPlane)<=0.f)
+                return true;
+        } the last one is probably buggy too*/
+        // cross(e_2,edges[i])
+        {
+            const float2 normal = float2(-edges[i].y,edges[i].x);
+            const bool2 negMask = normal < (0.0f).xx;
+            const float4 planeBase = normal.x*invProj[0]+normal.y*invProj[1];
+
+            const float minAABB = dot(lerp(aabb.minVx.xy,aabb.maxVx.xy,negMask),normal);
+            const float4 minPlane = planeBase-invProj[3]*minAABB;
+            if (getClosestDP(minPlane)<=0.f)
+                return true;
+            const float maxAABB = dot(lerp(aabb.maxVx.xy,aabb.minVx.xy,negMask),normal);
+            const float4 maxPlane = invProj[3]*maxAABB-planeBase;
+            if (getClosestDP(maxPlane)<=0.f)
+                return true;
+        }
+#undef getClosestDP
+    }
+    return false;
+}
+
+// TODO: Other culls useful for clustered lighting
+// - Sphere vs Frustum
+// - Convex Infinite Cone vs Frustum
+// - Concave Infinite Cone vs Frustum (! 
is frustum inside of a convex infinite cone with half angle PI-theta)
+
+
+}
+}
+
+
+#endif
\ No newline at end of file
diff --git a/include/nbl/builtin/hlsl/utils/normal_decode.hlsl b/include/nbl/builtin/hlsl/utils/normal_decode.hlsl
new file mode 100644
index 0000000000..0d9a1bd8ae
--- /dev/null
+++ b/include/nbl/builtin/hlsl/utils/normal_decode.hlsl
@@ -0,0 +1,33 @@
+
+// Copyright (C) 2018-2022 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#ifndef _NBL_BUILTIN_HLSL_UTILS_NORMAL_DECODE_INCLUDED_
+#define _NBL_BUILTIN_HLSL_UTILS_NORMAL_DECODE_INCLUDED_
+
+#include "nbl/builtin/hlsl/math/constants.hlsl"
+
+#include "nbl/builtin/hlsl/utils/compressed_normal_matrix_t.hlsl"
+
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace normal_decode
+{
+
+// enc.x*PI is the angle in the xy plane, enc.y is z; xy is scaled by sqrt(1-z^2)
+float3 signedSpherical(in float2 enc)
+{
+    float ang = enc.x * math::PI;
+    return float3(float2(cos(ang),sin(ang))*sqrt(1.0-enc.y*enc.y), enc.y);
+}
+
+
+}
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/include/nbl/builtin/hlsl/utils/normal_encode.hlsl b/include/nbl/builtin/hlsl/utils/normal_encode.hlsl
new file mode 100644
index 0000000000..585d91425f
--- /dev/null
+++ b/include/nbl/builtin/hlsl/utils/normal_encode.hlsl
@@ -0,0 +1,27 @@
+
+#ifndef _NBL_BUILTIN_HLSL_UTILS_NORMAL_ENCODE_INCLUDED_
+#define _NBL_BUILTIN_HLSL_UTILS_NORMAL_ENCODE_INCLUDED_
+
+#include "nbl/builtin/hlsl/math/constants.hlsl"
+#include "nbl/builtin/hlsl/utils/compressed_normal_matrix_t.hlsl"
+
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace normal_encode
+{
+
+// returns (atan2(n.y,n.x)/PI, n.z), the pair consumed by normal_decode::signedSpherical
+float2 signedSpherical(in float3 n)
+{
+    return float2(atan2(n.y,n.x)/math::PI, n.z);
+}
+
+
+}
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/include/nbl/builtin/hlsl/utils/surface_transform.hlsl b/include/nbl/builtin/hlsl/utils/surface_transform.hlsl
new file mode 100644
index 0000000000..2bc7a7573d
--- /dev/null
+++ 
b/include/nbl/builtin/hlsl/utils/surface_transform.hlsl
@@ -0,0 +1,177 @@
+
+// Copyright (C) 2018-2022 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#ifndef _NBL_BUILTIN_HLSL_SURFACE_TRANSFORM_INCLUDED_
+#define _NBL_BUILTIN_HLSL_SURFACE_TRANSFORM_INCLUDED_
+
+#include "nbl/builtin/hlsl/utils/surface_transform_e.h"
+
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace surface_transform
+{
+
+
+//! Use this function to apply the INVERSE of the swapchain transformation to the screenspace coordinate `coord`
+//! For example when the device orientation is 90°CW then this transforms the point 90°CCW.
+//! Usecase = [Gather]:
+//!   Applications such as raytracing in shaders where you would want to generate rays from screen space coordinates.
+//! Warnings:
+//! - You don't need to consider using this in your raytracing shaders if you apply the forward transformation to your projection matrix.
+//! - Be aware that almost always you'd want to do a single transform in your rendering pipeline.
+int2 applyInverseToScreenSpaceCoordinate(in uint swapchainTransform, in int2 coord, in int2 screenSize)
+{
+    int2 lastTexel = screenSize - (1).xx; // highest valid texel index along each axis
+    switch (swapchainTransform)
+    {
+    case IDENTITY:
+        return coord;
+    case ROTATE_90:
+        return int2(lastTexel.y - coord.y, coord.x);
+    case ROTATE_180:
+        return int2(lastTexel) - coord;
+    case ROTATE_270:
+        return int2(coord.y, lastTexel.x - coord.x);
+    case HORIZONTAL_MIRROR:
+        return int2(lastTexel.x - coord.x, coord.y);
+    case HORIZONTAL_MIRROR_ROTATE_90:
+        return lastTexel - coord.yx;
+    case HORIZONTAL_MIRROR_ROTATE_180:
+        return int2(coord.x, lastTexel.y - coord.y);
+    case HORIZONTAL_MIRROR_ROTATE_270:
+        return coord.yx;
+    default: // unrecognised transform value
+        return (0).xx;
+    }
+}
+
+//! Use this function to apply the swapchain transformation to the screenspace coordinate `coord`
+//! Usecase = [Scatter]:
+//! 
When directly writing to your swapchain using `imageStore` in order to match the orientation of the device relative to its natural orientation.
+//! Warning: Be aware that almost always you'd want to do a single transform in your rendering pipeline.
+int2 applyToScreenSpaceCoordinate(in uint swapchainTransform, in int2 coord, in int2 screenSize)
+{
+    int2 lastTexel = screenSize - (1).xx; // highest valid texel index along each axis
+    switch (swapchainTransform)
+    {
+    case IDENTITY:
+        return coord;
+    case ROTATE_90:
+        return int2(coord.y, lastTexel.x - coord.x);
+    case ROTATE_180:
+        return int2(lastTexel) - coord;
+    case ROTATE_270:
+        return int2(lastTexel.y - coord.y, coord.x);
+    case HORIZONTAL_MIRROR:
+        return int2(lastTexel.x - coord.x, coord.y);
+    case HORIZONTAL_MIRROR_ROTATE_90:
+        return coord.yx;
+    case HORIZONTAL_MIRROR_ROTATE_180:
+        return int2(coord.x, lastTexel.y - coord.y);
+    case HORIZONTAL_MIRROR_ROTATE_270:
+        return lastTexel - coord.yx;
+    default: // unrecognised transform value
+        return (0).xx;
+    }
+}
+
+//! [width,height] might switch to [height, width] in orientations such as 90°CW
+//! Usecase: Currently none in the shaders
+int2 transformedExtents(in uint swapchainTransform, in int2 screenSize)
+{
+    switch (swapchainTransform)
+    {
+    case IDENTITY:
+    case HORIZONTAL_MIRROR:
+    case HORIZONTAL_MIRROR_ROTATE_180:
+    case ROTATE_180:
+        return screenSize; // these four keep the extents as they are
+    case ROTATE_90:
+    case ROTATE_270:
+    case HORIZONTAL_MIRROR_ROTATE_90:
+    case HORIZONTAL_MIRROR_ROTATE_270:
+        return screenSize.yx; // these four swap width and height
+    default: // unrecognised transform value
+        return (0).xx;
+    }
+}
+
+// TODO: surface_transform::transformedDerivatives implementations are untested
+
+// If rendering directly to the swapchain, dFdx/dFdy operations may be incorrect due to the swapchain
+// transform. Use these helper functions to transform the dFdx or dFdy accordingly.
+// Each overload below expands the shared switch from surface_transform_transformedDerivatives.hlsl with OUTPUT_TYPE set to its own return type
+float2 transformedDerivatives(in uint swapchainTransform, in float2 ddxDdy)
+{
+    #define OUTPUT_TYPE float2
+    #include "nbl/builtin/hlsl/utils/surface_transform_transformedDerivatives.hlsl"
+    #undef OUTPUT_TYPE
+}
+float2x2 transformedDerivatives(in uint swapchainTransform, in float2x2 ddxDdy)
+{
+    #define OUTPUT_TYPE float2x2
+    #include "nbl/builtin/hlsl/utils/surface_transform_transformedDerivatives.hlsl"
+    #undef OUTPUT_TYPE
+}
+float2x3 transformedDerivatives(in uint swapchainTransform, in float2x3 ddxDdy)
+{
+    #define OUTPUT_TYPE float2x3
+    #include "nbl/builtin/hlsl/utils/surface_transform_transformedDerivatives.hlsl"
+    #undef OUTPUT_TYPE
+}
+float2x4 transformedDerivatives(in uint swapchainTransform, in float2x4 ddxDdy)
+{
+    #define OUTPUT_TYPE float2x4
+    #include "nbl/builtin/hlsl/utils/surface_transform_transformedDerivatives.hlsl"
+    #undef OUTPUT_TYPE
+}
+
+//! Same as `surface_transform::applyToScreenSpaceCoordinate` but in NDC space
+//! If rendering to the swapchain, you may use this function to transform the NDC coordinates directly
+//! to be fed into gl_Position in vertex shading
+//! Warning: Be aware that almost always you'd want to do a single transform in your rendering pipeline.
+float2 applyToNDC(in uint swapchainTransform, in float2 ndc)
+{
+    const float sin90 = 1.0, cos90 = 0.0, // exact sine/cosine values of the three rotation angles
+        sin180 = 0.0, cos180 = -1.0,
+        sin270 = -1.0, cos270 = 0.0;
+    switch (swapchainTransform)
+    {
+    case ROTATE_90:
+        return mul(ndc, float2x2(cos90, -sin90, sin90, cos90));
+
+    case ROTATE_180:
+        return mul(ndc, float2x2(cos180, -sin180, sin180, cos180));
+
+    case ROTATE_270:
+        return mul(ndc, float2x2(cos270, -sin270, sin270, cos270));
+
+    case HORIZONTAL_MIRROR:
+        return mul(ndc, float2x2(-1, 0, 0, 1));
+
+    case HORIZONTAL_MIRROR_ROTATE_90:
+        return mul(ndc, float2x2(-cos90, sin90, sin90, cos90));
+
+    case HORIZONTAL_MIRROR_ROTATE_180:
+        return mul(ndc, float2x2(-cos180, sin180, sin180, cos180));
+
+    case HORIZONTAL_MIRROR_ROTATE_270:
+        return mul(ndc, float2x2(-cos270, sin270, sin270, cos270));
+
+    default: // IDENTITY has no case of its own, so it and any unrecognised value return `ndc` unchanged
+        return ndc;
+    }
+}
+
+
+
+}
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/include/nbl/builtin/hlsl/utils/surface_transform_e.h b/include/nbl/builtin/hlsl/utils/surface_transform_e.h
new file mode 100644
index 0000000000..2024ae7c73
--- /dev/null
+++ b/include/nbl/builtin/hlsl/utils/surface_transform_e.h
@@ -0,0 +1,31 @@
+
+// Copyright (C) 2018-2022 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#ifndef _NBL_BUILTIN_HLSL_SURFACE_TRANSFORM_E_INCLUDED_
+#define _NBL_BUILTIN_HLSL_SURFACE_TRANSFORM_E_INCLUDED_
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace surface_transform
+{
+
+// One bit per transform. NOTE(review): values presumably mirror VkSurfaceTransformFlagBitsKHR - confirm
+static const uint IDENTITY = 0x00000001;
+static const uint ROTATE_90 = 0x00000002;
+static const uint ROTATE_180 = 0x00000004;
+static const uint ROTATE_270 = 0x00000008;
+static const uint HORIZONTAL_MIRROR = 0x00000010;
+static const uint HORIZONTAL_MIRROR_ROTATE_90 = 0x00000020;
+static const uint HORIZONTAL_MIRROR_ROTATE_180 = 0x00000040;
+static const uint HORIZONTAL_MIRROR_ROTATE_270 = 0x00000080;
+
+
+}
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/include/nbl/builtin/hlsl/utils/surface_transform_transformedDerivatives.hlsl b/include/nbl/builtin/hlsl/utils/surface_transform_transformedDerivatives.hlsl
new file mode 100644
index 0000000000..e23523f786
--- /dev/null
+++ b/include/nbl/builtin/hlsl/utils/surface_transform_transformedDerivatives.hlsl
@@ -0,0 +1,29 @@
+
+// Copyright (C) 2018-2022 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+// Function-body snippet with no include guard: uses `swapchainTransform`, `ddxDdy` and the `OUTPUT_TYPE` macro, none of which are defined here
+
+switch (swapchainTransform)
+{
+    case IDENTITY:
+        return ddxDdy;
+    case HORIZONTAL_MIRROR:
+        return OUTPUT_TYPE(-ddxDdy[0], ddxDdy[1]);
+    case HORIZONTAL_MIRROR_ROTATE_180:
+        return OUTPUT_TYPE(ddxDdy[0], -ddxDdy[1]);
+    case ROTATE_180:
+        return OUTPUT_TYPE(-ddxDdy[0], -ddxDdy[1]);
+    case ROTATE_90:
+        return OUTPUT_TYPE(ddxDdy[1], -ddxDdy[0]);
+    case ROTATE_270:
+        return OUTPUT_TYPE(-ddxDdy[1], ddxDdy[0]);
+    case HORIZONTAL_MIRROR_ROTATE_90:
+        return OUTPUT_TYPE(ddxDdy[1], ddxDdy[0]);
+    case HORIZONTAL_MIRROR_ROTATE_270:
+        return OUTPUT_TYPE(-ddxDdy[1], -ddxDdy[0]);
+    default: // unrecognised transform value
+        return (0);
+}
+