Skip to content

Conversation

devshgraphicsprogramming
Copy link
Member

Description

Merges #439 and #475 keeping commit histories intact.

Testing

nothing we always overwrite changes

TODO list:

Address all comments in #439 and #475

nahiim and others added 30 commits November 7, 2022 07:42
… HLSL shaders and files. Make it capable of validating #include files with depth = 1. The target has it's own rule for each input file to compile and can find and validate the file's dependencies at generate time, therefore it will be good optimized (because it won't try to recompile each shader in the engine once any input file changes) and great for CI checking all shaders in Nabla soon. TODO: create a wrapper cmake script for DXC compiling because there is something off with MSVC environment and we have errors while compiling with DXC, but we don't have them once we copy-paste the executed command in any CMD
@AnastaZIuk AnastaZIuk force-pushed the master branch 3 times, most recently from a07e79f to ffbd843 Compare January 18, 2024 21:24
Comment on lines +2 to +102
// Copyright (C) 2018-2022 - DevSH Graphics Programming Sp. z O.O.
// This file is part of the "Nabla Engine".
// For conditions of distribution and use, see copyright notice in nabla.h

#ifndef _NBL_BUILTIN_GLSL_MATH_QUATERNIONS_INCLUDED_
#define _NBL_BUILTIN_GLSL_MATH_QUATERNIONS_INCLUDED_


namespace nbl
{
namespace hlsl
{
namespace math
{


struct quaternion_t
{
float4 data;


static quaternion_t constructFromTruncated(in float3 first3Components)
{
quaternion_t quat;
quat.data.xyz = first3Components;
quat.data.w = sqrt(1.0-dot(first3Components,first3Components));
return quat;
}

static quaternion_t lerp(in quaternion_t start, in quaternion_t end, in float fraction, in float totalPseudoAngle)
{
const uint negationMask = asuint(totalPseudoAngle) & 0x80000000u;
const float4 adjEnd = asfloat(asuint(end.data)^negationMask);

quaternion_t quat;
quat.data = lerp(start.data, adjEnd, fraction);
return quat;
}
static quaternion_t lerp(in quaternion_t start, in quaternion_t end, in float fraction)
{
return lerp(start,end,fraction,dot(start.data,end.data));
}

static float flerp_impl_adj_interpolant(in float angle, in float fraction, in float interpolantPrecalcTerm2, in float interpolantPrecalcTerm3)
{
const float A = 1.0904f + angle * (-3.2452f + angle * (3.55645f - angle * 1.43519f));
const float B = 0.848013f + angle * (-1.06021f + angle * 0.215638f);
const float k = A * interpolantPrecalcTerm2 + B;
return fraction+interpolantPrecalcTerm3*k;
}

static quaternion_t flerp(in quaternion_t start, in quaternion_t end, in float fraction)
{
const float pseudoAngle = dot(start.data,end.data);

const float interpolantPrecalcTerm = fraction-0.5f;
const float interpolantPrecalcTerm3 = fraction*interpolantPrecalcTerm*(fraction-1.f);
const float adjFrac = quaternion_t::flerp_impl_adj_interpolant(abs(pseudoAngle),fraction,interpolantPrecalcTerm*interpolantPrecalcTerm,interpolantPrecalcTerm3);
quaternion_t quat = quaternion_t::lerp(start,end,adjFrac,pseudoAngle);
quat.data = normalize(quat.data);
return quat;
}

static float3x3 constructMatrix(in quaternion_t quat)
{
float3x3 mat;
mat[0] = quat.data.yzx*quat.data.ywz+quat.data.zxy*quat.data.zyw*float3( 1.f, 1.f,-1.f);
mat[1] = quat.data.yzx*quat.data.xzw+quat.data.zxy*quat.data.wxz*float3(-1.f, 1.f, 1.f);
mat[2] = quat.data.yzx*quat.data.wyx+quat.data.zxy*quat.data.xwy*float3( 1.f,-1.f, 1.f);
mat[0][0] = 0.5f-mat[0][0];
mat[1][1] = 0.5f-mat[1][1];
mat[2][2] = 0.5f-mat[2][2];
mat *= 2.f;
return mat;
}
};

float3 slerp_delta_impl(in float3 start, in float3 preScaledWaypoint, in float cosAngleFromStart)
{
float3 planeNormal = cross(start,preScaledWaypoint);

cosAngleFromStart *= 0.5;
const float sinAngle = sqrt(0.5-cosAngleFromStart);
const float cosAngle = sqrt(0.5+cosAngleFromStart);

planeNormal *= sinAngle;
const float3 precompPart = cross(planeNormal,start)*2.0;

return precompPart*cosAngle+cross(planeNormal,precompPart);
}

float3 slerp_impl_impl(in float3 start, in float3 preScaledWaypoint, in float cosAngleFromStart)
{
return start + slerp_delta_impl(start,preScaledWaypoint,cosAngleFromStart);
}



}
}
}
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kevyuu @keptsecret this file might be useful to you, but I can't guarantee correctness, so rewrite anyway but look at the ideas.

Comment on lines +13 to +129
namespace nbl
{
namespace hlsl
{


// gives false negatives
bool fastestFrustumCullAABB(in float4x4 proj, in shapes::AABB_t aabb)
{
const shapes::Frustum_t frust = shapes::Frustum_t::extract(proj);
return shapes::Frustum_t::fastestDoesNotIntersectAABB(frust, aabb);
}

// gives very few false negatives
bool fastFrustumCullAABB(in float4x4 proj, in float4x4 invProj, in shapes::AABB_t aabb)
{
if (fastestFrustumCullAABB(proj,aabb))
return true;

const shapes::Frustum_t boxInvFrustum = shapes::Frustum_t::extract(invProj);
shapes::AABB_t ndc;
ndc.minVx = float3(-1.f,-1.f,0.f);
ndc.maxVx = float3(1.f,1.f,1.f);
return shapes::Frustum_t::fastestDoesNotIntersectAABB(boxInvFrustum,ndc);
}

// perfect Separating Axis Theorem, needed for Clustered/Tiled Lighting
bool preciseFrustumCullAABB(in float4x4 proj, in float4x4 invProj, in shapes::AABB_t aabb)
{
const shapes::Frustum_t viewFrust = shapes::Frustum_t::extract(proj);
if (shapes::Frustum_t::fastestDoesNotIntersectAABB(viewFrust,aabb))
return true;

const shapes::Frustum_t boxInvFrustum = shapes::Frustum_t::extract(invProj);
shapes::AABB_t ndc;
ndc.minVx = float3(-1.f,-1.f,0.f);
ndc.maxVx = float3(1.f,1.f,1.f);
if (shapes::Frustum_t::fastestDoesNotIntersectAABB(boxInvFrustum,ndc))
return true;

float3 edges[12];
edges[ 0] = cross(viewFrust.minPlanes[0].xyz,viewFrust.minPlanes[1].xyz);
edges[ 1] = cross(viewFrust.minPlanes[0].xyz,viewFrust.minPlanes[2].xyz);
edges[ 2] = cross(viewFrust.minPlanes[0].xyz,viewFrust.maxPlanes[1].xyz);
edges[ 3] = cross(viewFrust.minPlanes[0].xyz,viewFrust.maxPlanes[2].xyz);
edges[ 4] = cross(viewFrust.minPlanes[1].xyz,viewFrust.minPlanes[0].xyz);
edges[ 5] = cross(viewFrust.minPlanes[1].xyz,viewFrust.minPlanes[2].xyz);
edges[ 6] = cross(viewFrust.minPlanes[1].xyz,viewFrust.maxPlanes[0].xyz);
edges[ 7] = cross(viewFrust.minPlanes[1].xyz,viewFrust.maxPlanes[2].xyz);
edges[ 8] = cross(viewFrust.minPlanes[2].xyz,viewFrust.minPlanes[0].xyz);
edges[ 0] = cross(viewFrust.minPlanes[2].xyz,viewFrust.minPlanes[1].xyz);
edges[10] = cross(viewFrust.minPlanes[2].xyz,viewFrust.maxPlanes[0].xyz);
edges[11] = cross(viewFrust.minPlanes[2].xyz,viewFrust.maxPlanes[1].xyz);
for (int i=0; i<12; i++)
{
#define getClosestDP(R) (dot(ndc.getFarthestPointInFront(R.xyz),R.xyz)+R.w)
/* TODO: These are buggy!
// cross(e_0,edges[i])
{
const float2 normal = float2(-edges[i].z,edges[i].y);
const bool2 negMask = lessThan(normal,float2(0.f));
const float4 planeBase = normal.x*invProj[1]+normal.y*invProj[2];

const float minAABB = dot(lerp(aabb.minVx.yz,aabb.maxVx.yz,negMask),normal);
const float4 minPlane = planeBase-invProj[3]*minAABB;
if (getClosestDP(minPlane)<=0.f)
return true;
const float maxAABB = dot(lerp(aabb.maxVx.yz,aabb.minVx.yz,negMask),normal);
const float4 maxPlane = invProj[3]*maxAABB-planeBase;
if (getClosestDP(maxPlane)<=0.f)
return true;
}
// cross(e_1,edges[i])
{
const float2 normal = float2(-edges[i].x,edges[i].z);
const bool2 negMask = lessThan(normal,float2(0.f));
const float4 planeBase = normal.x*invProj[0]+normal.y*invProj[2];
const float minAABB = dot(lerp(aabb.minVx.xz,aabb.maxVx.xz,negMask),normal);
const float4 minPlane = planeBase-invProj[3]*minAABB;
if (getClosestDP(minPlane)<=0.f)
return true;
const float maxAABB = dot(lerp(aabb.maxVx.xz,aabb.minVx.xz,negMask),normal);
const float4 maxPlane = invProj[3]*maxAABB-planeBase;
if (getClosestDP(maxPlane)<=0.f)
return true;
} the last one is probably buggy too*/
// cross(e_2,edges[i])
{
const float2 normal = float2(-edges[i].y,edges[i].x);
const bool2 negMask = normal < (0.0f).xx;
const float4 planeBase = normal.x*invProj[0]+normal.y*invProj[1];

const float minAABB = dot(lerp(aabb.minVx.xy,aabb.maxVx.xy,negMask),normal);
const float4 minPlane = planeBase-invProj[3]*minAABB;
if (getClosestDP(minPlane)<=0.f)
return true;
const float maxAABB = dot(lerp(aabb.maxVx.xy,aabb.minVx.xy,negMask),normal);
const float4 maxPlane = invProj[3]*maxAABB-planeBase;
if (getClosestDP(maxPlane)<=0.f)
return true;
}
#undef getClosestDP
}
return false;
}

// TODO: Other culls useful for clustered lighting
// - Sphere vs Frustum
// - Convex Infinite Cone vs Frustum
// - Concave Infinite Cone vs Frustum (! is frustum inside of an convex infinite cone with half angle PI-theta)


}
}


#endif No newline at end of file
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@deprilula28 some leftover code if you need

Comment on lines +20 to +67
struct Frustum_t
{
float3x4 minPlanes;
float3x4 maxPlanes;


// gives false negatives
static bool fastestDoesNotIntersectAABB(in Frustum_t frust, in AABB_t aabb)
{
#define getClosestDP(R) (dot(aabb.getFarthestPointInFront(R.xyz), R.xyz) + R.w)
if (getClosestDP(frust.minPlanes[0])<=0.f)
return true;
if (getClosestDP(frust.minPlanes[1])<=0.f)
return true;
if (getClosestDP(frust.minPlanes[2])<=0.f)
return true;

if (getClosestDP(frust.maxPlanes[0])<=0.f)
return true;
if (getClosestDP(frust.maxPlanes[1])<=0.f)
return true;

return getClosestDP(frust.maxPlanes[2])<=0.f;
#undef getClosestDP
}



// will place planes which correspond to the bounds in NDC
static Frustum_t extract(in float4x4 proj, in AABB_t bounds)
{
const float4x4 pTpose = transpose(proj);

Frustum_t frust;
frust.minPlanes = (float3x4)(pTpose) - float3x4(pTpose[3]*bounds.minVx[0], pTpose[3]*bounds.minVx[1], pTpose[3]*bounds.minVx[2]);
frust.maxPlanes = float3x4(pTpose[3]*bounds.maxVx[0], pTpose[3]*bounds.maxVx[1], pTpose[3]*bounds.maxVx[2]) - (float3x4)(pTpose);
return frust;
}

// assuming an NDC of [-1,1]^2 x [0,1]
static Frustum_t extract(in float4x4 proj)
{
AABB_t bounds;
bounds.minVx = float3(-1.f,-1.f,0.f);
bounds.maxVx = float3(1.f,1.f,1.f);
return extract(proj, bounds);
}
};
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would have probably stored the planes as shapes::InfinitePlane { float32_t3 normal; float32_t distance;}; instead of abusing 3x4 matrices

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

5 participants