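Improve rasterizer depth buffer output: make the depth output optional, add a HiZ output, and fill blocks whose HiZ value is 1 with -1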

xarray · xarray · commit ad0a08c9f7fa · 2025-05-07T18:09:46.000+08:00
diff --git a/pipeline/Rasterizer.cpp b/pipeline/Rasterizer.cpp
@@ -148,7 +148,8 @@ namespace osgVerse
         rd->data->setModelViewProjection(m.ptr());
     }
 
-    void UserRasterizer::render(const osg::Vec3& cameraPos, std::vector<float>& depthData)
+    void UserRasterizer::render(const osg::Vec3& cameraPos, std::vector<float>* depthData,
+                                std::vector<unsigned short>* hizData)
     {
         std::vector<BatchOccluder*> globalOccluders;
         for (std::set<osg::ref_ptr<UserOccluder>>::iterator it = _occluders.begin();
@@ -165,8 +166,8 @@ namespace osgVerse
         __m128 camPos = convertFromVec3(cameraPos);
         std::sort(globalOccluders.begin(), globalOccluders.end(), [&](const BatchOccluder* o1, const BatchOccluder* o2)
             {
-                __m128 dist1 = _mm_sub_ps(convertFromVec3(o1->getCenter()), camPos);
-                __m128 dist2 = _mm_sub_ps(convertFromVec3(o2->getCenter()), camPos);
+                __m128 dist1 = _mm_sub_ps(((BatchOccluderData*)o1->getOccluder())->data->m_center, camPos);
+                __m128 dist2 = _mm_sub_ps(((BatchOccluderData*)o2->getOccluder())->data->m_center, camPos);
                 return _mm_comilt_ss(_mm_dp_ps(dist1, dist1, 0x7f), _mm_dp_ps(dist2, dist2, 0x7f));
             });
 
@@ -186,49 +187,60 @@ namespace osgVerse
              it != _occluders.end(); ++it)
         {
             std::set<osg::ref_ptr<BatchOccluder>>& batches = (*it)->getBatches();
-            size_t count = 0, maxCount = batches.size();
+            size_t count = 0, count2 = 0, maxCount = batches.size();
 
             for (std::set<osg::ref_ptr<BatchOccluder>>::iterator it2 = batches.begin();
                  it2 != batches.end(); ++it2)
             {
                 BatchOccluder* bo = (*it2).get(); bool needsClipping = false;
                 BatchOccluderData* od = (BatchOccluderData*)bo->getOccluder();
                 if (rd->data->queryVisibility(od->data->m_boundsMin, od->data->m_boundsMax, needsClipping))
-                    count++;
+                { count++; if (needsClipping) count2++; }
             }
-            std::cout << (*it)->getName() << "; " << count << " / " << maxCount << "\n";
+            std::cout << (*it)->getName() << "; " << count << " (" << count2 << ") / " << maxCount << "\n";
         }
 
         // Get result depth image
         std::vector<__m128i>& depthBuffer = rd->data->getDepthBuffer();
         std::vector<uint16_t>& hizBuffer = rd->data->getHiZ();
-        depthData.resize(_blockNumX * _blockNumY * 64, 1.0f);
-
-        const float bias = 3.9623753e+28f; // 1.0f / floatCompressionBias
-        for (uint32_t y = 0; y < _blockNumY; ++y)
+        if (depthData)
         {
-            for (uint32_t x = 0; x < _blockNumX; ++x)
+            const float bias = 3.9623753e+28f; // 1.0f / floatCompressionBias
+            depthData->resize(_blockNumX * _blockNumY * 64);
+            for (uint32_t y = 0; y < _blockNumY; ++y)
             {
-                uint32_t index = y * _blockNumX + x;
-                if (hizBuffer[index] == 1) continue;
-
-                const __m128i* source = &depthBuffer[8 * index];
-                for (uint32_t subY = 0; subY < 8; ++subY)
+                for (uint32_t x = 0; x < _blockNumX; ++x)
                 {
-                    __m128i depthI = _mm_load_si128(source++);
-                    __m256i depthI256 = _mm256_slli_epi32(_mm256_cvtepu16_epi32(depthI), 12);
-                    __m256 depth = _mm256_mul_ps(_mm256_castsi256_ps(depthI256), _mm256_set1_ps(bias));
-                    __m256 linDepth = _mm256_div_ps(_mm256_set1_ps(2 * 0.25f),
-                                                    _mm256_sub_ps(_mm256_set1_ps(0.25f + 1000.0f),
-                                                                  _mm256_mul_ps(_mm256_sub_ps(_mm256_set1_ps(1.0f), depth),
-                                                                                _mm256_set1_ps(1000.0f - 0.25f))));
-                    float linDepthA[16]; _mm256_storeu_ps(linDepthA, linDepth);
-
-                    std::vector<float>::iterator it = depthData.begin() + ((8 * _blockNumX) * (8 * y + subY) + 8 * x);
-                    for (uint32_t subX = 0; subX < 8; ++subX, ++it) *it = linDepthA[subX];
+                    uint32_t index = y * _blockNumX + x;
+                    if (hizBuffer[index] == 1)
+                    {
+                        for (uint32_t subY = 0; subY < 8; ++subY)
+                        {
+                            std::vector<float>::iterator it = depthData->begin() + ((8 * _blockNumX) * (8 * y + subY) + 8 * x);
+                            for (uint32_t subX = 0; subX < 8; ++subX, ++it) *it = -1.0f;
+                        }
+                        continue;
+                    }
+
+                    const __m128i* source = &depthBuffer[8 * index];
+                    for (uint32_t subY = 0; subY < 8; ++subY)
+                    {
+                        __m128i depthI = _mm_load_si128(source++);
+                        __m256i depthI256 = _mm256_slli_epi32(_mm256_cvtepu16_epi32(depthI), 12);
+                        __m256 depth = _mm256_mul_ps(_mm256_castsi256_ps(depthI256), _mm256_set1_ps(bias));
+                        __m256 linDepth = _mm256_div_ps(_mm256_set1_ps(2 * 0.25f),
+                                                        _mm256_sub_ps(_mm256_set1_ps(0.25f + 1000.0f),
+                                                                      _mm256_mul_ps(_mm256_sub_ps(_mm256_set1_ps(1.0f), depth),
+                                                                                    _mm256_set1_ps(1000.0f - 0.25f))));
+                        float linDepthA[16]; _mm256_storeu_ps(linDepthA, linDepth);
+
+                        std::vector<float>::iterator it = depthData->begin() + ((8 * _blockNumX) * (8 * y + subY) + 8 * x);
+                        for (uint32_t subX = 0; subX < 8; ++subX, ++it) *it = linDepthA[subX];
+                    }
                 }
             }
         }
+        if (hizData) hizData->assign(hizBuffer.begin(), hizBuffer.end());
 
 #   if false
         std::vector<char> rawData(_blockNumX * _blockNumY * 256);
diff --git a/pipeline/Rasterizer.h b/pipeline/Rasterizer.h
@@ -15,7 +15,7 @@ namespace osgVerse
         BatchOccluder(UserOccluder* u, const std::vector<osg::Vec3> vertices,
                       const osg::BoundingBoxf& refBound);
         BatchOccluder(UserOccluder* u, void* verticesInternal, const osg::BoundingBoxf& refBound);
-        void* getOccluder() { return _privateData; }
+        void* getOccluder() const { return _privateData; }
         UserOccluder* getOwner() { return _owner; }
 
         osg::BoundingBoxf getBound() const;
@@ -51,7 +51,8 @@ namespace osgVerse
     public:
         UserRasterizer(unsigned int width, unsigned int height);
         void setModelViewProjection(const osg::Matrixf& matrix);
-        void render(const osg::Vec3& cameraPos, std::vector<float>& depthData);
+        void render(const osg::Vec3& cameraPos, std::vector<float>* depthData,
+                    std::vector<unsigned short>* hizData);
 
         void addOccluder(UserOccluder* o) { _occluders.insert(o); }
         void removeOccluder(UserOccluder* o);
diff --git a/tests/occlusion_cull_test.cpp b/tests/occlusion_cull_test.cpp
@@ -46,14 +46,14 @@ int main(int argc, char** argv)
     osg::ref_ptr<osgVerse::UserRasterizer> rasterizer = new osgVerse::UserRasterizer(1280, 720);
     rasterizer->addOccluder(occ1.get()); rasterizer->addOccluder(occ2.get());
 
-    std::vector<float> depthData;
+    std::vector<float> depthData; std::vector<unsigned short> hizData;
     while (!viewer.done())
     {
         osg::Matrix mvp = viewer.getCamera()->getViewMatrix()
                         * viewer.getCamera()->getProjectionMatrix();
         osg::Vec3 cameraPos = osg::Vec3() * viewer.getCamera()->getInverseViewMatrix();
         rasterizer->setModelViewProjection(mvp);
-        rasterizer->render(cameraPos, depthData);
+        rasterizer->render(cameraPos, &depthData, &hizData);
 
         viewer.frame();
     }
@@ -65,8 +65,13 @@ int main(int argc, char** argv)
         for (int y = 0; y < image->t(); ++y)
             for (int x = 0; x < image->s(); ++x)
             {
-                unsigned char value = (unsigned char)(255.0f * depthData[y * image->s() + x]);
-                *(ptr + y * image->s() + x) = osg::Vec4ub(value, value, value, 255);
+                unsigned char mid = 0, value = 0, alpha = 255;
+                float depth = depthData[y * image->s() + x] * 100.0f;
+                if (depth > 1.0f) { mid = (unsigned char)(floor(depth) * 2.55f); depth *= 0.01f; }
+                else if (depth < 0.0f) { alpha = 0; depth = 0.0f; }
+
+                value = (unsigned char)(255.0f * depth);
+                *(ptr + y * image->s() + x) = osg::Vec4ub(value, mid, 0, alpha);
             }
     }
     osgDB::writeImageFile(*image, "test_occlusion.png");