diff --git a/backends/vulkan/runtime/api/Context.cpp b/backends/vulkan/runtime/api/Context.cpp
index 64d940d44fb..44804b1c86e 100644
--- a/backends/vulkan/runtime/api/Context.cpp
+++ b/backends/vulkan/runtime/api/Context.cpp
@@ -217,7 +217,7 @@ void Context::submit_cmd_to_gpu(VkFence fence_handle, const bool final_use) {
 }
 
 void Context::flush() {
-  VK_CHECK(vkQueueWaitIdle(queue()));
+  VK_CHECK(vkQueueWaitIdle(queue().handle));
 
   command_pool_.flush();
   descriptor_pool_.flush();
diff --git a/backends/vulkan/runtime/api/Context.h b/backends/vulkan/runtime/api/Context.h
index 9d8e7c92255..3efa8d0276d 100644
--- a/backends/vulkan/runtime/api/Context.h
+++ b/backends/vulkan/runtime/api/Context.h
@@ -90,8 +90,8 @@ class Context final {
     return device_;
   }
 
-  inline VkQueue queue() {
-    return queue_.handle;
+  inline vkapi::Adapter::Queue& queue() {
+    return queue_;
   }
 
   // Device Caches
@@ -230,6 +230,14 @@ class Context final {
       VkFence fence_handle = VK_NULL_HANDLE,
       const bool final_use = false);
 
+  /*
+   * Returns a mutable reference to the command buffer held by this context
+   * (cmd_), e.g. so that a caller can move it out.
+   */
+  vkapi::CommandBuffer& extract_cmd() {
+    return cmd_;
+  }
+
   void flush();
 
 #ifdef VULKAN_DEBUG
diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp
index f4740666bea..ee5621d9c12 100644
--- a/backends/vulkan/runtime/graph/ComputeGraph.cpp
+++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp
@@ -158,6 +158,7 @@ ComputeGraph::~ComputeGraph() {
 
   prepack_nodes_.clear();
   execute_nodes_.clear();
+  clear_deferred_cmds();
 
   context_->flush();
 }
@@ -775,6 +776,63 @@ void ComputeGraph::submit_current_cmd_and_wait(const bool final_use) {
   context_->fences().return_fence(fence);
 }
 
+void ComputeGraph::submit_cmd(
+    vkapi::CommandBuffer& cmd_buf,
+    VkSemaphore wait_semaphore,
+    VkSemaphore signal_semaphore,
+    VkFence fence) {
+  // Nothing is submitted for a command buffer that tests false.
+  if (cmd_buf) {
+    cmd_buf.end();
+    context_->adapter_ptr()->submit_cmd(
+        context_->queue(),
+        cmd_buf.get_submit_handle(false),
+        fence,
+        wait_semaphore,
+        signal_semaphore);
+  }
+}
+
+void ComputeGraph::submit_deferred_cmds_and_wait() {
+  // With nothing deferred there is no submission that could signal a fence,
+  // so return before acquiring one.
+  if (deferred_cmd_list_.empty()) {
+    return;
+  }
+
+  VkSemaphore prev_semaphore = VK_NULL_HANDLE;
+  vkapi::VulkanFence fence = context_->fences().get_fence();
+
+  for (uint32_t i = 0; i < deferred_cmd_list_.size(); i++) {
+    auto& cmd = deferred_cmd_list_[i];
+    // Chain the submissions: each command buffer waits on the semaphore
+    // signaled by the previous one (none for the first).
+    VkSemaphore wait_semaphore = prev_semaphore;
+    VkSemaphore signal_semaphore = cmd.get_signal_semaphore();
+    prev_semaphore = signal_semaphore;
+
+    // Only the last submission carries the fence that is waited on below.
+    submit_cmd(
+        cmd,
+        wait_semaphore,
+        signal_semaphore,
+        i == (deferred_cmd_list_.size() - 1) ? fence.get_submit_handle()
+                                             : VK_NULL_HANDLE);
+  }
+  fence.wait();
+  context_->fences().return_fence(fence);
+}
+
+void ComputeGraph::clear_deferred_cmds() {
+  for (auto& cmd : deferred_cmd_list_) {
+    if (cmd) {
+      cmd.end();
+      cmd.invalidate();
+    }
+  }
+  deferred_cmd_list_.clear();
+}
+
 void ComputeGraph::prepack() {
   int i = 0;
   bool submitted = false;
@@ -813,6 +871,7 @@ void ComputeGraph::prepack() {
 }
 
 void ComputeGraph::encode_execute() {
+  clear_deferred_cmds();
   context_->flush();
   context_->set_cmd(/*reusable = */ true);
 
@@ -821,13 +880,14 @@ void ComputeGraph::encode_execute() {
   for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
     node->encode(this);
   }
+
+  // Move the command buffer recorded above out of the context; execute()
+  // submits it through submit_deferred_cmds_and_wait().
+  deferred_cmd_list_.emplace_back(std::move(context_->extract_cmd()));
 }
 
 void ComputeGraph::execute() {
-  vkapi::VulkanFence fence = context_->fences().get_fence();
-  context_->submit_cmd_to_gpu(fence.get_submit_handle());
-  fence.wait();
-  context_->fences().return_fence(fence);
+  submit_deferred_cmds_and_wait();
   execute_count_++;
 }
 
diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h
index 1961f5046e2..4b1089b0de8 100644
--- a/backends/vulkan/runtime/graph/ComputeGraph.h
+++ b/backends/vulkan/runtime/graph/ComputeGraph.h
@@ -193,6 +193,9 @@ class ComputeGraph final {
   // Utility constexpr to express byte quantities
   constexpr static size_t MB = 1024 * 1024;
 
+  // List of command buffers deferred for submission
+  std::vector<vkapi::CommandBuffer> deferred_cmd_list_;
+
  protected:
   size_t values_in_use_ = 0;
   size_t execute_count_ = 0;
@@ -851,6 +854,30 @@ class ComputeGraph final {
    */
   void submit_current_cmd_and_wait(const bool final_use = false);
 
+  /*
+   * Ends `cmd_buf` and submits it to the context's queue with the given wait
+   * semaphore, signal semaphore and fence. Does nothing if `cmd_buf` tests
+   * false.
+   */
+  void submit_cmd(
+      vkapi::CommandBuffer& cmd_buf,
+      VkSemaphore wait_semaphore,
+      VkSemaphore signal_semaphore,
+      VkFence fence);
+
+  /*
+   * Submits all the command buffers in deferred_cmd_list_ to the GPU in
+   * order, chaining them with semaphores, then waits on a fence attached to
+   * the last submission. Returns immediately if the list is empty.
+   */
+  void submit_deferred_cmds_and_wait();
+
+  /*
+   * Ends and invalidates all deferred command buffers, then empties
+   * deferred_cmd_list_.
+   */
+  void clear_deferred_cmds();
+
  public:
   //
   // Graph Prepacking