diff --git a/include/API/Device.h b/include/API/Device.h index c5be534ba..bab463c13 100644 --- a/include/API/Device.h +++ b/include/API/Device.h @@ -335,6 +335,10 @@ createRenderTargetFromCPUBuffer(Device &Dev, const CPUBuffer &Buf); llvm::Expected> createDefaultDepthStencilTarget(Device &Dev, uint32_t Width, uint32_t Height); +// Creates a depth texture from a CPUBuffer whose GpuFormat is a depth format. +llvm::Expected> +createDepthBufferFromCPUBuffer(Device &Dev, const CPUBuffer &Buf); + llvm::Expected> createBufferWithData(Device &Dev, std::string Name, const BufferCreateDesc &Desc, const void *Data, diff --git a/include/API/FormatConversion.h b/include/API/FormatConversion.h index 096d6aaf4..619d671ac 100644 --- a/include/API/FormatConversion.h +++ b/include/API/FormatConversion.h @@ -80,13 +80,6 @@ inline llvm::Expected toFormat(DataFormat Format, int Channels) { return Format::RGBA32Float; } break; - case DataFormat::Depth32: - // D32FloatS8Uint is not expressible as DataFormat + Channels because the - // stencil component is uint8, not a second Depth32 channel. Once the - // pipeline uses Format directly, this limitation goes away. - if (Channels == 1) - return Format::D32Float; - break; case DataFormat::UInt64: // Only 1 and 2 channels of 64-bit integers are supported. switch (Channels) { @@ -172,6 +165,42 @@ validateTextureDescMatchesCPUBuffer(const TextureCreateDesc &Desc, return llvm::Error::success(); } +// Validates that a TextureCreateDesc's dimensions and footprint are consistent +// with the CPUBuffer used for readback storage. Call this when format +// equivalence is not derived from DataFormat and Channels. +// This helper intentionally skips the toFormat-based format check. +// In that path, Desc.Fmt is set directly from GpuFormat. +inline llvm::Error +validateTextureDimsMatchCPUBuffer(const TextureCreateDesc &Desc, + const CPUBuffer &Buf) { + if (Desc.Width != static_cast(Buf.OutputProps.Width)) + return llvm::createStringError( + std::errc::invalid_argument, + "TextureCreateDesc width %u does not match CPUBuffer width %d.", + Desc.Width, Buf.OutputProps.Width); + if (Desc.Height != static_cast(Buf.OutputProps.Height)) + return llvm::createStringError( + std::errc::invalid_argument, + "TextureCreateDesc height %u does not match CPUBuffer height %d.", + Desc.Height, Buf.OutputProps.Height); + if (Desc.MipLevels != static_cast(Buf.OutputProps.MipLevels)) + return llvm::createStringError( + std::errc::invalid_argument, + "TextureCreateDesc mip levels %u does not match CPUBuffer mip " + "levels %d.", + Desc.MipLevels, Buf.OutputProps.MipLevels); + const uint32_t TexelSize = getFormatSizeInBytes(Desc.Fmt); + const uint64_t ExpectedSize = + static_cast(Desc.Width) * Desc.Height * TexelSize; + if (static_cast(Buf.size()) != ExpectedSize) + return llvm::createStringError( + std::errc::invalid_argument, + "CPUBuffer size %u does not match expected size %llu " + "(width %u * height %u * element size %u).", + Buf.size(), ExpectedSize, Desc.Width, Desc.Height, TexelSize); + return llvm::Error::success(); +} + } // namespace offloadtest #endif // OFFLOADTEST_API_FORMATCONVERSION_H diff --git a/include/Support/Pipeline.h b/include/Support/Pipeline.h index 285f1534f..7b98f8bdf 100644 --- a/include/Support/Pipeline.h +++ b/include/Support/Pipeline.h @@ -107,7 +107,6 @@ enum class DataFormat { Float16, Float32, Float64, - Depth32, Bool, }; @@ -198,7 +197,6 @@ static inline uint32_t getFormatSize(DataFormat Format) { case DataFormat::UInt32: case DataFormat::Int32: case DataFormat::Float32: - case DataFormat::Depth32: case DataFormat::Bool: return 4; case DataFormat::Hex64: @@ -216,6 +214,11 @@ struct CPUBuffer { int Channels; int Stride; uint32_t ArraySize; + // When set, names the GPU texture format directly (e.g. D32Float) instead of + // inferring it from DataFormat + Channels via toFormat(). This lets depth + // buffers and other special formats be expressed without extending + // DataFormat. + std::optional GpuFormat; // Data can contain one block of data for a singular resource // or multiple blocks for a resource array. llvm::SmallVector> Data; @@ -458,6 +461,19 @@ struct IOBindings { std::string RenderTarget; CPUBuffer *RTargetBufferPtr = nullptr; + + // Optional depth target bound for readback; when Name is empty, backends + // create an internal depth target. Ptr is resolved after parsing to the + // named CPUBuffer entry that owns the readback storage; the GPU format for + // the depth texture comes from the buffer's GpuFormat field. + struct DepthBufferBinding { + std::string Name; + CPUBuffer *Ptr = nullptr; + + bool empty() const { return Name.empty(); } + }; + DepthBufferBinding DepthBuffer; + PrimitiveTopology Topology = PrimitiveTopology::TriangleList; // Set if Topology == PatchList. Validated in @@ -746,6 +762,10 @@ template <> struct MappingTraits { static void mapping(IO &I, offloadtest::IOBindings &B); }; +template <> struct MappingTraits { + static void mapping(IO &I, offloadtest::IOBindings::DepthBufferBinding &B); +}; + template <> struct MappingTraits { static void mapping(IO &I, offloadtest::PushConstantValue &B); }; @@ -911,7 +931,6 @@ template <> struct ScalarEnumerationTraits { ENUM_CASE(Float16); ENUM_CASE(Float32); ENUM_CASE(Float64); - ENUM_CASE(Depth32); ENUM_CASE(Bool); #undef ENUM_CASE } diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index 669e44540..ce7c767cd 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -119,9 +119,6 @@ static DXGI_FORMAT getDXFormat(DataFormat Format, int Channels) { if (Channels == 2) return DXGI_FORMAT_R32G32B32A32_UINT; llvm_unreachable("Unsupported channel count for 64-bit format"); - case DataFormat::Depth32: - llvm_unreachable( - "Depth32 format is not yet supported in the DirectX backend."); default: llvm_unreachable("Unsupported Resource format specified"); } @@ -1173,6 +1170,7 @@ class DXDevice : public offloadtest::Device { std::unique_ptr RenderTarget; std::unique_ptr RTReadback; std::unique_ptr DepthStencil; + std::unique_ptr DSReadback; std::unique_ptr VB; llvm::SmallVector DescTables; @@ -2788,6 +2786,28 @@ class DXDevice : public offloadtest::Device { P.Bindings.RTargetBufferPtr->copyFromTexture(Mapped, Placed.Footprint.RowPitch); Readback.Buffer->Unmap(0, nullptr); + + if (IS.DSReadback) { + void *DSMapped = nullptr; + auto &DSReadback = llvm::cast(*IS.DSReadback); + if (auto Err = HR::toError(DSReadback.Buffer->Map(0, nullptr, &DSMapped), + "Failed to map depth buffer readback")) + return Err; + + auto &DS = llvm::cast(*IS.DepthStencil); + const D3D12_RESOURCE_DESC DSDesc = DS.Resource->GetDesc(); + D3D12_PLACED_SUBRESOURCE_FOOTPRINT DSPlaced = {}; + uint32_t DSNumRows = 0; + uint64_t DSRowSizeInBytes = 0; + uint64_t DSTotalBytes = 0; + Device->GetCopyableFootprints(&DSDesc, 0u, 1u, 0u, &DSPlaced, &DSNumRows, + &DSRowSizeInBytes, &DSTotalBytes); + + P.Bindings.DepthBuffer.Ptr->copyFromTexture(DSMapped, + DSPlaced.Footprint.RowPitch); + DSReadback.Buffer->Unmap(0, nullptr); + } + return llvm::Error::success(); } @@ -2821,6 +2841,27 @@ class DXDevice : public offloadtest::Device { } llvm::Error createDepthStencil(Pipeline &P, InvocationState &IS) { + // If the test bound a CPU-readable depth buffer, create the depth target + // from it and allocate a readback buffer. Otherwise fall back to the + // default depth target (which is not read back). + if (P.Bindings.DepthBuffer.Ptr) { + const CPUBuffer &DSBuf = *P.Bindings.DepthBuffer.Ptr; + auto TexOrErr = offloadtest::createDepthBufferFromCPUBuffer(*this, DSBuf); + if (!TexOrErr) + return TexOrErr.takeError(); + IS.DepthStencil = std::move(*TexOrErr); + + BufferCreateDesc BufDesc = {}; + BufDesc.Location = MemoryLocation::GpuToCpu; + BufDesc.Usage = BufferUsage::Storage; + auto BufOrErr = createBuffer("DSReadback", BufDesc, + getAlignedTextureBufferSize(DSBuf)); + if (!BufOrErr) + return BufOrErr.takeError(); + IS.DSReadback = std::move(*BufOrErr); + return llvm::Error::success(); + } + auto TexOrErr = offloadtest::createDefaultDepthStencilTarget( *this, P.Bindings.RTargetBufferPtr->OutputProps.Width, P.Bindings.RTargetBufferPtr->OutputProps.Height); @@ -2906,6 +2947,33 @@ class DXDevice : public offloadtest::Device { IS.CB->CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); + // If a depth buffer is bound for readback, transition the depth target + // from DEPTH_WRITE to COPY_SOURCE and copy its contents to the readback + // buffer using the depth-aspect placed footprint. + if (IS.DSReadback) { + auto &DSReadback = llvm::cast(*IS.DSReadback); + const D3D12_RESOURCE_BARRIER DSBarrier = + CD3DX12_RESOURCE_BARRIER::Transition( + DS.Resource.Get(), D3D12_RESOURCE_STATE_DEPTH_WRITE, + D3D12_RESOURCE_STATE_COPY_SOURCE); + IS.CB->CmdList->ResourceBarrier(1, &DSBarrier); + + const CPUBuffer &DSBuf = *P.Bindings.DepthBuffer.Ptr; + // CopyTextureRegion footprint format must match the source resource + // (D32_FLOAT), not the shader-visible R32_FLOAT SRV cast. + const DXGI_FORMAT DSResFormat = DS.Resource->GetDesc().Format; + const D3D12_PLACED_SUBRESOURCE_FOOTPRINT DSFootprint{ + 0, + CD3DX12_SUBRESOURCE_FOOTPRINT( + DSResFormat, DSBuf.OutputProps.Width, DSBuf.OutputProps.Height, 1, + getAlignedTexturePitch(DSBuf.OutputProps.Width, + DSBuf.getElementSize()))}; + const CD3DX12_TEXTURE_COPY_LOCATION DSDstLoc(DSReadback.Buffer.Get(), + DSFootprint); + const CD3DX12_TEXTURE_COPY_LOCATION DSSrcLoc(DS.Resource.Get(), 0); + IS.CB->CmdList->CopyTextureRegion(&DSDstLoc, 0, 0, 0, &DSSrcLoc, nullptr); + } + auto CopyBackResource = [&IS, this](ResourcePair &R) { if (R.first->isTexture()) { const offloadtest::CPUBuffer &B = *R.first->BufferPtr; @@ -3068,7 +3136,7 @@ class DXDevice : public offloadtest::Device { TraditionalRasterPipelineCreateDesc PipelineDesc = {}; PipelineDesc.Topology = P.Bindings.Topology; PipelineDesc.PatchControlPoints = P.Bindings.PatchControlPoints; - PipelineDesc.DSFormat = Format::D32FloatS8Uint; + PipelineDesc.DSFormat = State.DepthStencil->getDesc().Fmt; for (auto &Shader : P.Shaders) { ShaderContainer SC = {}; SC.EntryPoint = Shader.Entry; diff --git a/lib/API/Device.cpp b/lib/API/Device.cpp index 5365ed548..9463f01b1 100644 --- a/lib/API/Device.cpp +++ b/lib/API/Device.cpp @@ -231,6 +231,31 @@ offloadtest::createDefaultDepthStencilTarget(Device &Dev, uint32_t Width, return Dev.createTexture("DepthStencil", Desc); } +llvm::Expected> +offloadtest::createDepthBufferFromCPUBuffer(Device &Dev, const CPUBuffer &Buf) { + if (!Buf.GpuFormat || !isDepthFormat(*Buf.GpuFormat)) + return llvm::createStringError( + std::errc::invalid_argument, + "Depth buffer requires a CPUBuffer with a depth GpuFormat; got '%s'.", + Buf.GpuFormat ? getFormatName(*Buf.GpuFormat).data() : ""); + + const Format Fmt = *Buf.GpuFormat; + + TextureCreateDesc Desc = {}; + Desc.Location = MemoryLocation::GpuOnly; + Desc.Usage = TextureUsage::DepthStencil; + Desc.Fmt = Fmt; + Desc.Width = Buf.OutputProps.Width; + Desc.Height = Buf.OutputProps.Height; + Desc.MipLevels = 1; + Desc.OptimizedClearValue = ClearDepthStencil{1.0f, 0}; + + if (auto Err = validateTextureDimsMatchCPUBuffer(Desc, Buf)) + return Err; + + return Dev.createTexture("DepthBuffer", Desc); +} + // This is a separate function because recursion is not allowed in this code // base. static llvm::Expected> diff --git a/lib/API/MTL/MTLDevice.cpp b/lib/API/MTL/MTLDevice.cpp index 7159826ad..b0d4a5bac 100644 --- a/lib/API/MTL/MTLDevice.cpp +++ b/lib/API/MTL/MTLDevice.cpp @@ -1560,6 +1560,10 @@ class MTLDevice : public offloadtest::Device { } llvm::Error createDepthStencil(Pipeline &P, InvocationState &IS) { + if (P.Bindings.DepthBuffer.Ptr) + return llvm::createStringError( + std::errc::not_supported, + "Bindings.DepthBuffer is not yet supported on the Metal backend."); auto TexOrErr = offloadtest::createDefaultDepthStencilTarget( *this, P.Bindings.RTargetBufferPtr->OutputProps.Width, P.Bindings.RTargetBufferPtr->OutputProps.Height); diff --git a/lib/API/VK/Device.cpp b/lib/API/VK/Device.cpp index a4128c1bc..a1dd7d5f8 100644 --- a/lib/API/VK/Device.cpp +++ b/lib/API/VK/Device.cpp @@ -59,10 +59,6 @@ static VkFormat getVKFormat(DataFormat Format, int Channels) { VKFormats(UINT, 64) break; case DataFormat::Float64: VKFormats(SFLOAT, 64) break; - case DataFormat::Depth32: - if (Channels != 1) - llvm_unreachable("Depth32 format only supports a single channel."); - return VK_FORMAT_D32_SFLOAT; default: llvm_unreachable("Unsupported Resource format specified"); } @@ -1400,6 +1396,7 @@ class VulkanDevice : public offloadtest::Device { std::unique_ptr RenderTarget; std::unique_ptr RTReadback; std::unique_ptr DepthStencil; + std::unique_ptr DSReadback; std::unique_ptr VB; uint32_t ShaderStageMask = 0; @@ -2649,8 +2646,9 @@ class VulkanDevice : public offloadtest::Device { VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A}; ViewCi.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; } else { - ViewCi.subresourceRange.aspectMask = - VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + ViewCi.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; + if (isStencilFormat(Desc.Fmt)) + ViewCi.subresourceRange.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT; } // Tex destructor will clean up Image + Memory on failure. if (auto Err = VK::toError( @@ -3133,13 +3131,15 @@ class VulkanDevice : public offloadtest::Device { llvm::Expected createImage(Resource &R, BufferRef &Host, int UsageOverride = 0) { const offloadtest::CPUBuffer &B = *R.BufferPtr; - if (B.Format == DataFormat::Depth32 && R.isReadWrite()) + const bool IsDepth = B.GpuFormat.has_value() && isDepthFormat(*B.GpuFormat); + if (IsDepth && R.isReadWrite()) return llvm::createStringError(std::errc::invalid_argument, "Image memory allocation failed."); VkImageCreateInfo ImageCreateInfo = {}; ImageCreateInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; ImageCreateInfo.imageType = getVKImageType(R.Kind); - ImageCreateInfo.format = getVKFormat(B.Format, B.Channels); + ImageCreateInfo.format = B.GpuFormat ? getVulkanFormat(*B.GpuFormat) + : getVKFormat(B.Format, B.Channels); ImageCreateInfo.mipLevels = B.OutputProps.MipLevels; ImageCreateInfo.arrayLayers = 1; ImageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; @@ -3329,6 +3329,26 @@ class VulkanDevice : public offloadtest::Device { } llvm::Error createDepthStencil(Pipeline &P, InvocationState &IS) { + // If the test bound a CPU-readable depth buffer, create the depth target + // from it and allocate a readback buffer. Otherwise fall back to the + // default depth target (which is not read back). + if (P.Bindings.DepthBuffer.Ptr) { + const CPUBuffer &DSBuf = *P.Bindings.DepthBuffer.Ptr; + auto TexOrErr = offloadtest::createDepthBufferFromCPUBuffer(*this, DSBuf); + if (!TexOrErr) + return TexOrErr.takeError(); + IS.DepthStencil = std::move(*TexOrErr); + + BufferCreateDesc BufDesc = {}; + BufDesc.Location = MemoryLocation::GpuToCpu; + BufDesc.Usage = BufferUsage::Storage; + auto BufOrErr = createBuffer("DSReadback", BufDesc, DSBuf.size()); + if (!BufOrErr) + return BufOrErr.takeError(); + IS.DSReadback = std::move(*BufOrErr); + return llvm::Error::success(); + } + auto TexOrErr = offloadtest::createDefaultDepthStencilTarget( *this, P.Bindings.RTargetBufferPtr->OutputProps.Width, P.Bindings.RTargetBufferPtr->OutputProps.Height); @@ -3561,12 +3581,14 @@ class VulkanDevice : public offloadtest::Device { ViewCreateInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; ViewCreateInfo.viewType = getImageViewType(R.Kind); ViewCreateInfo.format = - getVKFormat(R.BufferPtr->Format, R.BufferPtr->Channels); + R.BufferPtr->GpuFormat + ? getVulkanFormat(*R.BufferPtr->GpuFormat) + : getVKFormat(R.BufferPtr->Format, R.BufferPtr->Channels); ViewCreateInfo.components = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A}; ViewCreateInfo.subresourceRange.aspectMask = - R.BufferPtr->Format == DataFormat::Depth32 + (R.BufferPtr->GpuFormat && isDepthFormat(*R.BufferPtr->GpuFormat)) ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT; ViewCreateInfo.subresourceRange.baseMipLevel = 0; @@ -3824,13 +3846,14 @@ class VulkanDevice : public offloadtest::Device { return; if (R.isImage()) { const offloadtest::CPUBuffer &B = *R.BufferPtr; + const bool IsDepth = + B.GpuFormat.has_value() && isDepthFormat(*B.GpuFormat); llvm::SmallVector Regions; uint64_t CurrentOffset = 0; for (int I = 0; I < B.OutputProps.MipLevels; ++I) { VkBufferImageCopy Region = {}; - Region.imageSubresource.aspectMask = B.Format == DataFormat::Depth32 - ? VK_IMAGE_ASPECT_DEPTH_BIT - : VK_IMAGE_ASPECT_COLOR_BIT; + Region.imageSubresource.aspectMask = + IsDepth ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT; Region.imageSubresource.mipLevel = I; Region.imageSubresource.baseArrayLayer = 0; Region.imageSubresource.layerCount = 1; @@ -3848,9 +3871,8 @@ class VulkanDevice : public offloadtest::Device { } VkImageSubresourceRange SubRange = {}; - SubRange.aspectMask = B.Format == DataFormat::Depth32 - ? VK_IMAGE_ASPECT_DEPTH_BIT - : VK_IMAGE_ASPECT_COLOR_BIT; + SubRange.aspectMask = + IsDepth ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT; SubRange.baseMipLevel = 0; SubRange.levelCount = B.OutputProps.MipLevels; SubRange.layerCount = 1; @@ -3970,10 +3992,11 @@ class VulkanDevice : public offloadtest::Device { return; if (R.isImage()) { const offloadtest::CPUBuffer &B = *R.BufferPtr; + const bool IsDepth = + B.GpuFormat.has_value() && isDepthFormat(*B.GpuFormat); VkImageSubresourceRange SubRange = {}; - SubRange.aspectMask = B.Format == DataFormat::Depth32 - ? VK_IMAGE_ASPECT_DEPTH_BIT - : VK_IMAGE_ASPECT_COLOR_BIT; + SubRange.aspectMask = + IsDepth ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT; SubRange.baseMipLevel = 0; SubRange.levelCount = B.OutputProps.MipLevels; SubRange.layerCount = 1; @@ -4000,9 +4023,8 @@ class VulkanDevice : public offloadtest::Device { uint64_t CurrentOffset = 0; for (int I = 0; I < B.OutputProps.MipLevels; ++I) { VkBufferImageCopy Region = {}; - Region.imageSubresource.aspectMask = B.Format == DataFormat::Depth32 - ? VK_IMAGE_ASPECT_DEPTH_BIT - : VK_IMAGE_ASPECT_COLOR_BIT; + Region.imageSubresource.aspectMask = + IsDepth ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT; Region.imageSubresource.mipLevel = I; Region.imageSubresource.baseArrayLayer = 0; Region.imageSubresource.layerCount = 1; @@ -4169,6 +4191,15 @@ class VulkanDevice : public offloadtest::Device { VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT); + + if (IS.DSReadback) { + copyTextureToReadback(IS.CB->CmdBuffer, + llvm::cast(*IS.DepthStencil), + llvm::cast(*IS.DSReadback), + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT); + } } for (auto &R : IS.Resources) @@ -4234,6 +4265,24 @@ class VulkanDevice : public offloadtest::Device { auto *RT = P.Bindings.RTargetBufferPtr; RT->copyFromTexture(Mapped, RT->getImageRowBytes()); vkUnmapMemory(Device, Readback.Memory); + + if (IS.DSReadback) { + auto &DSReadback = llvm::cast(*IS.DSReadback); + + VkMappedMemoryRange DSRange = {}; + DSRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + DSRange.offset = 0; + DSRange.size = VK_WHOLE_SIZE; + DSRange.memory = DSReadback.Memory; + + void *DSMapped = nullptr; // NOLINT(misc-const-correctness) + vkMapMemory(Device, DSReadback.Memory, 0, VK_WHOLE_SIZE, 0, &DSMapped); + vkInvalidateMappedMemoryRanges(Device, 1, &DSRange); + + auto *DSBuf = P.Bindings.DepthBuffer.Ptr; + DSBuf->copyFromTexture(DSMapped, DSBuf->getImageRowBytes()); + vkUnmapMemory(Device, DSReadback.Memory); + } } return llvm::Error::success(); } diff --git a/lib/Support/Check.cpp b/lib/Support/Check.cpp index f9b94a783..e25dad60a 100644 --- a/lib/Support/Check.cpp +++ b/lib/Support/Check.cpp @@ -228,7 +228,6 @@ testBufferFloat(std::function ComparisonFn, case offloadtest::DataFormat::Float64: return testAllArray(ComparisonFn, B1, B2); case offloadtest::DataFormat::Float32: - case offloadtest::DataFormat::Depth32: return testAllArray(ComparisonFn, B1, B2); case offloadtest::DataFormat::Float16: { return testAllArray(ComparisonFn, B1, B2); @@ -250,8 +249,7 @@ static bool testBufferFloatEpsilon(offloadtest::CPUBuffer *B1, }; return testBufferFloat(Fn, B1, B2); } - case offloadtest::DataFormat::Float32: - case offloadtest::DataFormat::Depth32: { + case offloadtest::DataFormat::Float32: { auto Fn = [Epsilon, DM](const float &FS, const float &FR) { return compareFloatEpsilon(FS, FR, (float)Epsilon, DM); }; @@ -280,8 +278,7 @@ static bool testBufferFloatULP(offloadtest::CPUBuffer *B1, }; return testBufferFloat(Fn, B1, B2); } - case offloadtest::DataFormat::Float32: - case offloadtest::DataFormat::Depth32: { + case offloadtest::DataFormat::Float32: { auto Fn = [ULPT, DM](const float &FS, const float &FR) { return compareFloatULP(FS, FR, ULPT, DM); }; @@ -379,7 +376,6 @@ static const std::string getBufferStr(offloadtest::CPUBuffer *B) { case DF::Float16: return formatBuffer(B); // assuming no native float16 case DF::Float32: - case DF::Depth32: return formatBuffer(B); case DF::Float64: return formatBuffer(B); diff --git a/lib/Support/Pipeline.cpp b/lib/Support/Pipeline.cpp index bd58c8431..a8564c97d 100644 --- a/lib/Support/Pipeline.cpp +++ b/lib/Support/Pipeline.cpp @@ -169,6 +169,13 @@ void MappingTraits::mapping(IO &I, P.Bindings.RenderTarget + " not found!"); } + if (!P.Bindings.DepthBuffer.empty()) { + P.Bindings.DepthBuffer.Ptr = P.getBuffer(P.Bindings.DepthBuffer.Name); + if (!P.Bindings.DepthBuffer.Ptr) + I.setError(Twine("Referenced depth buffer ") + + P.Bindings.DepthBuffer.Name + " not found!"); + } + // Resolve buffer name references in acceleration structure descriptions. for (auto &B : P.AccelStructs.BLAS) { for (auto &T : B.Triangles) { @@ -363,6 +370,7 @@ void MappingTraits::mapping(IO &I, I.mapRequired("Name", B.Name); I.mapRequired("Format", B.Format); I.mapOptional("Channels", B.Channels, 1); + I.mapOptional("GpuFormat", B.GpuFormat); I.mapOptional("Stride", B.Stride, 0); I.mapOptional("ArraySize", B.ArraySize, 1); setCounters(I, B); @@ -407,9 +415,6 @@ void MappingTraits::mapping(IO &I, case DF::Float32: setData(I, B); break; - case DF::Depth32: - setData(I, B); - break; case DF::Float64: setData(I, B); break; @@ -477,11 +482,17 @@ void MappingTraits::mapping( I.mapOptional("VertexBuffer", B.VertexBuffer); I.mapOptional("VertexAttributes", B.VertexAttributes); I.mapOptional("RenderTarget", B.RenderTarget); + I.mapOptional("DepthBuffer", B.DepthBuffer); I.mapOptional("Topology", B.Topology, offloadtest::PrimitiveTopology::TriangleList); I.mapOptional("PatchControlPoints", B.PatchControlPoints); } +void MappingTraits::mapping( + IO &I, offloadtest::IOBindings::DepthBufferBinding &B) { + I.mapRequired("Name", B.Name); +} + void MappingTraits::mapping( IO &I, offloadtest::PushConstantBlock &B) { I.mapRequired("Stage", B.Stage); @@ -528,8 +539,6 @@ void MappingTraits::mapping( return setData(I, B); // assuming no native float16 case DF::Float32: return setData(I, B); - case DF::Depth32: - return setData(I, B); case DF::Float64: return setData(I, B); case DF::Bool: diff --git a/test/Feature/Semantics/SVDepth.test b/test/Feature/Semantics/SVDepth.test new file mode 100644 index 000000000..a316c3486 --- /dev/null +++ b/test/Feature/Semantics/SVDepth.test @@ -0,0 +1,128 @@ +# This test exercises: +# * SV_Depth - Pixel shader output that overrides the rasterizer-interpolated +# depth and is written to the bound depth target. +# +# SV_Depth: https://github.com/llvm/wg-hlsl/issues/1046 +# Clang's HLSL -> DXIL lowering does not yet implement SV_Depth. + +#--- vertex.hlsl +struct VSInput { + float4 pos : POSITION; +}; + +struct VSOutput { + float4 position : SV_POSITION; +}; + +VSOutput main(VSInput input) { + VSOutput o; + o.position = input.pos; + return o; +} + +#--- pixel.hlsl +struct PSInput { + float4 position : SV_POSITION; +}; + +struct PSOutput { + float4 color : SV_TARGET; + float depth : SV_Depth; +}; + +// Four primitives, one per pixel of a 4x1 render target. Each primitive emits +// a distinct depth value via SV_Depth so the depth target read back to the +// CPU can be checked element-by-element. +// +// All emitted depths are exact binary fractions in [0, 1) and all are less +// than the clear value of 1.0, so they pass the default DepthFunc=LESS test. +PSOutput main(PSInput input, uint primID : SV_PrimitiveID) { + PSOutput o; + o.color = float4(1.0, 0.0, 0.0, 1.0); + float depths[4] = { 0.125, 0.250, 0.375, 0.5 }; + o.depth = depths[primID]; + return o; +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Vertex + Entry: main + - Stage: Pixel + Entry: main +Buffers: + # Geometry: 4 triangles, each covering exactly one pixel of a 4x1 render + # target. Pixel-center NDC coordinates are (-0.75, 0), (-0.25, 0), (+0.25, + # 0), (+0.75, 0). Shared edges fall at NDC x in {-0.5, 0, +0.5}, none of + # which coincide with a pixel center, so coverage is unambiguous. + - Name: VertexData + Format: Float32 + Stride: 16 + Data: [ + # Triangle 0 (CCW, pixel 0): + -1.0, -1.0, 0.0, 1.0, + 0.0, -1.0, 0.0, 1.0, + -1.0, 1.0, 0.0, 1.0, + # Triangle 1 (CCW, pixel 1): + 0.0, -1.0, 0.0, 1.0, + 0.0, 1.0, 0.0, 1.0, + -1.0, 1.0, 0.0, 1.0, + # Triangle 2 (CW, pixel 2): + 0.0, -1.0, 0.0, 1.0, + 0.0, 1.0, 0.0, 1.0, + 1.0, -1.0, 0.0, 1.0, + # Triangle 3 (CW, pixel 3): + 0.0, 1.0, 0.0, 1.0, + 1.0, 1.0, 0.0, 1.0, + 1.0, -1.0, 0.0, 1.0, + ] + - Name: RenderTarget + Format: Float32 + Channels: 4 + FillSize: 64 # 4x1 @ 16 bytes per pixel + OutputProps: + Height: 1 + Width: 4 + Depth: 1 + - Name: DepthTarget + Format: Float32 + GpuFormat: D32Float + Channels: 1 + FillSize: 16 # 4x1 @ 4 bytes per pixel; contents are overwritten on clear + OutputProps: + Height: 1 + Width: 4 + Depth: 1 + - Name: DepthTarget_Expected + Format: Float32 + Channels: 1 + Data: [ 0.125, 0.250, 0.375, 0.5 ] +Bindings: + VertexBuffer: VertexData + VertexAttributes: + - Format: Float32 + Channels: 4 + Offset: 0 + Name: POSITION + RenderTarget: RenderTarget + DepthBuffer: + Name: DepthTarget +DescriptorSets: [] +Results: + - Result: DepthValues + Rule: BufferFloatULP + ULPT: 0 + Actual: DepthTarget + Expected: DepthTarget_Expected +... +#--- end + +# XFAIL: Clang +# Metal backend doesn't yet support Bindings.DepthBuffer. +# UNSUPPORTED: Metal + +# RUN: split-file %s %t +# RUN: %dxc_target -T vs_6_0 -Fo %t-vertex.o %t/vertex.hlsl +# RUN: %dxc_target -T ps_6_0 -Fo %t-pixel.o %t/pixel.hlsl +# RUN: %offloader %t/pipeline.yaml %t-vertex.o %t-pixel.o diff --git a/test/Feature/Textures/Texture2D.GatherCmp.test.yaml b/test/Feature/Textures/Texture2D.GatherCmp.test.yaml index 9a950a6c8..416b141e8 100644 --- a/test/Feature/Textures/Texture2D.GatherCmp.test.yaml +++ b/test/Feature/Textures/Texture2D.GatherCmp.test.yaml @@ -68,7 +68,8 @@ Shaders: Buffers: - Name: Tex - Format: Depth32 + Format: Float32 + GpuFormat: D32Float Channels: 1 OutputProps: { Width: 2, Height: 2, Depth: 1 } Data: [ 0.2, # (0,0) R=0.2 diff --git a/test/Feature/Textures/Texture2D.Load.Depth32.test.yaml b/test/Feature/Textures/Texture2D.Load.Depth32.test.yaml new file mode 100644 index 000000000..e24eb700d --- /dev/null +++ b/test/Feature/Textures/Texture2D.Load.Depth32.test.yaml @@ -0,0 +1,76 @@ +#--- source.hlsl +// Verifies that a Texture2D bound with a depth-compatible format +// (GpuFormat: D32Float in the YAML; VK_FORMAT_D32_SFLOAT on Vulkan, +// DXGI_FORMAT_R32_FLOAT SRV on DirectX) can be Load()ed by a compute shader +// and round-trip the expected single-channel float values. +// +// Tracking: https://github.com/llvm/offload-test-suite/issues/1046 + +[[vk::binding(0, 0)]] Texture2D Tex : register(t0); +[[vk::binding(1, 0)]] RWBuffer Out : register(u0); + +[numthreads(1, 1, 1)] +void main() { + Out[0] = Tex.Load(int3(0, 0, 0)); + Out[1] = Tex.Load(int3(1, 0, 0)); + Out[2] = Tex.Load(int3(0, 1, 0)); + Out[3] = Tex.Load(int3(1, 1, 0)); +} + +//--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + +DispatchParameters: + DispatchGroupCount: [1, 1, 1] + +Buffers: + - Name: Tex + Format: Float32 + GpuFormat: D32Float + Channels: 1 + OutputProps: { Width: 2, Height: 2, Depth: 1 } + Data: [ 0.2, + 0.8, + 0.4, + 0.6 ] + + - Name: Out + Format: Float32 + Channels: 1 + FillSize: 16 # 4 * sizeof(float) + + - Name: Expected + Format: Float32 + Channels: 1 + Data: [ 0.2, 0.8, 0.4, 0.6 ] + +DescriptorSets: + - Resources: + - Name: Tex + Kind: Texture2D + DirectXBinding: { Register: 0, Space: 0 } + VulkanBinding: { Binding: 0 } + - Name: Out + Kind: RWBuffer + DirectXBinding: { Register: 0, Space: 0 } + VulkanBinding: { Binding: 1 } + +Results: + - Result: LoadDepth32Test + Rule: BufferExact + Actual: Out + Expected: Expected +... +#--- end + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o + +# Metal's offload backend cannot bind a depth-format texture as an SRV +# through this code path (hits "Metal does not support buffer robustness" +# in the MTL backend). Mark unsupported until #1046 has a Metal story. +# UNSUPPORTED: Metal diff --git a/test/Feature/Textures/Texture2D.SampleCmp.test.yaml b/test/Feature/Textures/Texture2D.SampleCmp.test.yaml index 52a06c512..2de485fec 100644 --- a/test/Feature/Textures/Texture2D.SampleCmp.test.yaml +++ b/test/Feature/Textures/Texture2D.SampleCmp.test.yaml @@ -107,7 +107,8 @@ Shaders: Buffers: - Name: Tex - Format: Depth32 + Format: Float32 + GpuFormat: D32Float Channels: 1 OutputProps: { Width: 2, Height: 2, Depth: 1 } Data: [ 0.2, # (0,0) -> 0.2 diff --git a/test/Feature/Vk.SampledTextures/Vk.SampledTexture2D/Vk.SampledTexture2D.GatherCmp.test.yaml b/test/Feature/Vk.SampledTextures/Vk.SampledTexture2D/Vk.SampledTexture2D.GatherCmp.test.yaml index b37435331..2299606d3 100644 --- a/test/Feature/Vk.SampledTextures/Vk.SampledTexture2D/Vk.SampledTexture2D.GatherCmp.test.yaml +++ b/test/Feature/Vk.SampledTextures/Vk.SampledTexture2D/Vk.SampledTexture2D.GatherCmp.test.yaml @@ -49,7 +49,8 @@ Shaders: Buffers: - Name: SampledTexLess - Format: Depth32 + Format: Float32 + GpuFormat: D32Float Channels: 1 OutputProps: { Width: 2, Height: 2, Depth: 1 } Data: [ 0.2, # (0,0) R=0.2 @@ -58,7 +59,8 @@ Buffers: 0.8 ] # (1,1) R=0.8 - Name: SampledTexGreater - Format: Depth32 + Format: Float32 + GpuFormat: D32Float Channels: 1 OutputProps: { Width: 2, Height: 2, Depth: 1 } Data: [ 0.2, @@ -67,7 +69,8 @@ Buffers: 0.8 ] - Name: SampledTexRepeat - Format: Depth32 + Format: Float32 + GpuFormat: D32Float Channels: 1 OutputProps: { Width: 2, Height: 2, Depth: 1 } Data: [ 0.2, diff --git a/test/Feature/Vk.SampledTextures/Vk.SampledTexture2D/Vk.SampledTexture2D.SampleCmp.test.yaml b/test/Feature/Vk.SampledTextures/Vk.SampledTexture2D/Vk.SampledTexture2D.SampleCmp.test.yaml index 7d9a605bc..49f775c7c 100644 --- a/test/Feature/Vk.SampledTextures/Vk.SampledTexture2D/Vk.SampledTexture2D.SampleCmp.test.yaml +++ b/test/Feature/Vk.SampledTextures/Vk.SampledTexture2D/Vk.SampledTexture2D.SampleCmp.test.yaml @@ -88,7 +88,8 @@ Shaders: Buffers: - Name: SampledTexLess - Format: Depth32 + Format: Float32 + GpuFormat: D32Float Channels: 1 OutputProps: { Width: 2, Height: 2, Depth: 1 } Data: [ 0.2, # (0,0) -> 0.2 @@ -97,7 +98,8 @@ Buffers: 0.8 ] # (1,1) -> 0.8 - Name: SampledTexGreater - Format: Depth32 + Format: Float32 + GpuFormat: D32Float Channels: 1 OutputProps: { Width: 2, Height: 2, Depth: 1 } Data: [ 0.2,