diff --git a/include/API/Device.h b/include/API/Device.h index f0b85829b..0ad070a80 100644 --- a/include/API/Device.h +++ b/include/API/Device.h @@ -86,6 +86,7 @@ struct TraditionalRasterPipelineCreateDesc { llvm::SmallVector InputLayout; llvm::SmallVector RTFormats; std::optional DSFormat; + uint32_t SampleCount = 1; PrimitiveTopology Topology; // Set if Topology == PatchList. Validated in // Pipeline.cpp::validatePipelineKind. @@ -252,8 +253,11 @@ llvm::Expected> createRenderTargetFromCPUBuffer(Device &Dev, const CPUBuffer &Buf); // Creates a depth/stencil texture matching the dimensions of a render target. +// SampleCount must match the matching color render target when used in the +// same render pass (default 1, i.e. non-MSAA). llvm::Expected> -createDefaultDepthStencilTarget(Device &Dev, uint32_t Width, uint32_t Height); +createDefaultDepthStencilTarget(Device &Dev, uint32_t Width, uint32_t Height, + uint32_t SampleCount = 1); llvm::Expected> createBufferWithData(Device &Dev, std::string Name, diff --git a/include/API/Texture.h b/include/API/Texture.h index 26b9b030f..5bee0da28 100644 --- a/include/API/Texture.h +++ b/include/API/Texture.h @@ -71,6 +71,7 @@ struct TextureCreateDesc { uint32_t Width; uint32_t Height; uint32_t MipLevels; + uint32_t SampleCount = 1; // Clear value for render target or depth/stencil textures. // How and when this is applied depends on the backend: // - DX uses it as an optimized clear hint at resource creation time diff --git a/include/Support/Pipeline.h b/include/Support/Pipeline.h index 9cf0e5f77..ea50f76fe 100644 --- a/include/Support/Pipeline.h +++ b/include/Support/Pipeline.h @@ -145,6 +145,7 @@ struct OutputProperties { int Width; int Depth; int MipLevels = 1; + int SampleCount = 1; }; static inline uint32_t getFormatSize(DataFormat Format) { diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index edfaccb7b..3323a36cd 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -979,6 +979,9 @@ class DXDevice : public offloadtest::Device { // Resources for graphics pipelines. std::unique_ptr RenderPass; std::unique_ptr RenderTarget; + // When the render target is multi-sampled, the contents are resolved into + // this single-sample texture before being copied into RTReadback. + std::unique_ptr ResolvedRenderTarget; std::unique_ptr RTReadback; std::unique_ptr DepthStencil; std::unique_ptr VB; @@ -1221,7 +1224,8 @@ class DXDevice : public offloadtest::Device { PSODesc.DSVFormat = getDXGIFormat(*Desc.DSFormat); for (size_t I = 0; I < Desc.RTFormats.size(); ++I) PSODesc.RTVFormats[I] = getDXGIFormat(Desc.RTFormats[I]); - PSODesc.SampleDesc.Count = 1; + PSODesc.SampleDesc.Count = std::max(1u, Desc.SampleCount); + PSODesc.RasterizerState.MultisampleEnable = Desc.SampleCount > 1; ComPtr PSO; if (auto Err = HR::toError( @@ -1367,7 +1371,7 @@ class DXDevice : public offloadtest::Device { TexDesc.DepthOrArraySize = 1; TexDesc.MipLevels = static_cast(Desc.MipLevels); TexDesc.Format = getDXGIFormat(Desc.Fmt); - TexDesc.SampleDesc.Count = 1; + TexDesc.SampleDesc.Count = std::max(1u, Desc.SampleCount); TexDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; TexDesc.Flags = getDXResourceFlags(Desc.Usage); @@ -1506,6 +1510,17 @@ class DXDevice : public offloadtest::Device { CD3DX12FeatureSupport Features; Features.Init(Device.Get()); + D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS MSAA4xLevels = { + DXGI_FORMAT_R32G32B32A32_FLOAT, 4, + D3D12_MULTISAMPLE_QUALITY_LEVELS_FLAG_NONE, 0}; + const bool SupportsMSAA4x = SUCCEEDED(Device->CheckFeatureSupport( + D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, + &MSAA4xLevels, sizeof(MSAA4xLevels))) && + MSAA4xLevels.NumQualityLevels > 0; + Caps.insert( + std::make_pair("MSAA_4xSamples", + makeCapability("MSAA_4xSamples", SupportsMSAA4x))); + #define D3D_FEATURE_BOOL(Name) \ Caps.insert( \ std::make_pair(#Name, makeCapability(#Name, Features.Name()))); @@ -2285,6 +2300,28 @@ class DXDevice : public offloadtest::Device { IS.RenderTarget = std::move(*TexOrErr); + // For MSAA render targets, allocate a single-sample texture that + // ResolveSubresource writes into; CopyTextureRegion then copies the + // resolved texture into the readback buffer. + if (OutBuf.OutputProps.SampleCount > 1) { + auto FmtOrErr = toFormat(OutBuf.Format, OutBuf.Channels); + if (!FmtOrErr) + return FmtOrErr.takeError(); + TextureCreateDesc ResolvedDesc = {}; + ResolvedDesc.Location = MemoryLocation::GpuOnly; + ResolvedDesc.Usage = TextureUsage::RenderTarget; + ResolvedDesc.Fmt = *FmtOrErr; + ResolvedDesc.Width = OutBuf.OutputProps.Width; + ResolvedDesc.Height = OutBuf.OutputProps.Height; + ResolvedDesc.MipLevels = 1; + ResolvedDesc.SampleCount = 1; + ResolvedDesc.OptimizedClearValue = ClearColor{}; + auto ResolvedOrErr = createTexture("ResolvedRenderTarget", ResolvedDesc); + if (!ResolvedOrErr) + return ResolvedOrErr.takeError(); + IS.ResolvedRenderTarget = std::move(*ResolvedOrErr); + } + // Create readback buffer sized for the pixel data with row pitch padded // up to D3D12_TEXTURE_DATA_PITCH_ALIGNMENT, which is what D3D12 requires // for the placed footprint used by CopyTextureRegion. The compaction @@ -2302,9 +2339,11 @@ class DXDevice : public offloadtest::Device { } llvm::Error createDepthStencil(Pipeline &P, InvocationState &IS) { + const uint32_t SampleCount = + std::max(1, P.Bindings.RTargetBufferPtr->OutputProps.SampleCount); auto TexOrErr = offloadtest::createDefaultDepthStencilTarget( *this, P.Bindings.RTargetBufferPtr->OutputProps.Width, - P.Bindings.RTargetBufferPtr->OutputProps.Height); + P.Bindings.RTargetBufferPtr->OutputProps.Height, SampleCount); if (!TexOrErr) return TexOrErr.takeError(); IS.DepthStencil = std::move(*TexOrErr); @@ -2367,14 +2406,43 @@ class DXDevice : public offloadtest::Device { Encoder.endEncoding(); - // Transition the render target to copy source and copy to the readback - // buffer. - const D3D12_RESOURCE_BARRIER Barrier = CD3DX12_RESOURCE_BARRIER::Transition( - RT.Resource.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, - D3D12_RESOURCE_STATE_COPY_SOURCE); - IS.CB->CmdList->ResourceBarrier(1, &Barrier); - + // Transition the render target and (for MSAA) resolve target into the + // states needed to copy pixels back into RTReadback. For non-MSAA we + // just transition RT to COPY_SOURCE; for MSAA we resolve the multi- + // sampled RT into the single-sample resolved RT, then read that. const CPUBuffer &B = *P.Bindings.RTargetBufferPtr; + const bool IsMSAA = B.OutputProps.SampleCount > 1; + + ID3D12Resource *CopySource = RT.Resource.Get(); + if (IsMSAA) { + auto &Resolved = llvm::cast(*IS.ResolvedRenderTarget); + const D3D12_RESOURCE_BARRIER PreBarriers[] = { + CD3DX12_RESOURCE_BARRIER::Transition( + RT.Resource.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, + D3D12_RESOURCE_STATE_RESOLVE_SOURCE), + CD3DX12_RESOURCE_BARRIER::Transition( + Resolved.Resource.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, + D3D12_RESOURCE_STATE_RESOLVE_DEST)}; + IS.CB->CmdList->ResourceBarrier(2, PreBarriers); + + IS.CB->CmdList->ResolveSubresource(Resolved.Resource.Get(), 0, + RT.Resource.Get(), 0, + getDXFormat(B.Format, B.Channels)); + + const D3D12_RESOURCE_BARRIER PostBarrier = + CD3DX12_RESOURCE_BARRIER::Transition( + Resolved.Resource.Get(), D3D12_RESOURCE_STATE_RESOLVE_DEST, + D3D12_RESOURCE_STATE_COPY_SOURCE); + IS.CB->CmdList->ResourceBarrier(1, &PostBarrier); + CopySource = Resolved.Resource.Get(); + } else { + const D3D12_RESOURCE_BARRIER Barrier = + CD3DX12_RESOURCE_BARRIER::Transition( + RT.Resource.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, + D3D12_RESOURCE_STATE_COPY_SOURCE); + IS.CB->CmdList->ResourceBarrier(1, &Barrier); + } + const D3D12_PLACED_SUBRESOURCE_FOOTPRINT Footprint{ 0, CD3DX12_SUBRESOURCE_FOOTPRINT( @@ -2383,7 +2451,7 @@ class DXDevice : public offloadtest::Device { getAlignedTexturePitch(B.OutputProps.Width, B.getElementSize()))}; const CD3DX12_TEXTURE_COPY_LOCATION DstLoc(RTReadback.Buffer.Get(), Footprint); - const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(RT.Resource.Get(), 0); + const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(CopySource, 0); IS.CB->CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); @@ -2563,6 +2631,8 @@ class DXDevice : public offloadtest::Device { if (!FormatOrErr) return FormatOrErr.takeError(); PipelineDesc.RTFormats.push_back(*FormatOrErr); + PipelineDesc.SampleCount = + std::max(1, P.Bindings.RTargetBufferPtr->OutputProps.SampleCount); auto PipelineStateOrErr = createTraditionalRasterPipeline( "Graphics Pipeline State", BndDesc, PipelineDesc); diff --git a/lib/API/Device.cpp b/lib/API/Device.cpp index 86875096e..fd15ceacb 100644 --- a/lib/API/Device.cpp +++ b/lib/API/Device.cpp @@ -84,6 +84,7 @@ offloadtest::createRenderTargetFromCPUBuffer(Device &Dev, Desc.Width = Buf.OutputProps.Width; Desc.Height = Buf.OutputProps.Height; Desc.MipLevels = 1; + Desc.SampleCount = std::max(1, Buf.OutputProps.SampleCount); Desc.OptimizedClearValue = ClearColor{}; if (auto Err = validateTextureDescMatchesCPUBuffer(Desc, Buf)) @@ -94,7 +95,8 @@ offloadtest::createRenderTargetFromCPUBuffer(Device &Dev, llvm::Expected> offloadtest::createDefaultDepthStencilTarget(Device &Dev, uint32_t Width, - uint32_t Height) { + uint32_t Height, + uint32_t SampleCount) { TextureCreateDesc Desc = {}; Desc.Location = MemoryLocation::GpuOnly; Desc.Usage = TextureUsage::DepthStencil; @@ -102,6 +104,7 @@ offloadtest::createDefaultDepthStencilTarget(Device &Dev, uint32_t Width, Desc.Width = Width; Desc.Height = Height; Desc.MipLevels = 1; + Desc.SampleCount = SampleCount; Desc.OptimizedClearValue = ClearDepthStencil{1.0f, 0}; return Dev.createTexture("DepthStencil", Desc); diff --git a/lib/API/MTL/MTLDevice.cpp b/lib/API/MTL/MTLDevice.cpp index 7959daa9b..59bf5dc78 100644 --- a/lib/API/MTL/MTLDevice.cpp +++ b/lib/API/MTL/MTLDevice.cpp @@ -1542,6 +1542,15 @@ class MTLDevice : public offloadtest::Device { if (auto Err = validateTextureCreateDesc(Desc)) return Err; + // MSAA support is not yet wired up on the Metal backend. Tests that + // require it should mark themselves `UNSUPPORTED: Metal` so this guard + // never fires in CI; it exists to keep the shared TextureCreateDesc + // contract honest across backends (see #1043). + if (Desc.SampleCount > 1) + return llvm::createStringError( + std::errc::not_supported, + "MSAA textures (SampleCount > 1) are not yet implemented on Metal"); + MTL::TextureDescriptor *TDesc = MTL::TextureDescriptor::texture2DDescriptor( getMetalPixelFormat(Desc.Fmt), Desc.Width, Desc.Height, Desc.MipLevels > 1); diff --git a/lib/API/VK/Device.cpp b/lib/API/VK/Device.cpp index 282b2b9c2..198a8a3a1 100644 --- a/lib/API/VK/Device.cpp +++ b/lib/API/VK/Device.cpp @@ -2118,6 +2118,15 @@ class VulkanDevice : public offloadtest::Device { if (auto Err = validateTextureCreateDesc(Desc)) return Err; + // MSAA support is not yet wired up on the Vulkan backend. Tests that + // require it should mark themselves `UNSUPPORTED: Vulkan` so this guard + // never fires in CI; it exists to keep the shared TextureCreateDesc + // contract honest across backends (see #1043). + if (Desc.SampleCount > 1) + return llvm::createStringError( + std::errc::not_supported, + "MSAA textures (SampleCount > 1) are not yet implemented on Vulkan"); + VkImageCreateInfo ImageInfo = {}; ImageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; ImageInfo.imageType = VK_IMAGE_TYPE_2D; diff --git a/lib/Support/Pipeline.cpp b/lib/Support/Pipeline.cpp index 3940a550d..fa7a8999b 100644 --- a/lib/Support/Pipeline.cpp +++ b/lib/Support/Pipeline.cpp @@ -546,6 +546,7 @@ void MappingTraits::mapping( I.mapRequired("Width", P.Width); I.mapRequired("Depth", P.Depth); I.mapOptional("MipLevels", P.MipLevels, 1); + I.mapOptional("SampleCount", P.SampleCount, 1); } void MappingTraits::mapping( diff --git a/test/Feature/MSAA/render-target-msaa-resolve.test b/test/Feature/MSAA/render-target-msaa-resolve.test new file mode 100644 index 000000000..1d4492e99 --- /dev/null +++ b/test/Feature/MSAA/render-target-msaa-resolve.test @@ -0,0 +1,76 @@ +#--- vertex.hlsl +struct VSInput { + float4 position : POSITION; +}; + +struct VSOutput { + float4 position : SV_POSITION; +}; + +VSOutput main(VSInput input) { + VSOutput output; + output.position = input.position; + return output; +} + +#--- pixel.hlsl +struct PSInput { + float4 position : SV_POSITION; +}; + +float4 main(PSInput input) : SV_TARGET { + return float4(0.25, 0.5, 0.75, 1.0); +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Vertex + Entry: main + - Stage: Pixel + Entry: main +Buffers: + - Name: VertexData + Format: Float32 + Stride: 16 + Data: [ 0.0, 3.0, 0.0, 1.0, + 3.0, -3.0, 0.0, 1.0, + -3.0, -3.0, 0.0, 1.0 ] + - Name: Output + Format: Float32 + Channels: 4 + FillSize: 64 # 2x2 @ 16 bytes per pixel + OutputProps: + Height: 2 + Width: 2 + Depth: 1 + SampleCount: 4 +Bindings: + VertexBuffer: VertexData + VertexAttributes: + - Format: Float32 + Channels: 4 + Offset: 0 + Name: POSITION + RenderTarget: Output +DescriptorSets: [] +... +#--- end + +# MSAA pipelines and Resolve are currently DirectX-only. +# UNSUPPORTED: Vulkan || Metal +# REQUIRES: MSAA_4xSamples +# XFAIL: Clang + +# RUN: split-file %s %t +# RUN: %dxc_target -T vs_6_0 -Fo %t-vertex.o %t/vertex.hlsl +# RUN: %dxc_target -T ps_6_0 -Fo %t-pixel.o %t/pixel.hlsl +# RUN: %offloader %t/pipeline.yaml %t-vertex.o %t-pixel.o | FileCheck %s + +# Every pixel is fully covered by the triangle, so the 4x-MSAA resolve +# averages 4 identical samples per pixel back to the same color. +# CHECK: Name: Output +# CHECK-NEXT: Format: Float32 +# CHECK-NEXT: Channels: 4 +# CHECK-NEXT: Data: [ 0.25, 0.5, 0.75, 1, 0.25, 0.5, 0.75, 1, 0.25, 0.5, +# CHECK-NEXT: 0.75, 1, 0.25, 0.5, 0.75, 1 ] diff --git a/test/lit.cfg.py b/test/lit.cfg.py index a2d757c41..42920a99c 100644 --- a/test/lit.cfg.py +++ b/test/lit.cfg.py @@ -175,6 +175,8 @@ def setDeviceFeatures(config, device, compiler): config.available_features.add("Int64TypedResourceAtomics") if device["Features"].get("MeshShaderTier", "NotSupported") != "NotSupported": config.available_features.add("MeshShader") + if device["Features"].get("MSAA_4xSamples", False): + config.available_features.add("MSAA_4xSamples") setWaveSizeFeaturesDirectX(config, device) if device["Features"].get("RaytracingTier", "NotSupported") != "NotSupported": config.available_features.add("acceleration-structure")