From 11524e384bb9e7e3fadbb6b39ea11868ebf41cf8 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Tue, 26 May 2026 20:48:46 -0700 Subject: [PATCH 1/4] [DX] Add MSAA render target + resolve support (#1043) Adds support for multi-sample render targets on the D3D12 backend so the offload test suite can exercise MSAA pipelines and validate resolved output. * Pipeline schema: new `OutputProperties.SampleCount` (default 1) on the render-target buffer, parsed in `Pipeline.cpp`. * API plumbing: `TextureCreateDesc::SampleCount` and `TraditionalRasterPipelineCreateDesc::SampleCount`; `createDefaultDepthStencilTarget` now takes a sample-count override so the DSV always matches the bound RT. * DX backend: - `createTexture` honours the sample count on RT allocations. - `createTraditionalRasterPipeline` sets `SampleDesc.Count` and `RasterizerState.MultisampleEnable` from the descriptor. - `createRenderTarget` allocates a single-sample `ResolvedRenderTarget` alongside the MSAA RT when SampleCount > 1. - `createGraphicsCommands` issues `ResolveSubresource` with the required `RESOLVE_SOURCE`/`RESOLVE_DEST` state transitions and copies the resolved texture (not the MSAA RT) into the readback buffer. - `createDepthStencil` threads SampleCount through so depth matches. * VK / MTL backends: ignore the new field (default 1 keeps existing tests untouched); mesh-shader pipeline path left at SampleCount=1 since no MSAA mesh-shader caller is wired yet. * Test: `Feature/MSAA/render-target-msaa-resolve.test` renders a fully-covering triangle into a 2x2 4x-MSAA RT and validates the resolved 2x2 output. UNSUPPORTED: Vulkan || Metal. XFAIL: Clang. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- include/API/Device.h | 6 +- include/API/Texture.h | 1 + include/Support/Pipeline.h | 1 + lib/API/DX/Device.cpp | 81 ++++++++++++++++--- lib/API/Device.cpp | 5 +- lib/Support/Pipeline.cpp | 1 + .../MSAA/render-target-msaa-resolve.test | 75 +++++++++++++++++ 7 files changed, 157 insertions(+), 13 deletions(-) create mode 100644 test/Feature/MSAA/render-target-msaa-resolve.test diff --git a/include/API/Device.h b/include/API/Device.h index 104c32b60..18f750f1d 100644 --- a/include/API/Device.h +++ b/include/API/Device.h @@ -86,6 +86,7 @@ struct TraditionalRasterPipelineCreateDesc { llvm::SmallVector InputLayout; llvm::SmallVector RTFormats; std::optional DSFormat; + uint32_t SampleCount = 1; PrimitiveTopology Topology; ShaderContainer VS; // TODO: Optional Hull & Domain Shaders @@ -239,8 +240,11 @@ llvm::Expected> createRenderTargetFromCPUBuffer(Device &Dev, const CPUBuffer &Buf); // Creates a depth/stencil texture matching the dimensions of a render target. +// SampleCount must match the matching color render target when used in the +// same render pass (default 1, i.e. non-MSAA). llvm::Expected> -createDefaultDepthStencilTarget(Device &Dev, uint32_t Width, uint32_t Height); +createDefaultDepthStencilTarget(Device &Dev, uint32_t Width, uint32_t Height, + uint32_t SampleCount = 1); llvm::Expected> createBufferWithData(Device &Dev, std::string Name, diff --git a/include/API/Texture.h b/include/API/Texture.h index 26b9b030f..5bee0da28 100644 --- a/include/API/Texture.h +++ b/include/API/Texture.h @@ -71,6 +71,7 @@ struct TextureCreateDesc { uint32_t Width; uint32_t Height; uint32_t MipLevels; + uint32_t SampleCount = 1; // Clear value for render target or depth/stencil textures. // How and when this is applied depends on the backend: // - DX uses it as an optimized clear hint at resource creation time diff --git a/include/Support/Pipeline.h b/include/Support/Pipeline.h index c13f54d2b..ba6301905 100644 --- a/include/Support/Pipeline.h +++ b/include/Support/Pipeline.h @@ -140,6 +140,7 @@ struct OutputProperties { int Width; int Depth; int MipLevels = 1; + int SampleCount = 1; }; static inline uint32_t getFormatSize(DataFormat Format) { diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index 1a5d09124..0775a1f96 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -945,6 +945,9 @@ class DXDevice : public offloadtest::Device { // Resources for graphics pipelines. std::unique_ptr RenderPass; std::unique_ptr RenderTarget; + // When the render target is multi-sampled, the contents are resolved into + // this single-sample texture before being copied into RTReadback. + std::unique_ptr ResolvedRenderTarget; std::unique_ptr RTReadback; std::unique_ptr DepthStencil; std::unique_ptr VB; @@ -1180,7 +1183,8 @@ class DXDevice : public offloadtest::Device { PSODesc.DSVFormat = getDXGIFormat(*Desc.DSFormat); for (size_t I = 0; I < Desc.RTFormats.size(); ++I) PSODesc.RTVFormats[I] = getDXGIFormat(Desc.RTFormats[I]); - PSODesc.SampleDesc.Count = 1; + PSODesc.SampleDesc.Count = std::max(1u, Desc.SampleCount); + PSODesc.RasterizerState.MultisampleEnable = Desc.SampleCount > 1; ComPtr PSO; if (auto Err = HR::toError( @@ -1325,7 +1329,7 @@ class DXDevice : public offloadtest::Device { TexDesc.DepthOrArraySize = 1; TexDesc.MipLevels = static_cast(Desc.MipLevels); TexDesc.Format = getDXGIFormat(Desc.Fmt); - TexDesc.SampleDesc.Count = 1; + TexDesc.SampleDesc.Count = std::max(1u, Desc.SampleCount); TexDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; TexDesc.Flags = getDXResourceFlags(Desc.Usage); @@ -2258,6 +2262,28 @@ class DXDevice : public offloadtest::Device { IS.RenderTarget = std::move(*TexOrErr); + // For MSAA render targets, allocate a single-sample texture that + // ResolveSubresource writes into; CopyTextureRegion then copies the + // resolved texture into the readback buffer. + if (OutBuf.OutputProps.SampleCount > 1) { + auto FmtOrErr = toFormat(OutBuf.Format, OutBuf.Channels); + if (!FmtOrErr) + return FmtOrErr.takeError(); + TextureCreateDesc ResolvedDesc = {}; + ResolvedDesc.Location = MemoryLocation::GpuOnly; + ResolvedDesc.Usage = TextureUsage::RenderTarget; + ResolvedDesc.Fmt = *FmtOrErr; + ResolvedDesc.Width = OutBuf.OutputProps.Width; + ResolvedDesc.Height = OutBuf.OutputProps.Height; + ResolvedDesc.MipLevels = 1; + ResolvedDesc.SampleCount = 1; + ResolvedDesc.OptimizedClearValue = ClearColor{}; + auto ResolvedOrErr = createTexture("ResolvedRenderTarget", ResolvedDesc); + if (!ResolvedOrErr) + return ResolvedOrErr.takeError(); + IS.ResolvedRenderTarget = std::move(*ResolvedOrErr); + } + // Create readback buffer sized for the pixel data with row pitch padded // up to D3D12_TEXTURE_DATA_PITCH_ALIGNMENT, which is what D3D12 requires // for the placed footprint used by CopyTextureRegion. The compaction @@ -2275,9 +2301,11 @@ class DXDevice : public offloadtest::Device { } llvm::Error createDepthStencil(Pipeline &P, InvocationState &IS) { + const uint32_t SampleCount = + std::max(1, P.Bindings.RTargetBufferPtr->OutputProps.SampleCount); auto TexOrErr = offloadtest::createDefaultDepthStencilTarget( *this, P.Bindings.RTargetBufferPtr->OutputProps.Width, - P.Bindings.RTargetBufferPtr->OutputProps.Height); + P.Bindings.RTargetBufferPtr->OutputProps.Height, SampleCount); if (!TexOrErr) return TexOrErr.takeError(); IS.DepthStencil = std::move(*TexOrErr); @@ -2340,14 +2368,43 @@ class DXDevice : public offloadtest::Device { Encoder.endEncoding(); - // Transition the render target to copy source and copy to the readback - // buffer. - const D3D12_RESOURCE_BARRIER Barrier = CD3DX12_RESOURCE_BARRIER::Transition( - RT.Resource.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, - D3D12_RESOURCE_STATE_COPY_SOURCE); - IS.CB->CmdList->ResourceBarrier(1, &Barrier); - + // Transition the render target and (for MSAA) resolve target into the + // states needed to copy pixels back into RTReadback. For non-MSAA we + // just transition RT to COPY_SOURCE; for MSAA we resolve the multi- + // sampled RT into the single-sample resolved RT, then read that. const CPUBuffer &B = *P.Bindings.RTargetBufferPtr; + const bool IsMSAA = B.OutputProps.SampleCount > 1; + + ID3D12Resource *CopySource = RT.Resource.Get(); + if (IsMSAA) { + auto &Resolved = llvm::cast(*IS.ResolvedRenderTarget); + const D3D12_RESOURCE_BARRIER PreBarriers[] = { + CD3DX12_RESOURCE_BARRIER::Transition( + RT.Resource.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, + D3D12_RESOURCE_STATE_RESOLVE_SOURCE), + CD3DX12_RESOURCE_BARRIER::Transition( + Resolved.Resource.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, + D3D12_RESOURCE_STATE_RESOLVE_DEST)}; + IS.CB->CmdList->ResourceBarrier(2, PreBarriers); + + IS.CB->CmdList->ResolveSubresource(Resolved.Resource.Get(), 0, + RT.Resource.Get(), 0, + getDXFormat(B.Format, B.Channels)); + + const D3D12_RESOURCE_BARRIER PostBarrier = + CD3DX12_RESOURCE_BARRIER::Transition( + Resolved.Resource.Get(), D3D12_RESOURCE_STATE_RESOLVE_DEST, + D3D12_RESOURCE_STATE_COPY_SOURCE); + IS.CB->CmdList->ResourceBarrier(1, &PostBarrier); + CopySource = Resolved.Resource.Get(); + } else { + const D3D12_RESOURCE_BARRIER Barrier = + CD3DX12_RESOURCE_BARRIER::Transition( + RT.Resource.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, + D3D12_RESOURCE_STATE_COPY_SOURCE); + IS.CB->CmdList->ResourceBarrier(1, &Barrier); + } + const D3D12_PLACED_SUBRESOURCE_FOOTPRINT Footprint{ 0, CD3DX12_SUBRESOURCE_FOOTPRINT( @@ -2356,7 +2413,7 @@ class DXDevice : public offloadtest::Device { getAlignedTexturePitch(B.OutputProps.Width, B.getElementSize()))}; const CD3DX12_TEXTURE_COPY_LOCATION DstLoc(RTReadback.Buffer.Get(), Footprint); - const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(RT.Resource.Get(), 0); + const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(CopySource, 0); IS.CB->CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); @@ -2533,6 +2590,8 @@ class DXDevice : public offloadtest::Device { if (!FormatOrErr) return FormatOrErr.takeError(); PipelineDesc.RTFormats.push_back(*FormatOrErr); + PipelineDesc.SampleCount = + std::max(1, P.Bindings.RTargetBufferPtr->OutputProps.SampleCount); auto PipelineStateOrErr = createTraditionalRasterPipeline( "Graphics Pipeline State", BndDesc, PipelineDesc); diff --git a/lib/API/Device.cpp b/lib/API/Device.cpp index 86875096e..fd15ceacb 100644 --- a/lib/API/Device.cpp +++ b/lib/API/Device.cpp @@ -84,6 +84,7 @@ offloadtest::createRenderTargetFromCPUBuffer(Device &Dev, Desc.Width = Buf.OutputProps.Width; Desc.Height = Buf.OutputProps.Height; Desc.MipLevels = 1; + Desc.SampleCount = std::max(1, Buf.OutputProps.SampleCount); Desc.OptimizedClearValue = ClearColor{}; if (auto Err = validateTextureDescMatchesCPUBuffer(Desc, Buf)) @@ -94,7 +95,8 @@ offloadtest::createRenderTargetFromCPUBuffer(Device &Dev, llvm::Expected> offloadtest::createDefaultDepthStencilTarget(Device &Dev, uint32_t Width, - uint32_t Height) { + uint32_t Height, + uint32_t SampleCount) { TextureCreateDesc Desc = {}; Desc.Location = MemoryLocation::GpuOnly; Desc.Usage = TextureUsage::DepthStencil; @@ -102,6 +104,7 @@ offloadtest::createDefaultDepthStencilTarget(Device &Dev, uint32_t Width, Desc.Width = Width; Desc.Height = Height; Desc.MipLevels = 1; + Desc.SampleCount = SampleCount; Desc.OptimizedClearValue = ClearDepthStencil{1.0f, 0}; return Dev.createTexture("DepthStencil", Desc); diff --git a/lib/Support/Pipeline.cpp b/lib/Support/Pipeline.cpp index 80ff1b03f..703685763 100644 --- a/lib/Support/Pipeline.cpp +++ b/lib/Support/Pipeline.cpp @@ -497,6 +497,7 @@ void MappingTraits::mapping( I.mapRequired("Width", P.Width); I.mapRequired("Depth", P.Depth); I.mapOptional("MipLevels", P.MipLevels, 1); + I.mapOptional("SampleCount", P.SampleCount, 1); } void MappingTraits::mapping( diff --git a/test/Feature/MSAA/render-target-msaa-resolve.test b/test/Feature/MSAA/render-target-msaa-resolve.test new file mode 100644 index 000000000..f0d373aba --- /dev/null +++ b/test/Feature/MSAA/render-target-msaa-resolve.test @@ -0,0 +1,75 @@ +#--- vertex.hlsl +struct VSInput { + float4 position : POSITION; +}; + +struct VSOutput { + float4 position : SV_POSITION; +}; + +VSOutput main(VSInput input) { + VSOutput output; + output.position = input.position; + return output; +} + +#--- pixel.hlsl +struct PSInput { + float4 position : SV_POSITION; +}; + +float4 main(PSInput input) : SV_TARGET { + return float4(0.25, 0.5, 0.75, 1.0); +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Vertex + Entry: main + - Stage: Pixel + Entry: main +Buffers: + - Name: VertexData + Format: Float32 + Stride: 16 + Data: [ 0.0, 3.0, 0.0, 1.0, + 3.0, -3.0, 0.0, 1.0, + -3.0, -3.0, 0.0, 1.0 ] + - Name: Output + Format: Float32 + Channels: 4 + FillSize: 64 # 2x2 @ 16 bytes per pixel + OutputProps: + Height: 2 + Width: 2 + Depth: 1 + SampleCount: 4 +Bindings: + VertexBuffer: VertexData + VertexAttributes: + - Format: Float32 + Channels: 4 + Offset: 0 + Name: POSITION + RenderTarget: Output +DescriptorSets: [] +... +#--- end + +# MSAA pipelines and Resolve are currently DirectX-only. +# UNSUPPORTED: Vulkan || Metal +# XFAIL: Clang + +# RUN: split-file %s %t +# RUN: %dxc_target -T vs_6_0 -Fo %t-vertex.o %t/vertex.hlsl +# RUN: %dxc_target -T ps_6_0 -Fo %t-pixel.o %t/pixel.hlsl +# RUN: %offloader %t/pipeline.yaml %t-vertex.o %t-pixel.o | FileCheck %s + +# Every pixel is fully covered by the triangle, so the 4x-MSAA resolve +# averages 4 identical samples per pixel back to the same color. +# CHECK: Name: Output +# CHECK-NEXT: Format: Float32 +# CHECK-NEXT: Channels: 4 +# CHECK-NEXT: Data: [ 0.25, 0.5, 0.75, 1, 0.25, 0.5, 0.75, 1, 0.25, 0.5, +# CHECK-NEXT: 0.75, 1, 0.25, 0.5, 0.75, 1 ] From c5b3b55eb0d9d39941dae698d58bf5ee37ca8c20 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Wed, 27 May 2026 13:35:58 -0700 Subject: [PATCH 2/4] [DX] Gate 4x MSAA tests on device support Report 4x R32G32B32A32_FLOAT MSAA support through api-query and expose it as the MSAA_4xSamples lit feature. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- lib/API/DX/Device.cpp | 11 +++++++++++ test/Feature/MSAA/render-target-msaa-resolve.test | 1 + test/lit.cfg.py | 2 ++ 3 files changed, 14 insertions(+) diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index 91a3a6b75..45e5e3eb8 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -1485,6 +1485,17 @@ class DXDevice : public offloadtest::Device { CD3DX12FeatureSupport Features; Features.Init(Device.Get()); + D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS MSAA4xLevels = { + DXGI_FORMAT_R32G32B32A32_FLOAT, 4, + D3D12_MULTISAMPLE_QUALITY_LEVELS_FLAG_NONE, 0}; + const bool SupportsMSAA4x = SUCCEEDED(Device->CheckFeatureSupport( + D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, + &MSAA4xLevels, sizeof(MSAA4xLevels))) && + MSAA4xLevels.NumQualityLevels > 0; + Caps.insert( + std::make_pair("MSAA_4xSamples", + makeCapability("MSAA_4xSamples", SupportsMSAA4x))); + #define D3D_FEATURE_BOOL(Name) \ Caps.insert( \ std::make_pair(#Name, makeCapability(#Name, Features.Name()))); diff --git a/test/Feature/MSAA/render-target-msaa-resolve.test b/test/Feature/MSAA/render-target-msaa-resolve.test index f0d373aba..1d4492e99 100644 --- a/test/Feature/MSAA/render-target-msaa-resolve.test +++ b/test/Feature/MSAA/render-target-msaa-resolve.test @@ -59,6 +59,7 @@ DescriptorSets: [] # MSAA pipelines and Resolve are currently DirectX-only. # UNSUPPORTED: Vulkan || Metal +# REQUIRES: MSAA_4xSamples # XFAIL: Clang # RUN: split-file %s %t diff --git a/test/lit.cfg.py b/test/lit.cfg.py index cc59667d7..bd1b80f1c 100644 --- a/test/lit.cfg.py +++ b/test/lit.cfg.py @@ -152,6 +152,8 @@ def setDeviceFeatures(config, device, compiler): config.available_features.add("Int64GroupSharedAtomics") if device["Features"].get("MeshShaderTier", "NotSupported") != "NotSupported": config.available_features.add("MeshShader") + if device["Features"].get("MSAA_4xSamples", False): + config.available_features.add("MSAA_4xSamples") setWaveSizeFeaturesDirectX(config, device) if device["API"] == "Metal": From a68ead32854d8545bd7d3725a857f0930de6e0b9 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Wed, 27 May 2026 13:43:42 -0700 Subject: [PATCH 3/4] [NFC] Add const to ReadbackDX references for clang-tidy Fixes misc-const-correctness warnings-as-errors from clang-tidy in CI. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- lib/API/DX/Device.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index 45e5e3eb8..f3c5fa0d7 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -2441,7 +2441,7 @@ class DXDevice : public offloadtest::Device { for (const ResourceSet &RS : R.second) { if (RS.Readback == nullptr) continue; - DXBuffer &ReadbackDX = llvm::cast(*RS.Readback); + const DXBuffer &ReadbackDX = llvm::cast(*RS.Readback); addReadbackBeginBarrier(IS, RS.Buffer); const CD3DX12_TEXTURE_COPY_LOCATION DstLoc(ReadbackDX.Buffer.Get(), Footprint); @@ -2454,7 +2454,7 @@ class DXDevice : public offloadtest::Device { for (const ResourceSet &RS : R.second) { if (RS.Readback == nullptr) continue; - DXBuffer &ReadbackDX = llvm::cast(*RS.Readback); + const DXBuffer &ReadbackDX = llvm::cast(*RS.Readback); addReadbackBeginBarrier(IS, RS.Buffer); IS.CB->CmdList->CopyResource(ReadbackDX.Buffer.Get(), RS.Buffer.Get()); addReadbackEndBarrier(IS, RS.Buffer); From 58afcae4e39bc52699e74c9b61ee95257ca75722 Mon Sep 17 00:00:00 2001 From: alsepkow Date: Thu, 28 May 2026 12:43:48 -0700 Subject: [PATCH 4/4] Reject MSAA on VK/MTL with clean error for convergence The shared TextureCreateDesc.SampleCount field was added but VK and MTL silently ignored it. Add explicit not-supported errors so the contract is honest across backends. Tests that need MSAA already mark themselves UNSUPPORTED: Vulkan || Metal, so this guard is belt-and-suspenders. Tracks divergence concern raised in early PR review. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- lib/API/MTL/MTLDevice.cpp | 9 +++++++++ lib/API/VK/Device.cpp | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/lib/API/MTL/MTLDevice.cpp b/lib/API/MTL/MTLDevice.cpp index 604cbe243..6dd21ae7d 100644 --- a/lib/API/MTL/MTLDevice.cpp +++ b/lib/API/MTL/MTLDevice.cpp @@ -1501,6 +1501,15 @@ class MTLDevice : public offloadtest::Device { if (auto Err = validateTextureCreateDesc(Desc)) return Err; + // MSAA support is not yet wired up on the Metal backend. Tests that + // require it should mark themselves `UNSUPPORTED: Metal` so this guard + // never fires in CI; it exists to keep the shared TextureCreateDesc + // contract honest across backends (see #1043). + if (Desc.SampleCount > 1) + return llvm::createStringError( + std::errc::not_supported, + "MSAA textures (SampleCount > 1) are not yet implemented on Metal"); + MTL::TextureDescriptor *TDesc = MTL::TextureDescriptor::texture2DDescriptor( getMetalPixelFormat(Desc.Fmt), Desc.Width, Desc.Height, Desc.MipLevels > 1); diff --git a/lib/API/VK/Device.cpp b/lib/API/VK/Device.cpp index da683603a..564c2aae2 100644 --- a/lib/API/VK/Device.cpp +++ b/lib/API/VK/Device.cpp @@ -1768,6 +1768,15 @@ class VulkanDevice : public offloadtest::Device { if (auto Err = validateTextureCreateDesc(Desc)) return Err; + // MSAA support is not yet wired up on the Vulkan backend. Tests that + // require it should mark themselves `UNSUPPORTED: Vulkan` so this guard + // never fires in CI; it exists to keep the shared TextureCreateDesc + // contract honest across backends (see #1043). + if (Desc.SampleCount > 1) + return llvm::createStringError( + std::errc::not_supported, + "MSAA textures (SampleCount > 1) are not yet implemented on Vulkan"); + VkImageCreateInfo ImageInfo = {}; ImageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; ImageInfo.imageType = VK_IMAGE_TYPE_2D;