Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion include/API/Device.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ struct TraditionalRasterPipelineCreateDesc {
llvm::SmallVector<InputLayoutDesc> InputLayout;
llvm::SmallVector<Format> RTFormats;
std::optional<Format> DSFormat;
uint32_t SampleCount = 1;
PrimitiveTopology Topology;
// Set if Topology == PatchList. Validated in
// Pipeline.cpp::validatePipelineKind.
Expand Down Expand Up @@ -252,8 +253,11 @@ llvm::Expected<std::unique_ptr<Texture>>
createRenderTargetFromCPUBuffer(Device &Dev, const CPUBuffer &Buf);

// Creates a depth/stencil texture matching the dimensions of a render target.
// SampleCount must equal the sample count of the color render target used in
// the same render pass (default 1, i.e. non-MSAA).
llvm::Expected<std::unique_ptr<Texture>>
createDefaultDepthStencilTarget(Device &Dev, uint32_t Width, uint32_t Height);
createDefaultDepthStencilTarget(Device &Dev, uint32_t Width, uint32_t Height,
uint32_t SampleCount = 1);

llvm::Expected<std::unique_ptr<offloadtest::Buffer>>
createBufferWithData(Device &Dev, std::string Name,
Expand Down
1 change: 1 addition & 0 deletions include/API/Texture.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ struct TextureCreateDesc {
uint32_t Width;
uint32_t Height;
uint32_t MipLevels;
uint32_t SampleCount = 1;
// Clear value for render target or depth/stencil textures.
// How and when this is applied depends on the backend:
// - DX uses it as an optimized clear hint at resource creation time
Expand Down
1 change: 1 addition & 0 deletions include/Support/Pipeline.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ struct OutputProperties {
int Width;
int Depth;
int MipLevels = 1;
int SampleCount = 1;
};

static inline uint32_t getFormatSize(DataFormat Format) {
Expand Down
92 changes: 81 additions & 11 deletions lib/API/DX/Device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -979,6 +979,9 @@ class DXDevice : public offloadtest::Device {
// Resources for graphics pipelines.
std::unique_ptr<offloadtest::RenderPass> RenderPass;
std::unique_ptr<offloadtest::Texture> RenderTarget;
// When the render target is multi-sampled, the contents are resolved into
// this single-sample texture before being copied into RTReadback.
std::unique_ptr<offloadtest::Texture> ResolvedRenderTarget;
std::unique_ptr<offloadtest::Buffer> RTReadback;
std::unique_ptr<offloadtest::Texture> DepthStencil;
std::unique_ptr<offloadtest::Buffer> VB;
Expand Down Expand Up @@ -1221,7 +1224,8 @@ class DXDevice : public offloadtest::Device {
PSODesc.DSVFormat = getDXGIFormat(*Desc.DSFormat);
for (size_t I = 0; I < Desc.RTFormats.size(); ++I)
PSODesc.RTVFormats[I] = getDXGIFormat(Desc.RTFormats[I]);
PSODesc.SampleDesc.Count = 1;
PSODesc.SampleDesc.Count = std::max(1u, Desc.SampleCount);
PSODesc.RasterizerState.MultisampleEnable = Desc.SampleCount > 1;

ComPtr<ID3D12PipelineState> PSO;
if (auto Err = HR::toError(
Expand Down Expand Up @@ -1367,7 +1371,7 @@ class DXDevice : public offloadtest::Device {
TexDesc.DepthOrArraySize = 1;
TexDesc.MipLevels = static_cast<UINT16>(Desc.MipLevels);
TexDesc.Format = getDXGIFormat(Desc.Fmt);
TexDesc.SampleDesc.Count = 1;
TexDesc.SampleDesc.Count = std::max(1u, Desc.SampleCount);
TexDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
TexDesc.Flags = getDXResourceFlags(Desc.Usage);

Expand Down Expand Up @@ -1506,6 +1510,17 @@ class DXDevice : public offloadtest::Device {
CD3DX12FeatureSupport Features;
Features.Init(Device.Get());

D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS MSAA4xLevels = {
DXGI_FORMAT_R32G32B32A32_FLOAT, 4,
D3D12_MULTISAMPLE_QUALITY_LEVELS_FLAG_NONE, 0};
const bool SupportsMSAA4x = SUCCEEDED(Device->CheckFeatureSupport(
D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS,
&MSAA4xLevels, sizeof(MSAA4xLevels))) &&
MSAA4xLevels.NumQualityLevels > 0;
Caps.insert(
std::make_pair("MSAA_4xSamples",
makeCapability<bool>("MSAA_4xSamples", SupportsMSAA4x)));

#define D3D_FEATURE_BOOL(Name) \
Caps.insert( \
std::make_pair(#Name, makeCapability<bool>(#Name, Features.Name())));
Expand Down Expand Up @@ -2285,6 +2300,28 @@ class DXDevice : public offloadtest::Device {

IS.RenderTarget = std::move(*TexOrErr);

// For MSAA render targets, allocate a single-sample texture that
// ResolveSubresource writes into; CopyTextureRegion then copies the
// resolved texture into the readback buffer.
if (OutBuf.OutputProps.SampleCount > 1) {
auto FmtOrErr = toFormat(OutBuf.Format, OutBuf.Channels);
if (!FmtOrErr)
return FmtOrErr.takeError();
TextureCreateDesc ResolvedDesc = {};
ResolvedDesc.Location = MemoryLocation::GpuOnly;
ResolvedDesc.Usage = TextureUsage::RenderTarget;
ResolvedDesc.Fmt = *FmtOrErr;
ResolvedDesc.Width = OutBuf.OutputProps.Width;
ResolvedDesc.Height = OutBuf.OutputProps.Height;
ResolvedDesc.MipLevels = 1;
ResolvedDesc.SampleCount = 1;
ResolvedDesc.OptimizedClearValue = ClearColor{};
auto ResolvedOrErr = createTexture("ResolvedRenderTarget", ResolvedDesc);
if (!ResolvedOrErr)
return ResolvedOrErr.takeError();
IS.ResolvedRenderTarget = std::move(*ResolvedOrErr);
}

// Create readback buffer sized for the pixel data with row pitch padded
// up to D3D12_TEXTURE_DATA_PITCH_ALIGNMENT, which is what D3D12 requires
// for the placed footprint used by CopyTextureRegion. The compaction
Expand All @@ -2302,9 +2339,11 @@ class DXDevice : public offloadtest::Device {
}

llvm::Error createDepthStencil(Pipeline &P, InvocationState &IS) {
const uint32_t SampleCount =
std::max(1, P.Bindings.RTargetBufferPtr->OutputProps.SampleCount);
auto TexOrErr = offloadtest::createDefaultDepthStencilTarget(
*this, P.Bindings.RTargetBufferPtr->OutputProps.Width,
P.Bindings.RTargetBufferPtr->OutputProps.Height);
P.Bindings.RTargetBufferPtr->OutputProps.Height, SampleCount);
if (!TexOrErr)
return TexOrErr.takeError();
IS.DepthStencil = std::move(*TexOrErr);
Expand Down Expand Up @@ -2367,14 +2406,43 @@ class DXDevice : public offloadtest::Device {

Encoder.endEncoding();

// Transition the render target to copy source and copy to the readback
// buffer.
const D3D12_RESOURCE_BARRIER Barrier = CD3DX12_RESOURCE_BARRIER::Transition(
RT.Resource.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET,
D3D12_RESOURCE_STATE_COPY_SOURCE);
IS.CB->CmdList->ResourceBarrier(1, &Barrier);

// Transition the render target and (for MSAA) resolve target into the
// states needed to copy pixels back into RTReadback. For non-MSAA we
// just transition RT to COPY_SOURCE; for MSAA we resolve the multi-
// sampled RT into the single-sample resolved RT, then read that.
const CPUBuffer &B = *P.Bindings.RTargetBufferPtr;
const bool IsMSAA = B.OutputProps.SampleCount > 1;

ID3D12Resource *CopySource = RT.Resource.Get();
if (IsMSAA) {
auto &Resolved = llvm::cast<DXTexture>(*IS.ResolvedRenderTarget);
const D3D12_RESOURCE_BARRIER PreBarriers[] = {
CD3DX12_RESOURCE_BARRIER::Transition(
RT.Resource.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET,
D3D12_RESOURCE_STATE_RESOLVE_SOURCE),
CD3DX12_RESOURCE_BARRIER::Transition(
Resolved.Resource.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET,
D3D12_RESOURCE_STATE_RESOLVE_DEST)};
IS.CB->CmdList->ResourceBarrier(2, PreBarriers);

IS.CB->CmdList->ResolveSubresource(Resolved.Resource.Get(), 0,
RT.Resource.Get(), 0,
getDXFormat(B.Format, B.Channels));

const D3D12_RESOURCE_BARRIER PostBarrier =
CD3DX12_RESOURCE_BARRIER::Transition(
Resolved.Resource.Get(), D3D12_RESOURCE_STATE_RESOLVE_DEST,
D3D12_RESOURCE_STATE_COPY_SOURCE);
IS.CB->CmdList->ResourceBarrier(1, &PostBarrier);
CopySource = Resolved.Resource.Get();
} else {
const D3D12_RESOURCE_BARRIER Barrier =
CD3DX12_RESOURCE_BARRIER::Transition(
RT.Resource.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET,
D3D12_RESOURCE_STATE_COPY_SOURCE);
IS.CB->CmdList->ResourceBarrier(1, &Barrier);
}

const D3D12_PLACED_SUBRESOURCE_FOOTPRINT Footprint{
0,
CD3DX12_SUBRESOURCE_FOOTPRINT(
Expand All @@ -2383,7 +2451,7 @@ class DXDevice : public offloadtest::Device {
getAlignedTexturePitch(B.OutputProps.Width, B.getElementSize()))};
const CD3DX12_TEXTURE_COPY_LOCATION DstLoc(RTReadback.Buffer.Get(),
Footprint);
const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(RT.Resource.Get(), 0);
const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(CopySource, 0);

IS.CB->CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr);

Expand Down Expand Up @@ -2563,6 +2631,8 @@ class DXDevice : public offloadtest::Device {
if (!FormatOrErr)
return FormatOrErr.takeError();
PipelineDesc.RTFormats.push_back(*FormatOrErr);
PipelineDesc.SampleCount =
std::max(1, P.Bindings.RTargetBufferPtr->OutputProps.SampleCount);

auto PipelineStateOrErr = createTraditionalRasterPipeline(
"Graphics Pipeline State", BndDesc, PipelineDesc);
Expand Down
5 changes: 4 additions & 1 deletion lib/API/Device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ offloadtest::createRenderTargetFromCPUBuffer(Device &Dev,
Desc.Width = Buf.OutputProps.Width;
Desc.Height = Buf.OutputProps.Height;
Desc.MipLevels = 1;
Desc.SampleCount = std::max(1, Buf.OutputProps.SampleCount);
Desc.OptimizedClearValue = ClearColor{};

if (auto Err = validateTextureDescMatchesCPUBuffer(Desc, Buf))
Expand All @@ -94,14 +95,16 @@ offloadtest::createRenderTargetFromCPUBuffer(Device &Dev,

llvm::Expected<std::unique_ptr<Texture>>
offloadtest::createDefaultDepthStencilTarget(Device &Dev, uint32_t Width,
uint32_t Height) {
uint32_t Height,
uint32_t SampleCount) {
TextureCreateDesc Desc = {};
Desc.Location = MemoryLocation::GpuOnly;
Desc.Usage = TextureUsage::DepthStencil;
Desc.Fmt = Format::D32FloatS8Uint;
Desc.Width = Width;
Desc.Height = Height;
Desc.MipLevels = 1;
Desc.SampleCount = SampleCount;
Desc.OptimizedClearValue = ClearDepthStencil{1.0f, 0};

return Dev.createTexture("DepthStencil", Desc);
Expand Down
9 changes: 9 additions & 0 deletions lib/API/MTL/MTLDevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1542,6 +1542,15 @@ class MTLDevice : public offloadtest::Device {
if (auto Err = validateTextureCreateDesc(Desc))
return Err;

// MSAA support is not yet wired up on the Metal backend. Tests that
// require it should mark themselves `UNSUPPORTED: Metal` so this guard
// never fires in CI; it exists to keep the shared TextureCreateDesc
// contract honest across backends (see #1043).
if (Desc.SampleCount > 1)
return llvm::createStringError(
std::errc::not_supported,
"MSAA textures (SampleCount > 1) are not yet implemented on Metal");

MTL::TextureDescriptor *TDesc = MTL::TextureDescriptor::texture2DDescriptor(
getMetalPixelFormat(Desc.Fmt), Desc.Width, Desc.Height,
Desc.MipLevels > 1);
Expand Down
9 changes: 9 additions & 0 deletions lib/API/VK/Device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2118,6 +2118,15 @@ class VulkanDevice : public offloadtest::Device {
if (auto Err = validateTextureCreateDesc(Desc))
return Err;

// MSAA support is not yet wired up on the Vulkan backend. Tests that
// require it should mark themselves `UNSUPPORTED: Vulkan` so this guard
// never fires in CI; it exists to keep the shared TextureCreateDesc
// contract honest across backends (see #1043).
if (Desc.SampleCount > 1)
return llvm::createStringError(
std::errc::not_supported,
"MSAA textures (SampleCount > 1) are not yet implemented on Vulkan");

VkImageCreateInfo ImageInfo = {};
ImageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
ImageInfo.imageType = VK_IMAGE_TYPE_2D;
Expand Down
1 change: 1 addition & 0 deletions lib/Support/Pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,7 @@ void MappingTraits<offloadtest::OutputProperties>::mapping(
I.mapRequired("Width", P.Width);
I.mapRequired("Depth", P.Depth);
I.mapOptional("MipLevels", P.MipLevels, 1);
I.mapOptional("SampleCount", P.SampleCount, 1);
}

void MappingTraits<offloadtest::dx::RootResource>::mapping(
Expand Down
76 changes: 76 additions & 0 deletions test/Feature/MSAA/render-target-msaa-resolve.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#--- vertex.hlsl
// Vertex shader input: one float4 read from the POSITION vertex attribute.
struct VSInput {
float4 position : POSITION;
};

// Vertex shader output: the position emitted with the SV_POSITION semantic.
struct VSOutput {
float4 position : SV_POSITION;
};

// Vertex shader entry point: forwards the incoming position unchanged.
VSOutput main(VSInput input) {
  VSOutput result;
  result.position = input.position;
  return result;
}

#--- pixel.hlsl
// Pixel shader input: the position carried by the SV_POSITION semantic.
struct PSInput {
float4 position : SV_POSITION;
};

// Pixel shader entry point: ignores its input and writes one constant RGBA
// color to the render target.
float4 main(PSInput input) : SV_TARGET {
  const float4 color = float4(0.25, 0.5, 0.75, 1.0);
  return color;
}

#--- pipeline.yaml
---
Shaders:
- Stage: Vertex
Entry: main
- Stage: Pixel
Entry: main
Buffers:
- Name: VertexData
Format: Float32
Stride: 16
Data: [ 0.0, 3.0, 0.0, 1.0,
3.0, -3.0, 0.0, 1.0,
-3.0, -3.0, 0.0, 1.0 ]
- Name: Output
Format: Float32
Channels: 4
FillSize: 64 # 2x2 @ 16 bytes per pixel
OutputProps:
Height: 2
Width: 2
Depth: 1
SampleCount: 4
Bindings:
VertexBuffer: VertexData
VertexAttributes:
- Format: Float32
Channels: 4
Offset: 0
Name: POSITION
RenderTarget: Output
DescriptorSets: []
...
#--- end

# MSAA pipelines and Resolve are currently DirectX-only.
# UNSUPPORTED: Vulkan || Metal
# REQUIRES: MSAA_4xSamples
# XFAIL: Clang

# RUN: split-file %s %t
# RUN: %dxc_target -T vs_6_0 -Fo %t-vertex.o %t/vertex.hlsl
# RUN: %dxc_target -T ps_6_0 -Fo %t-pixel.o %t/pixel.hlsl
# RUN: %offloader %t/pipeline.yaml %t-vertex.o %t-pixel.o | FileCheck %s

# Every pixel is fully covered by the triangle, so the 4x-MSAA resolve
# averages 4 identical samples per pixel back to the same color.
# CHECK: Name: Output
# CHECK-NEXT: Format: Float32
# CHECK-NEXT: Channels: 4
# CHECK-NEXT: Data: [ 0.25, 0.5, 0.75, 1, 0.25, 0.5, 0.75, 1, 0.25, 0.5,
# CHECK-NEXT: 0.75, 1, 0.25, 0.5, 0.75, 1 ]
2 changes: 2 additions & 0 deletions test/lit.cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,8 @@ def setDeviceFeatures(config, device, compiler):
config.available_features.add("Int64TypedResourceAtomics")
if device["Features"].get("MeshShaderTier", "NotSupported") != "NotSupported":
config.available_features.add("MeshShader")
if device["Features"].get("MSAA_4xSamples", False):
config.available_features.add("MSAA_4xSamples")
setWaveSizeFeaturesDirectX(config, device)
if device["Features"].get("RaytracingTier", "NotSupported") != "NotSupported":
config.available_features.add("acceleration-structure")
Expand Down
Loading