Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions include/API/Device.h
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,10 @@ createRenderTargetFromCPUBuffer(Device &Dev, const CPUBuffer &Buf);
llvm::Expected<std::unique_ptr<Texture>>
createDefaultDepthStencilTarget(Device &Dev, uint32_t Width, uint32_t Height);

// Creates a depth texture from a CPUBuffer whose GpuFormat is a depth format.
// The texture's width, height and format are taken from Buf (OutputProps and
// GpuFormat). Returns an error if Buf.GpuFormat is unset or is not a depth
// format, or if Buf's dimensions/size are inconsistent with the resulting
// texture description.
llvm::Expected<std::unique_ptr<Texture>>
createDepthBufferFromCPUBuffer(Device &Dev, const CPUBuffer &Buf);

llvm::Expected<std::unique_ptr<offloadtest::Buffer>>
createBufferWithData(Device &Dev, std::string Name,
const BufferCreateDesc &Desc, const void *Data,
Expand Down
43 changes: 36 additions & 7 deletions include/API/FormatConversion.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,6 @@ inline llvm::Expected<Format> toFormat(DataFormat Format, int Channels) {
return Format::RGBA32Float;
}
break;
case DataFormat::Depth32:
// D32FloatS8Uint is not expressible as DataFormat + Channels because the
// stencil component is uint8, not a second Depth32 channel. Once the
// pipeline uses Format directly, this limitation goes away.
if (Channels == 1)
return Format::D32Float;
break;
case DataFormat::UInt64:
// Only 1 and 2 channels of 64-bit integers are supported.
switch (Channels) {
Expand Down Expand Up @@ -172,6 +165,42 @@ validateTextureDescMatchesCPUBuffer(const TextureCreateDesc &Desc,
return llvm::Error::success();
}

// Validates that a TextureCreateDesc's dimensions and footprint are consistent
// with the CPUBuffer used for readback storage. Call this when format
// equivalence is not derived from DataFormat and Channels.
// This helper intentionally skips the toFormat-based format check.
// In that path, Desc.Fmt is set directly from GpuFormat.
//
// Checks performed, in order (the first failure is returned):
//   1. Desc.Width     == Buf.OutputProps.Width
//   2. Desc.Height    == Buf.OutputProps.Height
//   3. Desc.MipLevels == Buf.OutputProps.MipLevels
//   4. Buf.size()     == Desc.Width * Desc.Height * getFormatSizeInBytes(Fmt)
// Note that the size check covers a single Width x Height level only.
//
// Returns llvm::Error::success() when all checks pass, otherwise a string
// error with std::errc::invalid_argument.
inline llvm::Error
validateTextureDimsMatchCPUBuffer(const TextureCreateDesc &Desc,
                                  const CPUBuffer &Buf) {
  if (Desc.Width != static_cast<uint32_t>(Buf.OutputProps.Width))
    return llvm::createStringError(
        std::errc::invalid_argument,
        "TextureCreateDesc width %u does not match CPUBuffer width %d.",
        Desc.Width, Buf.OutputProps.Width);
  if (Desc.Height != static_cast<uint32_t>(Buf.OutputProps.Height))
    return llvm::createStringError(
        std::errc::invalid_argument,
        "TextureCreateDesc height %u does not match CPUBuffer height %d.",
        Desc.Height, Buf.OutputProps.Height);
  if (Desc.MipLevels != static_cast<uint32_t>(Buf.OutputProps.MipLevels))
    return llvm::createStringError(
        std::errc::invalid_argument,
        "TextureCreateDesc mip levels %u does not match CPUBuffer mip "
        "levels %d.",
        Desc.MipLevels, Buf.OutputProps.MipLevels);

  const uint32_t TexelSize = getFormatSizeInBytes(Desc.Fmt);
  // Widen before multiplying so large textures cannot overflow 32 bits.
  const uint64_t ExpectedSize =
      static_cast<uint64_t>(Desc.Width) * Desc.Height * TexelSize;
  const uint64_t ActualSize = static_cast<uint64_t>(Buf.size());
  if (ActualSize != ExpectedSize)
    // The arguments are forwarded to printf-style formatting, so each one must
    // have exactly the type its specifier names. uint64_t is not
    // `unsigned long long` on every target, hence the explicit casts.
    return llvm::createStringError(
        std::errc::invalid_argument,
        "CPUBuffer size %llu does not match expected size %llu "
        "(width %u * height %u * element size %u).",
        static_cast<unsigned long long>(ActualSize),
        static_cast<unsigned long long>(ExpectedSize), Desc.Width, Desc.Height,
        TexelSize);
  return llvm::Error::success();
}

} // namespace offloadtest

#endif // OFFLOADTEST_API_FORMATCONVERSION_H
25 changes: 22 additions & 3 deletions include/Support/Pipeline.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,6 @@ enum class DataFormat {
Float16,
Float32,
Float64,
Depth32,
Bool,
};

Expand Down Expand Up @@ -198,7 +197,6 @@ static inline uint32_t getFormatSize(DataFormat Format) {
case DataFormat::UInt32:
case DataFormat::Int32:
case DataFormat::Float32:
case DataFormat::Depth32:
case DataFormat::Bool:
return 4;
case DataFormat::Hex64:
Expand All @@ -216,6 +214,11 @@ struct CPUBuffer {
int Channels;
int Stride;
uint32_t ArraySize;
// When set, names the GPU texture format directly (e.g. D32Float) instead of
// inferring it from DataFormat + Channels via toFormat(). This lets depth
// buffers and other special formats be expressed without extending
// DataFormat.
std::optional<offloadtest::Format> GpuFormat;
// Data can contain one block of data for a singular resource
// or multiple blocks for a resource array.
llvm::SmallVector<std::unique_ptr<char[]>> Data;
Expand Down Expand Up @@ -458,6 +461,19 @@ struct IOBindings {

std::string RenderTarget;
CPUBuffer *RTargetBufferPtr = nullptr;

// Depth target that a test may bind so its contents can be read back.
// Leaving Name empty means no depth buffer was requested, in which case
// backends create their own internal depth target. After parsing, Ptr is
// resolved to the CPUBuffer entry called Name; that buffer owns the readback
// storage and its GpuFormat field supplies the depth texture's GPU format.
struct DepthBufferBinding {
  // Name of the CPUBuffer to use as the depth target (empty if unbound).
  std::string Name;
  // Resolved buffer for Name; stays null until resolution has happened.
  CPUBuffer *Ptr = nullptr;

  // Reports whether a depth buffer name was provided at all.
  bool empty() const {
    return Name.empty();
  }
};
DepthBufferBinding DepthBuffer;

PrimitiveTopology Topology = PrimitiveTopology::TriangleList;

// Set if Topology == PatchList. Validated in
Expand Down Expand Up @@ -746,6 +762,10 @@ template <> struct MappingTraits<offloadtest::IOBindings> {
static void mapping(IO &I, offloadtest::IOBindings &B);
};

// MappingTraits specialization for IOBindings::DepthBufferBinding. Only the
// static mapping() hook is declared here; its definition is not part of this
// header excerpt.
template <> struct MappingTraits<offloadtest::IOBindings::DepthBufferBinding> {
static void mapping(IO &I, offloadtest::IOBindings::DepthBufferBinding &B);
};

template <> struct MappingTraits<offloadtest::PushConstantValue> {
static void mapping(IO &I, offloadtest::PushConstantValue &B);
};
Expand Down Expand Up @@ -911,7 +931,6 @@ template <> struct ScalarEnumerationTraits<offloadtest::DataFormat> {
ENUM_CASE(Float16);
ENUM_CASE(Float32);
ENUM_CASE(Float64);
ENUM_CASE(Depth32);
ENUM_CASE(Bool);
#undef ENUM_CASE
}
Expand Down
76 changes: 72 additions & 4 deletions lib/API/DX/Device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,6 @@ static DXGI_FORMAT getDXFormat(DataFormat Format, int Channels) {
if (Channels == 2)
return DXGI_FORMAT_R32G32B32A32_UINT;
llvm_unreachable("Unsupported channel count for 64-bit format");
case DataFormat::Depth32:
llvm_unreachable(
"Depth32 format is not yet supported in the DirectX backend.");
default:
llvm_unreachable("Unsupported Resource format specified");
}
Expand Down Expand Up @@ -1173,6 +1170,7 @@ class DXDevice : public offloadtest::Device {
std::unique_ptr<offloadtest::Texture> RenderTarget;
std::unique_ptr<offloadtest::Buffer> RTReadback;
std::unique_ptr<offloadtest::Texture> DepthStencil;
std::unique_ptr<offloadtest::Buffer> DSReadback;
std::unique_ptr<offloadtest::Buffer> VB;

llvm::SmallVector<DescriptorTable> DescTables;
Expand Down Expand Up @@ -2788,6 +2786,28 @@ class DXDevice : public offloadtest::Device {
P.Bindings.RTargetBufferPtr->copyFromTexture(Mapped,
Placed.Footprint.RowPitch);
Readback.Buffer->Unmap(0, nullptr);

if (IS.DSReadback) {
void *DSMapped = nullptr;
auto &DSReadback = llvm::cast<DXBuffer>(*IS.DSReadback);
if (auto Err = HR::toError(DSReadback.Buffer->Map(0, nullptr, &DSMapped),
"Failed to map depth buffer readback"))
return Err;

auto &DS = llvm::cast<DXTexture>(*IS.DepthStencil);
const D3D12_RESOURCE_DESC DSDesc = DS.Resource->GetDesc();
D3D12_PLACED_SUBRESOURCE_FOOTPRINT DSPlaced = {};
uint32_t DSNumRows = 0;
uint64_t DSRowSizeInBytes = 0;
uint64_t DSTotalBytes = 0;
Device->GetCopyableFootprints(&DSDesc, 0u, 1u, 0u, &DSPlaced, &DSNumRows,
&DSRowSizeInBytes, &DSTotalBytes);

P.Bindings.DepthBuffer.Ptr->copyFromTexture(DSMapped,
DSPlaced.Footprint.RowPitch);
DSReadback.Buffer->Unmap(0, nullptr);
}
Comment on lines +2790 to +2809

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's use the copyTextureToBuffer function on the ComputeEncoder instead ^.^
This got recently added to main for the DX12 backend and the VK and MTL backends will use that path soon too.


return llvm::Error::success();
}

Expand Down Expand Up @@ -2821,6 +2841,27 @@ class DXDevice : public offloadtest::Device {
}

llvm::Error createDepthStencil(Pipeline &P, InvocationState &IS) {
// If the test bound a CPU-readable depth buffer, create the depth target
// from it and allocate a readback buffer. Otherwise fall back to the
// default depth target (which is not read back).
if (P.Bindings.DepthBuffer.Ptr) {
const CPUBuffer &DSBuf = *P.Bindings.DepthBuffer.Ptr;
auto TexOrErr = offloadtest::createDepthBufferFromCPUBuffer(*this, DSBuf);
if (!TexOrErr)
return TexOrErr.takeError();
IS.DepthStencil = std::move(*TexOrErr);

BufferCreateDesc BufDesc = {};
BufDesc.Location = MemoryLocation::GpuToCpu;
BufDesc.Usage = BufferUsage::Storage;
auto BufOrErr = createBuffer("DSReadback", BufDesc,
getAlignedTextureBufferSize(DSBuf));
if (!BufOrErr)
return BufOrErr.takeError();
IS.DSReadback = std::move(*BufOrErr);
return llvm::Error::success();
}

auto TexOrErr = offloadtest::createDefaultDepthStencilTarget(
*this, P.Bindings.RTargetBufferPtr->OutputProps.Width,
P.Bindings.RTargetBufferPtr->OutputProps.Height);
Expand Down Expand Up @@ -2906,6 +2947,33 @@ class DXDevice : public offloadtest::Device {

IS.CB->CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr);

// If a depth buffer is bound for readback, transition the depth target
// from DEPTH_WRITE to COPY_SOURCE and copy its contents to the readback
// buffer using the depth-aspect placed footprint.
if (IS.DSReadback) {
auto &DSReadback = llvm::cast<DXBuffer>(*IS.DSReadback);
const D3D12_RESOURCE_BARRIER DSBarrier =
CD3DX12_RESOURCE_BARRIER::Transition(
DS.Resource.Get(), D3D12_RESOURCE_STATE_DEPTH_WRITE,
D3D12_RESOURCE_STATE_COPY_SOURCE);
IS.CB->CmdList->ResourceBarrier(1, &DSBarrier);

const CPUBuffer &DSBuf = *P.Bindings.DepthBuffer.Ptr;
// CopyTextureRegion footprint format must match the source resource
// (D32_FLOAT), not the shader-visible R32_FLOAT SRV cast.
const DXGI_FORMAT DSResFormat = DS.Resource->GetDesc().Format;
const D3D12_PLACED_SUBRESOURCE_FOOTPRINT DSFootprint{
0,
CD3DX12_SUBRESOURCE_FOOTPRINT(
DSResFormat, DSBuf.OutputProps.Width, DSBuf.OutputProps.Height, 1,
getAlignedTexturePitch(DSBuf.OutputProps.Width,
DSBuf.getElementSize()))};
const CD3DX12_TEXTURE_COPY_LOCATION DSDstLoc(DSReadback.Buffer.Get(),
DSFootprint);
const CD3DX12_TEXTURE_COPY_LOCATION DSSrcLoc(DS.Resource.Get(), 0);
IS.CB->CmdList->CopyTextureRegion(&DSDstLoc, 0, 0, 0, &DSSrcLoc, nullptr);
}

auto CopyBackResource = [&IS, this](ResourcePair &R) {
if (R.first->isTexture()) {
const offloadtest::CPUBuffer &B = *R.first->BufferPtr;
Expand Down Expand Up @@ -3068,7 +3136,7 @@ class DXDevice : public offloadtest::Device {
TraditionalRasterPipelineCreateDesc PipelineDesc = {};
PipelineDesc.Topology = P.Bindings.Topology;
PipelineDesc.PatchControlPoints = P.Bindings.PatchControlPoints;
PipelineDesc.DSFormat = Format::D32FloatS8Uint;
PipelineDesc.DSFormat = State.DepthStencil->getDesc().Fmt;
for (auto &Shader : P.Shaders) {
ShaderContainer SC = {};
SC.EntryPoint = Shader.Entry;
Expand Down
25 changes: 25 additions & 0 deletions lib/API/Device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,31 @@ offloadtest::createDefaultDepthStencilTarget(Device &Dev, uint32_t Width,
return Dev.createTexture("DepthStencil", Desc);
}

// Builds a GPU-only depth/stencil texture described by a CPUBuffer.
//
// The buffer must name a depth format through its GpuFormat field; the
// texture takes that format and the buffer's OutputProps width and height,
// uses a single mip level, and is given an optimized clear value of
// depth 1.0 / stencil 0. The resulting description is checked against the
// buffer with validateTextureDimsMatchCPUBuffer before the texture is created.
//
// Returns the texture produced by Dev.createTexture, or an error if the
// buffer has no depth GpuFormat or the validation fails.
llvm::Expected<std::unique_ptr<Texture>>
offloadtest::createDepthBufferFromCPUBuffer(Device &Dev, const CPUBuffer &Buf) {
  const bool HasDepthFormat = Buf.GpuFormat && isDepthFormat(*Buf.GpuFormat);
  if (!HasDepthFormat) {
    // The format name is looked up inside the call so that whatever
    // getFormatName returns stays alive while the message is formatted.
    return llvm::createStringError(
        std::errc::invalid_argument,
        "Depth buffer requires a CPUBuffer with a depth GpuFormat; got '%s'.",
        Buf.GpuFormat ? getFormatName(*Buf.GpuFormat).data() : "<none>");
  }

  TextureCreateDesc DepthDesc = {};
  DepthDesc.Fmt = *Buf.GpuFormat;
  DepthDesc.Usage = TextureUsage::DepthStencil;
  DepthDesc.Location = MemoryLocation::GpuOnly;
  DepthDesc.Width = Buf.OutputProps.Width;
  DepthDesc.Height = Buf.OutputProps.Height;
  DepthDesc.MipLevels = 1;
  DepthDesc.OptimizedClearValue = ClearDepthStencil{1.0f, 0};

  if (auto DimErr = validateTextureDimsMatchCPUBuffer(DepthDesc, Buf))
    return DimErr;

  return Dev.createTexture("DepthBuffer", DepthDesc);
}

// This is a separate function because recursion is not allowed in this code
// base.
static llvm::Expected<std::unique_ptr<offloadtest::Buffer>>
Expand Down
4 changes: 4 additions & 0 deletions lib/API/MTL/MTLDevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1560,6 +1560,10 @@ class MTLDevice : public offloadtest::Device {
}

llvm::Error createDepthStencil(Pipeline &P, InvocationState &IS) {
if (P.Bindings.DepthBuffer.Ptr)
return llvm::createStringError(
std::errc::not_supported,
"Bindings.DepthBuffer is not yet supported on the Metal backend.");
auto TexOrErr = offloadtest::createDefaultDepthStencilTarget(
*this, P.Bindings.RTargetBufferPtr->OutputProps.Width,
P.Bindings.RTargetBufferPtr->OutputProps.Height);
Expand Down
Loading
Loading